--- BasedOnStyle: Chromium
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: false
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: WithoutElse
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BinPackParameters: true
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: true
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
- foreach
IncludeBlocks: Regroup
- Regex: '^<ext/.*\.h>'
Priority: 2
SortPriority: 0
- Regex: '^<.*\.h>'
Priority: 1
SortPriority: 0
- Regex: '^<.*'
Priority: 2
SortPriority: 0
- Regex: '.*'
Priority: 3
SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentCaseLabels: true
IndentGotoLabels: true
IndentPPDirectives: None
IndentWidth: 2
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
- Language: Cpp
- cc
- CC
- cpp
- Cpp
- 'c++'
- 'C++'
CanonicalDelimiter: ''
BasedOnStyle: google
- Language: TextProto
- pb
- PB
- proto
- EqualsProto
- EquivToProto
- ParseTextOrDie
- ParseTextProtoOrDie
CanonicalDelimiter: ''
BasedOnStyle: google
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: Auto
TabWidth: 8
UseCRLF: false
UseTab: Never

View file

steps:
name: default
steps: steps:
- name: build-shared - name: build
image: arminfriedl/xwim-build:shared image: arminfriedl/xwim-build
commands: commands:
- meson wrap install gtest || true - meson wrap install gtest
- meson target/shared - meson build
- ninja -C target/shared - ninja -C build
- mv target/shared/src/xwim xwim-x86_64-glibc-linux-shared - ninja -C build test && ninja -C build coverage
- echo "******** TEST LOGS ***********"
- name: build-static - cat build/meson-logs/testlog.txt
image: arminfriedl/xwim-build:static - echo "****** COVERAGE LOGS *********"
commands: - cat build/meson-logs/coverage.txt
- meson wrap install gtest || true
- meson --default-library=static target/static
- ninja -C target/static
- mv target/static/src/xwim xwim-x86_64-musl-linux-static
- name: publish-binaries
image: appleboy/drone-scp
from_secret: deploy_user
from_secret: deploy_password
port: 22
target: /var/services/dirlist/repo/cicd/xwim/${DRONE_COMMIT_SHA:0:8}/
- xwim-x86_64-glibc-linux-shared
- xwim-x86_64-musl-linux-static
- build-shared
- build-static
trigger: trigger:
event: event:
event:
name: release
steps:
- name: build-shared - name: build
image: arminfriedl/xwim-build:shared image: arminfriedl/xwim-build
commands: commands:
- meson wrap install gtest || true - meson wrap install gtest
- meson --buildtype=release target/shared - meson --buildtype=release build
- ninja -C target/shared - ninja -C build
- strip target/shared/src/xwim - mkdir xwim-${DRONE_TAG}-x86_64-glibc-linux
- mkdir xwim-${DRONE_TAG}-x86_64-glibc-linux-shared - mv build/src/xwim xwim-${DRONE_TAG}-x86_64-glibc-linux
- mv target/shared/src/xwim xwim-${DRONE_TAG}-x86_64-glibc-linux-shared
- name: build-static
image: arminfriedl/xwim-build:static
- meson wrap install gtest || true
- meson --buildtype=release --default-library=static target/static
- ninja -C target/static
- strip target/static/src/xwim
- mkdir xwim-${DRONE_TAG}-x86_64-musl-linux-static
- mv target/static/src/xwim xwim-${DRONE_TAG}-x86_64-musl-linux-static
- name: package - name: package
image: arminfriedl/xwim-build image: arminfriedl/xwim-build
commands: commands:
- tar czf xwim-${DRONE_TAG}-x86_64-glibc-linux-shared.tar.gz xwim-${DRONE_TAG}-x86_64-glibc-linux-shared/xwim - tar cjf xwim-${DRONE_TAG}-x86_64-glibc-linux.tar.bz2 xwim-${DRONE_TAG}-x86_64-glibc-linux/xwim
- tar czf xwim-${DRONE_TAG}-x86_64-musl-linux-static.tar.gz xwim-${DRONE_TAG}-x86_64-musl-linux-static/xwim - tar czf xwim-${DRONE_TAG}-x86_64-glibc-linux.tar.gz xwim-${DRONE_TAG}-x86_64-glibc-linux/xwim
depends_on: - zip -r xwim-${DRONE_TAG} xwim-${DRONE_TAG}-x86_64-glibc-linux
- build-shared
- build-static
- name: publish - name: publish
image: plugins/gitea-release image: plugins/gitea-release
@ -83,14 +49,13 @@ steps:
api_key: api_key:
from_secret: gitea_token from_secret: gitea_token
files: files:
- xwim-${DRONE_TAG}-x86_64-glibc-linux-shared.tar.gz - xwim-${DRONE_TAG}-x86_64-glibc-linux.tar.bz2
- xwim-${DRONE_TAG}-x86_64-musl-linux-static.tar.gz - xwim-${DRONE_TAG}-x86_64-glibc-linux.tar.gz
- xwim-${DRONE_TAG}
title: xwim ${DRONE_TAG} title: xwim ${DRONE_TAG}
checksum: checksum:
- md5 - md5
- sha256 - sha256
- package
trigger:
event:

.gitignore vendored
View file

@ -2,11 +2,11 @@
build/ build/
target/ target/
compile_commands.json compile_commands.json
.ccls-cache .ccls-cache
### C++ ###
# Prerequisites
# Edit at,vim,emacs,linux,macos,ninja,windows,jetbrains+all,clion+all,visualstudiocode # Edit at,c++,emacs,ninja
### C++ ### ### C++ ###
# Prerequisites # Prerequisites
@ -42,94 +42,6 @@ compile_commands.json
*.out *.out
*.app *.app
### CLion+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference:
# User-specific stuff
# AWS User-specific
# Generated files
# Sensitive or high-churn files
# Gradle
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
# Mongo Explorer plugin
# File-based project format
# IntelliJ
# mpeltonen/sbt-idea plugin
# JIRA plugin
# Cursive Clojure plugin
# SonarLint plugin
# Crashlytics plugin (for Android Studio and IntelliJ)
# Editor-based Rest Client
# Android studio 3.1+ serialized cache file
### CLion+all Patch ###
# Ignore everything but code style settings and run configurations
# that are supposed to be shared within teams.
### Emacs ### ### Emacs ###
# -*- mode: gitignore; -*- # -*- mode: gitignore; -*-
*~ *~
@ -181,108 +93,6 @@ flycheck_*.el
/ /
### JetBrains+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference:
# User-specific stuff
# AWS User-specific
# Generated files
# Sensitive or high-churn files
# Gradle
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
# Mongo Explorer plugin
# File-based project format
# IntelliJ
# mpeltonen/sbt-idea plugin
# JIRA plugin
# Cursive Clojure plugin
# SonarLint plugin
# Crashlytics plugin (for Android Studio and IntelliJ)
# Editor-based Rest Client
# Android studio 3.1+ serialized cache file
### JetBrains+all Patch ###
# Ignore everything but code style settings and run configurations
# that are supposed to be shared within teams.
### Linux ###
# temporary files which can be created if a process still has a handle open of a deleted file
# KDE directory preferences
# Linux trash folder which might appear on any partition or disk
# .nfs files are created when an open file is removed but is still being accessed
### macOS ###
# General
# Icon must end with two \r
# Thumbnails
# Files that might appear in the root of a volume
# Directories potentially created on remote AFP share
Network Trash Folder
Temporary Items
### macOS Patch ###
# iCloud generated files
### Ninja ### ### Ninja ###
.ninja_deps .ninja_deps
.ninja_log .ninja_log
@ -290,7 +100,6 @@ Temporary Items
### Vim ### ### Vim ###
# Swap # Swap
[._]*.s[a-v][a-z] [._]*.s[a-v][a-z]
!*.svg # comment out if you don't need vector files
[._]*.sw[a-p] [._]*.sw[a-p]
[._]s[a-rt-v][a-z] [._]s[a-rt-v][a-z]
[._]ss[a-gi-z] [._]ss[a-gi-z]
@ -302,54 +111,14 @@ Sessionx.vim
# Temporary # Temporary
.netrwhist .netrwhist
# Auto-generated tag files # Auto-generated tag files
tags tags
# Persistent undo # Persistent undo
[._]*.un~ [._]*.un~
### VisualStudioCode ### # Coc configuration directory
.vscode/* .vim
# Local History for Visual Studio Code # End of,c++,emacs,ninja
# Built Visual Studio Code Extensions
### VisualStudioCode Patch ###
# Ignore all local history of files
### Windows ###
# Windows thumbnail cache files
# Dump file
# Folder config file
# Recycle Bin used on file shares
# Windows Installer files
# Windows shortcuts
# End of,vim,emacs,linux,macos,ninja,windows,jetbrains+all,clion+all,visualstudiocode

View file

@ -1,12 +0,0 @@
all: compile_commands.json
cd build && ninja
cd build && ninja -t compdb > compile_commands.json
cd build && ninja -t clean

View file

@ -5,9 +5,7 @@ Do What I Mean Extractor
![]( ![](
[xkcd-1168]( Continuing the emacs tradition of "Do What I Mean" tools, xwim is a replacement
Continuing the emacs tradition of "Do What I Mean" tools, xwim is a replacement
for the excellent, but unfortunately unmaintained, for the excellent, but unfortunately unmaintained,
[dtrx]( xwim is a command line tool that [dtrx]( xwim is a command line tool that
targets two problems with archives: targets two problems with archives:
@ -17,27 +15,6 @@ considerably between formats
- Inconsiderately packaged archives tend to spill their content over the - Inconsiderately packaged archives tend to spill their content over the
directory they are extracted to directory they are extracted to
`dtrx` is a Python script that sets up the command line and calls appropriate
archiving binaries (if installed). In contrast `xwim` is a compiled binary based
directly on archiving libraries, which some may appreciate. It can optionally be
statically linked if you want it entirely self-contained.
# Install
`xwim` is currently released for Linux only. There are two flavors: statically
linked and dynamically linked. The releases can be downloaded from and should run on most 64-bit
GNU/Linux distributions.
For the dynamically linked version, the following dependencies have to be installed:
- [spdlog](
- [fmt](
- [libarchive](
Windows support is planned for the first stable release. Packaging for various
distributions is also planned once `xwim` stabilizes. Please reach out if you
can help.
# Usage # Usage
Invoking `xwim` is as simple as: Invoking `xwim` is as simple as:
@ -46,25 +23,8 @@ xwim archive.tar.gz
``` ```
This will extract the archive to the current folder. If the archive contains a This will extract the archive to the current folder. If the archive contains a
single root folder it is just extracted as is. Otherwise xwim creates a folder single root folder it is just extracted as is. Otherwise xwim first creates a
named after the archive and extracts the contents there. folder named after the archive and extracts the contents there.
xwim /home/user/
This will create an archive in the "platform native" format (zip on windows,
tar.gz on unix) in the current working directory. The archive contains a single
root folder `user` and is itself named `user.zip` or `user.tar.gz`.
xwim /home/user/file.txt
This will create an archive in the "platform native" format (zip on windows,
tar.gz on unix) in the current working directory. The archive contains a single
entry `file.txt` and is itself named `file.zip` or `file.tar.gz`.
# Examples
@ -98,13 +58,26 @@ xwim will create a folder `archive` in the current directory and extract the
archive contents there. archive contents there.
# Supported formats # Supported formats
Currently `xwim` supports `tar.gz` and `zip` archives. However, this will xwim supports most formats supported by [libarchive](
rapidly expand to many more formats until a stable release is officially announced.
Take a look at `Archiver.hpp` if you want to help and have some time for testing. - 7-zip: 7z, 7zip
Most formats can readily be added if they are supported by libarchive. For other - zip: jar, zip
formats you have to add an `Archiver` implementation. - bzip2: bz2, bzip2
- gzip: gz, gzip
- xzip: xz
- rar: rar
- tar with compression: tgz, tar.gz, tar.bz2, tar.xz
# Install
xwim is currently released as a dynamically linked glibc binary only. The
releases can be downloaded from
and should run on most glibc based GNU/Linux distributions. The following
dependencies have to be installed:
- [spdlog](
- [fmt](
- [libarchive](
Approaching the first stable release we will release for more platforms.
# Build # Build
xwim is built with [meson]( To compile xwim from source xwim is built with [meson]( To compile xwim from source
@ -153,28 +126,14 @@ Per default xwim chooses an appropriate log level according to your build type
- off - off
# Contributing # Contributing
While xwim is still in incubator phase (i.e. before version 1.0) its main While xwim is still in incubator phase (i.e. before version 1.0) its main
repository is hosted on with a mirror on repository is hosted on with a mirror on With the first stable release it will most With the first stable release it will most
likely move to GitHub as its main repository. likely move to GitHub as its main repository.
If you want to contribute, you can either issue a pull request on its Github If you want to contribute, you can either issue a pull request on its Github
mirror (will be cherry picked into the main repository) or send patches to mirror (will be cherry picked into the main repository) or send patches to
dev[at]friedl[dot]net. dev[at]friedl[dot]net.
If you are interested in a long-term co-maintainership you can also drop me a If you are interested in a long-term co-maintainership you can also drop me a
mail for an account on mail for an account on
# Known Issues
- <strong>Parsing filters is unsupported</strong>
There is a somewhat long standing
[bug]( in libarchive. rar
files might fail with `Parsing filters is unsupported`. This is because `rar`
is a proprietary format and `libarchive` does not implement the full machinery
necessary to support `rar` completely. `xwim` is all about convenience. If you
want to help with supporting `rar`, please keep in mind that this means we
would want to use the [official `unrar`
library]( if possible. This is also a
licensing issue as `unrar` is proprietary and its license is seemingly not GPL-compatible.

View file

@ -1,12 +1,8 @@
project('xwim', 'cpp', project('xwim', 'cpp',
version: '0.4', version: '0.2',
default_options: ['cpp_std=c++17', default_options: ['cpp_std=c++17',
'warning_level=3', 'warning_level=3',
'b_ndebug=if-release']) 'b_coverage=true'])
add_global_arguments('-DVERSION='+meson.version(), language: 'cpp')
add_global_arguments('-DSPDLOG_FMT_EXTERNAL', language: 'cpp')
add_global_arguments('-DFMT_HEADER_ONLY', language: 'cpp')
subdir('src') subdir('src')
subdir('doc') subdir('doc')

View file

@ -1,147 +0,0 @@
#include "Archiver.hpp"
#include "Formats.hpp"
#include <spdlog/spdlog.h>
#include <filesystem>
#include <map>
#include <memory>
#include "util/Common.hpp"
#if defined(unix) || defined(__unix__) || defined(__unix)
std::string default_extension = ".tar.gz";
#elif defined(_win32) || defined(__win32__) || defined(__windows__)
std::string default_extension = ".zip";
std::string default_extension = ".zip";
namespace xwim {
using namespace std;
namespace fs = std::filesystem;
// Extract longest known extension from path
fs::path archive_extension(const fs::path& path) {
// TODO: creates lots of paths, refactor
fs::path ext;
fs::path tmp_ext;
fs::path tmp_path;
// cater for trailing `/` which is represented
// as empty path element
for (auto p : path) {
if (!p.empty()) {
tmp_path /= p;
while (tmp_path.has_extension()) {
tmp_ext = tmp_path.extension() += tmp_ext;
Format format = find_extension_format(tmp_ext);
if (format != Format::UNKNOWN) {
// (Combined) extension known. Remember as `ext` and keep
// looking for even longer extensions.
ext = tmp_ext;
} // else: (Combined) extension not known, keep `ext` as-is but try
// longer extensions
tmp_path = tmp_path.stem();
return ext;
// Strip longest known extension from path
fs::path strip_archive_extension(const fs::path& path) {
// TODO: creates lots of paths, refactor
int longest_ext = 0;
int tmp_longest_ext = 0;
fs::path tmp_ext;
fs::path tmp_path;
fs::path stem_path;
// cater for trailing `/` which is represented
// as empty path element
for(auto p: path) {
if(!p.empty()) {
tmp_path /= p;
stem_path = tmp_path;
spdlog::debug("Checking {} extensions", tmp_path);
while (tmp_path.has_extension()) {
tmp_ext = tmp_path.extension() += tmp_ext;
spdlog::debug("Looking for {} in known extensions", tmp_ext);
Format format = find_extension_format(tmp_ext);
if (format != Format::UNKNOWN) {
// (Combined) extension known. Remember as `longest_ext` and keep
// looking for even longer extensions.
longest_ext = tmp_longest_ext;
} // else: (Combined) extension not known, keep `longest_ext` as-is but try
// longer extensions
spdlog::debug("Stemming {} to {}", tmp_path, tmp_path.stem());
tmp_path = tmp_path.stem();
spdlog::debug("Found {} extensions", longest_ext);
tmp_path = stem_path;
for (int i = 0; i < longest_ext; i++) tmp_path = tmp_path.stem();
spdlog::debug("Stripped path is {} ", tmp_path);
return tmp_path;
std::filesystem::path default_archive(const std::filesystem::path& base) {
string base_s = base.string();
string ext_s = default_extension;
return fs::path{fmt::format("{}{}", base_s, ext_s)};
bool can_handle_archive(const fs::path& path) {
fs::path ext = archive_extension(path);
if (format_extensions.find(ext.string()) != format_extensions.end()) {
spdlog::debug("Found {} in known formats", ext);
return true;
spdlog::debug("Could not find {} in known formats", ext);
return false;
Format parse_format(const fs::path& path) {
spdlog::debug("Looking for path {}", path);
fs::path ext = archive_extension(path);
spdlog::debug("Looking for ext {}", ext);
Format format = find_extension_format(ext);
if (format == Format::UNKNOWN) {
throw XwimError{"No known archiver for {}", path};
return format;
unique_ptr<Archiver> make_archiver(const string& archive_name) {
switch (parse_format(archive_name)) {
case Format::TAR_GZIP: case Format::TAR_BZIP2:
case Format::TAR_COMPRESS: case Format::TAR_LZIP:
case Format::TAR_XZ: case Format::TAR_ZSTD:
case Format::ZIP:
return make_unique<LibArchiver>();
throw XwimError{
"Cannot construct archiver for {}. `extension_format` surjection "
"invariant violated?",
} // namespace xwim

View file

@ -1,43 +0,0 @@
#pragma once
#include <fmt/core.h>
#include <filesystem>
#include <map>
#include <memory>
#include <set>
#include "util/Common.hpp"
#include "Formats.hpp"
namespace xwim {
class Archiver {
virtual void compress(std::set<std::filesystem::path> ins,
std::filesystem::path archive_out) = 0;
virtual void extract(std::filesystem::path archive_in,
std::filesystem::path out) = 0;
virtual ~Archiver() = default;
class LibArchiver : public Archiver {
void compress(std::set<std::filesystem::path> ins,
std::filesystem::path archive_out);
void extract(std::filesystem::path archive_in, std::filesystem::path out);
std::filesystem::path archive_extension(const std::filesystem::path& path);
std::filesystem::path strip_archive_extension(const std::filesystem::path& path);
std::filesystem::path default_archive(const std::filesystem::path& base);
Format parse_format(const std::filesystem::path& path);
bool can_handle_archive(const std::filesystem::path& path);
std::unique_ptr<Archiver> make_archiver(const std::string& archive_name);
} // namespace xwim

View file

@ -1,49 +0,0 @@
#pragma once
namespace xwim {
using namespace std;
// Invariant:
// `extensions_format` defines a surjection from `format_extensions`
// to `Formats`
enum class Format {
const set<string> format_extensions{
// tar formats see:
/* bzip2 */ ".tar.bz2", ".tb2", ".tbz", ".tbz2", ".tz2",
/* gzip */ ".tar.gz", ".taz", ".tgz",
/* lzip */ ".tar.lz",
/* xz */ ".tar.xz", ".txz",
/* compress */ ".tar.Z", ".tZ", ".taZ",
/* zstd */ ".tar.zst", ".tzst",
/* zip */ ".zip"
const map<set<string>, Format> extensions_format{
{{".tar.bz2", ".tb2", ".tbz", ".tbz2", ".tz2"}, Format::TAR_BZIP2},
{{".tar.gz", ".taz", ".tgz"}, Format::TAR_GZIP},
{{".tar.lz"}, Format::TAR_LZIP},
{{".tar.xz", ".txz"}, Format::TAR_XZ},
{{".tar.Z", ".tZ", ".taZ"}, Format::TAR_COMPRESS},
{{".tar.zst", ".tzst"}, Format::TAR_ZSTD},
{{".zip"}, Format::ZIP}
inline Format find_extension_format(const string& ext) {
for(auto ef: extensions_format) {
auto f = ef.first.find(ext);
if(f != ef.first.end()) {
return ef.second;
return Format::UNKNOWN;

View file

@ -1,226 +0,0 @@
#include "UserIntent.hpp"
#include <spdlog/spdlog.h>
#include <algorithm>
#include <filesystem>
#include "Archiver.hpp"
namespace xwim {
unique_ptr<UserIntent> make_compress_intent(const UserOpt &userOpt) {
if (userOpt.paths.size() == 1) {
return make_unique<CompressSingleIntent>(
CompressSingleIntent{*userOpt.paths.begin(), userOpt.out});
if (!userOpt.out.has_value()) {
throw XwimError("Cannot guess output for multiple targets");
return make_unique<CompressManyIntent>(
CompressManyIntent{userOpt.paths, userOpt.out.value()});
unique_ptr<UserIntent> make_extract_intent(const UserOpt &userOpt) {
for (const path &p : userOpt.paths) {
if (!can_handle_archive(p)) {
throw XwimError("Cannot extract path {}", p);
return make_unique<ExtractIntent>(ExtractIntent{userOpt.paths, userOpt.out});
unique_ptr<UserIntent> try_infer_compress_intent(const UserOpt &userOpt) {
if (!userOpt.out.has_value()) {
spdlog::debug("No <out> provided");
if (userOpt.paths.size() != 1) {
"Not a single-path compression. Cannot guess <out> for many-path "
return nullptr;
spdlog::debug("Only one <path> provided. Assume single-path compression.");
return make_unique<CompressSingleIntent>(
CompressSingleIntent{*userOpt.paths.begin(), userOpt.out});
spdlog::debug("<out> provided: {}", userOpt.out.value());
if (can_handle_archive(userOpt.out.value())) {
spdlog::debug("{} given and a known archive format, assume compression",
return make_compress_intent(userOpt);
"Cannot compress multiple paths without a user-provided output archive");
return nullptr;
unique_ptr<UserIntent> try_infer_extract_intent(const UserOpt &userOpt) {
bool can_extract_all =
std::all_of(userOpt.paths.begin(), userOpt.paths.end(),
[](const path &path) { return can_handle_archive(path); });
if (!can_extract_all) {
"Cannot extract all provided <paths>. Assume this is not an "
for (const path &p : userOpt.paths) {
if (!can_handle_archive(p)) {
spdlog::debug("Cannot handle {}", p);
return nullptr;
if (userOpt.out.has_value() && can_handle_archive(userOpt.out.value())) {
"Could extract all provided <paths>. But also {} looks like an "
"archive. Ambiguous intent. Assume this is not an extraction.",
return nullptr;
"Could extract all provided <paths>. But also <out> looks like an "
"archive. Ambiguous intent. Assume this is not an extraction.");
return make_extract_intent(userOpt);
unique_ptr<UserIntent> make_intent(const UserOpt &userOpt) {
if (userOpt.wants_compress() && userOpt.wants_extract()) {
throw XwimError("Cannot compress and extract simultaneously");
if (userOpt.paths.empty()) {
throw XwimError("No input given...");
// explicitly specified intent
if (userOpt.wants_compress()) return make_compress_intent(userOpt);
if (userOpt.wants_extract()) return make_extract_intent(userOpt);
spdlog::info("Intent not explicitly provided, trying to infer intent");
if (auto intent = try_infer_extract_intent(userOpt)) {
spdlog::info("Extraction intent inferred");
return intent;
spdlog::info("Cannot infer extraction intent");
if (auto intent = try_infer_compress_intent(userOpt)) {
spdlog::info("Compression intent inferred");
return intent;
spdlog::info("Cannot infer compression intent");
throw XwimError("Cannot guess intent");
void ExtractIntent::dwim_reparent(const path &out) {
// move extraction if extraction resulted in only one entry and that entries
// name is already the stripped archive name, i.e. reduce unnecessary nesting
auto dit = std::filesystem::directory_iterator(out);
auto dit_path = dit->path();
if (dit == std::filesystem::directory_iterator()) {
"Cannot flatten extraction folder: extraction folder is empty");
if (!is_directory(dit_path)) {
spdlog::debug("Cannot flatten extraction folder: {} is not a directory",
if (next(dit) != std::filesystem::directory_iterator()) {
spdlog::debug("Cannot flatten extraction folder: multiple items extracted");
if (!std::filesystem::equivalent(dit_path.filename(), out.filename())) {
"Cannot flatten extraction folder: archive entry differs from archive "
"name [extraction folder: {}, archive entry: {}]",
out.filename(), dit_path.filename());
spdlog::debug("Output folder [{}] is equivalent to archive entry [{}]", out,
spdlog::info("Flattening extraction folder");
int i = rand_int(0, 100000);
path tmp_out = path{out};
tmp_out.concat(fmt::format(".xwim{}", i));
spdlog::debug("Move {} to {}", dit_path, tmp_out);
std::filesystem::rename(dit_path, tmp_out);
spdlog::debug("Remove parent path {}", out);
spdlog::debug("Moving {} to {}", tmp_out, out);
std::filesystem::rename(tmp_out, out);
path ExtractIntent::out_path(const path &p) {
if (!this->out.has_value()) {
// not out path given, create from archive name
path out = std::filesystem::current_path() / strip_archive_extension(p);
return out;
if (this->archives.size() == 1) {
// out given and only one archive to extract, just extract into `out`
return this->out.value();
// out given and multiple archives to extract, create subfolder
// for each archive
path out = this->out.value() / strip_archive_extension(p);
return out;
void ExtractIntent::execute() {
for (const path &p : this->archives) {
std::unique_ptr<Archiver> archiver = make_archiver(p);
path out = this->out_path(p);
archiver->extract(p, out);
path CompressSingleIntent::out_path() {
if (this->out.has_value()) {
if (!can_handle_archive(this->out.value())) {
throw XwimError("Unknown archive format {}", this->out.value());
return this->out.value();
return default_archive(strip_archive_extension(this->in).stem());
void CompressSingleIntent::execute() {
path out = this->out_path();
unique_ptr<Archiver> archiver = make_archiver(out);
set<path> ins{this->in};
archiver->compress(ins, out);
void CompressManyIntent::execute() {
if (!can_handle_archive(this->out)) {
throw XwimError("Unknown archive format {}", this->out);
unique_ptr<Archiver> archiver = make_archiver(this->out);
archiver->compress(this->in_paths, this->out);
} // namespace xwim

View file

@ -1,93 +0,0 @@
#pragma once
#include <optional>
#include <set>
#include "util/Common.hpp"
#include "UserOpt.hpp"
namespace xwim {
using namespace std;
using std::filesystem::path;
class UserIntent {
virtual void execute() = 0;
virtual ~UserIntent() = default;
/* Factory method to construct a UserIntent which implements `execute()` */
unique_ptr<UserIntent> make_intent(const UserOpt& userOpt);
* Extraction intent
* Extracts one or multiple archives. Optionally extracts them to given `out` folder. Otherwise extracts them to the
* current working directory.
class ExtractIntent: public UserIntent {
set<path> archives;
optional<path> out;
void dwim_reparent(const path& out);
path out_path(const path& p);
ExtractIntent(set<path> archives, optional<path> out): archives(archives), out(out) {};
~ExtractIntent() override = default;
void execute() override;
* Compress intent for a single file or folder.
* Compresses a single path which may be a file or a folder.
* No `out` path given:
* - derives the archive name from the input path
* - uses the default archive format for the platform
* `out` path given:
* - `out` path must be a path with a valid archive name (including extension)
* - tries to compress the input to the out archive
* - if the `out` base name is different from the input base name, puts the input into a new folder
* with base name inside the archive (archive base name is always the name of the archive content)
class CompressSingleIntent : public UserIntent {
path in;
optional<path> out;
path out_path();
CompressSingleIntent(path in, optional<path> out) : UserIntent(), in(in), out(out) {};
~CompressSingleIntent() override = default;
void execute() override;
* Compress intent for multiple files and/or folders.
* Compresses multiple files and/or folders to a single archive as given by the `out` path. Since `out` cannot be
* guessed from the input in this case it is mandatory.
* A new, single root folder with base name equal to base name of the `out` archive is created inside the archive. All
* input files are put into this root folder.
class CompressManyIntent: public UserIntent {
set<path> in_paths;
path out;
CompressManyIntent(set<path> in_paths, path out): UserIntent(), in_paths(in_paths), out(out) {};
~CompressManyIntent() override = default;
void execute() override;
} // namespace xwim

View file

@ -1,52 +0,0 @@
#include "UserOpt.hpp"
#include <tclap/CmdLine.h>
template <>
struct TCLAP::ArgTraits<std::filesystem::path> {
// We use `operator=` here for path construction
// because `operator>>` (`ValueLike`) causes a split at
// whitespace
typedef StringLike ValueCategory;
namespace xwim {
/**
 * Parse the command line into this UserOpt.
 *
 * `compress`, `extract` and `out` are only assigned when the corresponding
 * argument was given, so they stay empty optionals otherwise. `verbosity` and
 * `interactive` are always assigned.
 *
 * @param argc argument count as passed to main
 * @param argv argument vector as passed to main
 */
UserOpt::UserOpt(int argc, char** argv) {
  // clang-format off
  TCLAP::CmdLine cmd
    {"xwim - Do What I Mean Extractor", ' ', "0.3.0"};

  TCLAP::SwitchArg arg_compress
    {"c", "compress", "Compress <files>", cmd, false};

  TCLAP::SwitchArg arg_extract
    {"x", "extract", "Extract <file>", cmd, false};

  TCLAP::SwitchArg arg_noninteractive
    {"i", "non-interactive", "Non-interactive, fail on ambiguity", cmd, false};

  TCLAP::ValueArg<fs::path> arg_outfile
    {"o", "out", "Out <file-or-path>", false, fs::path{}, "A path on the filesystem", cmd};

  TCLAP::MultiSwitchArg arg_verbose
    {"v", "verbose", "Verbosity level", cmd, 0};

  TCLAP::UnlabeledMultiArg<fs::path> arg_paths
    {"files", "Archive(s) to extract or file(s) to compress", true, "A path on the filesystem", cmd};
  // clang-format on

  cmd.parse(argc, argv);

  if (arg_compress.isSet()) this->compress = arg_compress.getValue();
  if (arg_extract.isSet()) this->extract = arg_extract.getValue();
  if (arg_outfile.isSet()) this->out = arg_outfile.getValue();

  this->verbosity = arg_verbose.getValue();
  // The switch is "non-interactive", the stored flag is its negation.
  this->interactive = !arg_noninteractive.getValue();

  if (arg_paths.isSet()) {
    // Copying into a set drops duplicate paths and orders them.
    const auto& given = arg_paths.getValue();
    this->paths = set<fs::path>{given.begin(), given.end()};
  }
}
} // namespace xwim

View file

@ -1,31 +0,0 @@
#pragma once
#include <optional>
#include <set>
#include "util/Common.hpp"
namespace xwim {
using namespace std;
namespace fs = std::filesystem;
struct UserOpt {
optional<bool> compress;
optional<bool> extract;
bool interactive;
int verbosity;
std::optional<fs::path> out;
std::set<fs::path> paths;
UserOpt(int argc, char** argv);
bool wants_compress() const {
return this->compress.has_value() && this->compress.value();
bool wants_extract() const {
return this->extract.has_value() && this->extract.value();
} // namespace xwim

src/archive.cpp Normal file
View file

@ -0,0 +1,121 @@
#include <spdlog/spdlog.h>
#include <sys/stat.h>
namespace logger = spdlog;
#include <archive.h>
#include <archive_entry.h>
#include <algorithm>
#include <filesystem>
#include <iostream>
#include <stdexcept>
#include "archive_sys.hpp"
#include "archive.hpp"
#include "spec.hpp"
#include "fileformats.hpp"
namespace xwim {
/**
 * Record in `spec` whether the first path component of `entry` equals the
 * archive file name with its archive extensions stripped.
 *
 * @param spec     receives `is_root_filename`
 * @param entry    archive entry whose path is inspected
 * @param filepath path of the archive file itself
 */
static void _spec_is_root_filename(ArchiveSpec* spec,
                                   ArchiveEntryView entry,
                                   std::filesystem::path* filepath) {
  auto entry_path = entry.path();
  auto norm_stem = filepath->filename();
  norm_stem = xwim::stem(norm_stem);

  // An empty entry path has no first component; dereferencing begin() on it
  // would be undefined, so treat it as "no root".
  const std::filesystem::path entry_root =
      entry_path.empty() ? std::filesystem::path{} : *entry_path.begin();

  if (entry_root != norm_stem) {
    logger::debug("Archive root does not match archive name");
    spec->is_root_filename = false;
  } else {
    logger::debug("Archive root matches archive name");
    spec->is_root_filename = true;
  }
  logger::debug("\t-> Archive root: {}", entry_root.string());
  logger::debug("\t-> Archive stem: {}", norm_stem.string());
}
/**
 * Record in `spec` whether `entry` is a directory.
 *
 * @param spec  receives `is_root_dir`
 * @param entry archive entry to inspect
 */
static void _spec_is_root_dir(ArchiveSpec* spec, ArchiveEntryView entry) {
  if (entry.is_directory()) {
    logger::debug("Archive root is directory");
    spec->is_root_dir = true;
  } else {
    logger::debug("Archive root is not a directory");
    spec->is_root_dir = false;
  }
  // Logged in octal, the usual notation for mode bits.
  logger::debug("\t-> Archive mode_t: {0:o}", entry.file_type());
}
static void _spec_has_single_root(ArchiveSpec* spec,
ArchiveEntryView first_entry,
ArchiveReaderSys& archive_reader) {
std::filesystem::path first_entry_root = *(first_entry.path().begin());
logger::trace("Testing roots");
spec->has_single_root = true;
while (archive_reader.advance()) {
ArchiveEntryView entry = archive_reader.cur();
auto next_entry = entry.path();
logger::trace("Path: {}, Root: {}", next_entry.string(),
if (first_entry_root != *next_entry.begin()) {
logger::debug("Archive has multiple roots");
logger::debug("\t-> Archive root I: {}",
logger::debug("\t-> Archive root II: {}", next_entry.begin()->string());
spec->has_single_root = false;
if (spec->has_single_root)
logger::debug("Archive has single root: {}", first_entry_root.string());
/** Stores the archive location; this constructor does nothing else. */
Archive::Archive(std::filesystem::path path) : path(path) {}
/**
 * Analyse the archive at `path` and describe its layout.
 *
 * The first entry decides `is_root_filename` and `is_root_dir`; the remaining
 * entries are then walked to decide `has_single_root`.
 *
 * @returns the collected ArchiveSpec; every flag is false for an empty archive
 */
ArchiveSpec Archive::check() {
  logger::trace("Creating archive spec for {}", this->path.string());

  ArchiveReaderSys archive_reader{this->path};
  ArchiveSpec archive_spec;

  if (!archive_reader.advance()) {  // can't advance even once, archive is empty
    logger::debug("Archive is empty");
    return {false, false, false};
  }

  ArchiveEntryView first_entry = archive_reader.cur();
  logger::trace("Found archive entry {}", first_entry.path_name());

  // Order matters: the last helper advances the reader.
  _spec_is_root_filename(&archive_spec, first_entry, &this->path);
  _spec_is_root_dir(&archive_spec, first_entry);
  _spec_has_single_root(&archive_spec, first_entry, archive_reader);

  return archive_spec;
}
void Archive::extract(ExtractSpec extract_spec) {
std::filesystem::path abs_path = std::filesystem::absolute(this->path);
std::unique_ptr<ArchiveExtractorSys> extractor;
if(extract_spec.make_dir) {
logger::trace("Creating extract directory {}", extract_spec.dirname.string());
extractor = std::unique_ptr<ArchiveExtractorSys>(new ArchiveExtractorSys{extract_spec.dirname});
} else {
extractor = std::unique_ptr<ArchiveExtractorSys>(new ArchiveExtractorSys{});
ArchiveReaderSys reader{abs_path};
} // namespace xwim

src/archive.hpp Normal file
View file

@ -0,0 +1,50 @@
#pragma once
#include <archive.h>
#include <fmt/format.h>
#include <filesystem>
#include <stdexcept>
#include <string>
#include <string_view>
#include "spec.hpp"
namespace xwim {
/** Class for interacting with archives */
class Archive {
std::filesystem::path path;
explicit Archive(std::filesystem::path path);
/** Generate an ArchiveSpec by analysing the archive at `path`
* @returns ArchiveSpec for the archive
ArchiveSpec check();
/** Extract the archive at `path` according to given ExtractSpec */
void extract(ExtractSpec extract_spec);
/** Exception carrying a message plus libarchive's error text, if any. */
class ArchiveException : public std::exception {
 private:
  std::string _what;

 public:
  /**
   * @param what    description of the failed operation
   * @param archive libarchive handle to take the error string from; may be
   *                null, in which case only `what` is used
   */
  ArchiveException(std::string what, archive* archive) {
    // Query once; the handle is not touched again after construction.
    const char* detail =
        (archive != nullptr) ? archive_error_string(archive) : nullptr;
    if (detail) {
      _what = fmt::format("{}: {}", what, detail);
    } else {
      _what = fmt::format("{}", what);
    }
  }

  virtual const char* what() const noexcept override {
    return this->_what.c_str();
  }
};
} // namespace xwim

src/archive_sys.cpp Normal file
View file

@ -0,0 +1,142 @@
#include <archive_entry.h>
#include <spdlog/spdlog.h>
namespace logger = spdlog;
#include "archive_sys.hpp"
#include <archive.h>
#include <filesystem>
#include <memory>
/** @returns true when this view does not point at any archive entry */
bool xwim::ArchiveEntryView::is_empty() {
  return (this->ae == nullptr);
}
/**
 * @returns the entry's path name as reported by libarchive
 * @throws ArchiveSysException if the view is empty or the entry has no name
 */
std::string xwim::ArchiveEntryView::path_name() {
  if (!this->ae) throw ArchiveSysException{"Access to invalid archive entry"};
  // libarchive may hand back NULL; building a std::string from it is undefined.
  const char* name = archive_entry_pathname(this->ae);
  if (!name) throw ArchiveSysException{"Archive entry has no path name"};
  return name;
}
/**
 * @returns the entry's path name wrapped in a filesystem path
 * @throws ArchiveSysException if the view is empty
 */
std::filesystem::path xwim::ArchiveEntryView::path() {
  if (!this->ae) throw ArchiveSysException{"Access to invalid archive entry"};
  return std::filesystem::path{this->path_name()};
}
/**
 * @returns the file type bits libarchive reports for the entry
 * @throws ArchiveSysException if the view is empty
 */
mode_t xwim::ArchiveEntryView::file_type() {
  if (!this->ae) throw ArchiveSysException{"Access to invalid archive entry"};
  return archive_entry_filetype(this->ae);
}
/**
 * @returns true when the entry's file type marks it as a directory
 * @throws ArchiveSysException if the view is empty (via file_type())
 */
bool xwim::ArchiveEntryView::is_directory() {
  return S_ISDIR(this->file_type());
}
/**
 * Open the archive at `path` for reading.
 *
 * @param path location of the archive file
 * @throws ArchiveSysException if libarchive cannot open the file
 */
xwim::ArchiveReaderSys::ArchiveReaderSys(std::filesystem::path& path) {
  int r;  // libarchive error handling

  logger::trace("Setting up archive reader");
  this->ae = nullptr;  // no current entry until advance() succeeds
  this->ar = archive_read_new();
  // libarchive only recognises filters/formats that were registered up front.
  archive_read_support_filter_all(this->ar);
  archive_read_support_format_all(this->ar);

  logger::trace("Reading archive at {}", path.c_str());
  r = archive_read_open_filename(this->ar, path.c_str(), 10240);

  if (r != ARCHIVE_OK) {
    // Build the exception first: it reads the error text from the handle.
    // The destructor does not run when a constructor throws, so free here.
    ArchiveSysException failure{"Could not open archive file", this->ar};
    archive_read_free(this->ar);
    this->ar = nullptr;
    throw failure;
  }

  logger::trace("Archive read succesfully");
}
/** Releases the libarchive read handle, if one is held. */
xwim::ArchiveReaderSys::~ArchiveReaderSys() {
  logger::trace("Destructing ArchiveReaderSys");
  if (this->ar) archive_read_free(this->ar);
}
bool xwim::ArchiveReaderSys::advance() {
int r; // libarchive error handling
logger::trace("Advancing reader to next archive entry");
r = archive_read_next_header(this->ar, &this->ae);
if (r == ARCHIVE_EOF) { this->ae = nullptr; return false; }
if (r != ARCHIVE_OK) throw(ArchiveSysException{"Could not list archive", this->ar});
logger::trace("Got entry {}", archive_entry_pathname(ae));
return true;
/** @returns a non-owning view of the entry the reader currently points at */
const xwim::ArchiveEntryView xwim::ArchiveReaderSys::cur() {
  return ArchiveEntryView{this->ae};
}
xwim::ArchiveExtractorSys::ArchiveExtractorSys(std::filesystem::path& root) {
logger::trace("Constructing ArchiveExtractorSys with path {}", root.string());
this->writer = archive_write_disk_new();
logger::trace("Constructed ArchiveExtractorSys at {:p}", (void*) this->writer);
xwim::ArchiveExtractorSys::ArchiveExtractorSys() {
logger::trace("Construction ArchiveExtractorSys without root");
this->writer = archive_write_disk_new();
logger::trace("Constructed ArchiveExtractorSys at {:p}", (void*) this->writer);
void xwim::ArchiveExtractorSys::extract_all(xwim::ArchiveReaderSys& reader) {
while(reader.advance()) {
// forward declared
static int copy_data(struct archive* ar, struct archive* aw);
void xwim::ArchiveExtractorSys::extract_entry(xwim::ArchiveReaderSys& reader) {
int r;
r = archive_write_header(this->writer,;
if (r != ARCHIVE_OK) {
throw(ArchiveSysException("Could not extract entry",;
r = copy_data(, this->writer);
if (r != ARCHIVE_OK) {
throw(ArchiveSysException("Could not extract entry",;
logger::trace("Destructing ArchiveExtractorSys at {:p}", (void*) this->writer);
if(this->writer) {
/**
 * Copy the data blocks of the current entry from `ar` to `aw`.
 *
 * @param ar libarchive handle to read blocks from
 * @param aw libarchive handle to write blocks to
 * @returns ARCHIVE_OK once the reader reports end of data, otherwise the first
 *          non-OK status returned by the read or the write call
 */
static int copy_data(struct archive* ar, struct archive* aw) {
  int r;
  const void* buff;
  size_t size;
  int64_t offset;

  for (;;) {
    r = archive_read_data_block(ar, &buff, &size, &offset);
    if (r == ARCHIVE_EOF) {
      return (ARCHIVE_OK);  // end of this entry's data is the success case
    }
    if (r != ARCHIVE_OK) {
      return (r);
    }
    r = archive_write_data_block(aw, buff, size, offset);
    if (r != ARCHIVE_OK) {
      return (r);
    }
  }
}

src/archive_sys.hpp Normal file
View file

@ -0,0 +1,101 @@
#pragma once
#include <archive.h>
#include <filesystem>
#include <memory>
#include <fmt/format.h>
namespace xwim {
/** A view into an archive entry
* The view is non-owning and the caller must guarantee
* that the parent archive entry is valid when the view
* is accessed.
class ArchiveEntryView {
archive_entry* ae;
ArchiveEntryView() = default;
ArchiveEntryView(archive_entry* entry) : ae{entry} {}
bool is_empty();
std::string path_name();
std::filesystem::path path();
mode_t file_type();
bool is_directory();
/** A reader for archive files
* Shim for `libarchive`. Iterates through
* entries of an archive with `next()`
class ArchiveReaderSys {
archive* ar;
archive_entry* ae;
friend class ArchiveExtractorSys;
ArchiveReaderSys(std::filesystem::path& path);
/** Advances the internal entry pointer
* @return true if the pointer advanced to the next entry
* false if the end of the archive was reached
bool advance();
/** Returns a non-owning view of the current entry
* ArchiveEntryView is a non-owning view of the currently
* active entry in this reader. A retrieved archive entry
* may not be used after another call to advance in the
* same reader.
* @return a view to the archive entry this reader currently
* points to
const ArchiveEntryView cur();
/** A extractor for archive files
* Shim for `libarchive`.
class ArchiveExtractorSys {
archive* writer;
ArchiveExtractorSys(std::filesystem::path& root);
void extract_all(ArchiveReaderSys& reader);
void extract_entry(ArchiveReaderSys& reader);
/** Exception raised by the libarchive shim classes. */
class ArchiveSysException : public std::exception {
 private:
  std::string _what;

 public:
  /**
   * @param what    description of the failed operation
   * @param archive libarchive handle to take the error string from; may be
   *                null, in which case only `what` is used
   */
  ArchiveSysException(std::string what, archive* archive) {
    // Query once; the handle is not touched again after construction.
    const char* detail =
        (archive != nullptr) ? archive_error_string(archive) : nullptr;
    if (detail) {
      _what = fmt::format("{}: {}", what, detail);
    } else {
      _what = fmt::format("{}", what);
    }
  }

  /** @param what description of the failed operation */
  ArchiveSysException(std::string what) { _what = fmt::format("{}", what); }

  virtual const char* what() const noexcept override {
    return this->_what.c_str();
  }
};
} // namespace xwim

View file

@ -1,167 +0,0 @@
#include <archive.h>
#include <archive_entry.h>
#include <fcntl.h>
#include <fmt/core.h>
#include <spdlog/spdlog.h>
#include <sys/stat.h>
#include <filesystem>
#include <iostream>
#include <memory>
#include "../Archiver.hpp"
#include "../util/Common.hpp"
namespace xwim {
using namespace std;
namespace fs = std::filesystem;
static int copy_data(shared_ptr<archive> reader, shared_ptr<archive> writer);
void LibArchiver::compress(set<fs::path> ins, fs::path archive_out) {
spdlog::debug("Compressing to {}", archive_out);
int r; // libarchive error handling
static char buff[16384]; // read buffer
// cannot use unique_ptr here since unique_ptr requires a
// complete type. `archive` is forward declared only.
shared_ptr<archive> writer;
writer = shared_ptr<archive>(archive_write_new(), archive_write_free);
// archive_write_add_filter_gzip(writer.get());
// archive_write_set_format_pax_restricted(writer.get());
archive_write_set_format_filter_by_ext(writer.get(), archive_out.c_str());
archive_write_open_filename(writer.get(), archive_out.c_str());
shared_ptr<archive> reader;
shared_ptr<archive_entry> entry = shared_ptr<archive_entry>(archive_entry_new(), archive_entry_free);
for (auto in : ins) {
spdlog::debug("Compressing {}", in);
reader = shared_ptr<archive>(archive_read_disk_new(), archive_read_free);
r = archive_read_disk_open(reader.get(), in.c_str());
if (r != ARCHIVE_OK) {
throw XwimError{"Failed opening {}. {}", in,
for (;;) {
r = archive_read_next_header2(reader.get(), entry.get());
if (r == ARCHIVE_EOF) break;
if (r != ARCHIVE_OK) {
throw XwimError{"Failed compressing archive entry. {}",
spdlog::debug("Adding {} to archive", archive_entry_pathname(entry.get()));
r = archive_write_header(writer.get(), entry.get());
if (r != ARCHIVE_OK) {
throw XwimError{"Failed writing archive entry. {}",
/* For now, we use a simpler loop to copy data
* into the target archive. */
int fd = open(archive_entry_sourcepath(entry.get()), O_RDONLY);
ssize_t len = read(fd, buff, sizeof(buff));
while (len > 0) {
archive_write_data(writer.get(), buff, len);
len = read(fd, buff, sizeof(buff));
void LibArchiver::extract(fs::path archive_in, fs::path out) {
spdlog::debug("Extracting archive {} to {}", archive_in, out);
int r; // libarchive error handling
// cannot use unique_ptr here since unique_ptr requires a
// complete type. `archive` is forward declared only.
shared_ptr<archive> reader;
reader = shared_ptr<archive>(archive_read_new(), archive_read_free);
r = archive_read_open_filename(reader.get(), archive_in.c_str(), 10240);
if (r != ARCHIVE_OK) {
throw XwimError{"Failed opening archive {}. {}", archive_in,
shared_ptr<archive> writer;
writer = shared_ptr<archive>(archive_write_disk_new(), archive_write_free);
fs::path cur_path = fs::current_path();
archive_entry *entry;
for (;;) {
r = archive_read_next_header(reader.get(), &entry);
if (r == ARCHIVE_EOF) break;
if (r != ARCHIVE_OK) {
throw XwimError{"Failed extracting archive entry. {}",
r = archive_write_header(writer.get(), entry);
if (r != ARCHIVE_OK) {
throw XwimError{"Failed writing archive entry header. {}",
if (archive_entry_size(entry) > 0) {
r = copy_data(reader, writer);
if (r != ARCHIVE_OK) {
throw XwimError{"Failed writing archive entry data. {}",
r = archive_write_finish_entry(writer.get());
if (r != ARCHIVE_OK) {
throw XwimError{"Failed finishing archive entry data. {}",
if (r != ARCHIVE_OK && r != ARCHIVE_EOF) {
throw XwimError{"Failed extracting archive {}. {}", archive_in,
/**
 * Copy the data blocks of the current entry from `reader` to `writer`.
 *
 * @returns ARCHIVE_OK once the reader reports end of data, otherwise the first
 *          non-OK status returned by the read or the write call
 */
static int copy_data(shared_ptr<archive> reader, shared_ptr<archive> writer) {
  int r;
  const void *buff;
  size_t size;
  int64_t offset;

  for (;;) {
    r = archive_read_data_block(reader.get(), &buff, &size, &offset);
    if (r == ARCHIVE_EOF) {
      return (ARCHIVE_OK);  // end of this entry's data is the success case
    }
    if (r != ARCHIVE_OK) {
      return (r);
    }
    r = archive_write_data_block(writer.get(), buff, size, offset);
    if (r != ARCHIVE_OK) {
      return (r);
    }
  }
}
} // namespace xwim

src/fileformats.hpp Normal file
View file

@ -0,0 +1,69 @@
/** @file fileformats.hpp
 * @brief Handle archive extensions
 */
#pragma once
#include <spdlog/spdlog.h>
#include <optional>
namespace logger = spdlog;
#include <filesystem>
#include <set>
#include <string>
namespace xwim {
/** Common archive formats understood by xwim
 *
 * The underlying libarchive backend retrieves format information by a process
 * called `bidding`. Hence, this information is mainly used to strip extensions.
 *
 * Stripping extensions via `std::filesystem::path` does not work reliably since
 * it gets easily confused by dots in the regular file name.
 *
 * Every entry includes the leading dot, matching what
 * `std::filesystem::path::extension()` returns.
 */
inline const std::set<std::string> fileformats{
    ".7z",  ".7zip", ".jar", ".tgz", ".bz2", ".bzip2",
    ".gz",  ".gzip", ".rar", ".tar", ".xz",  ".zip"};
/** Strip archive extensions from a path
* @returns Base filename without archive extensions
inline std::filesystem::path stem(const std::filesystem::path& path) {
std::filesystem::path p_stem{path};
logger::trace("Stemming {}", p_stem.string());
p_stem = p_stem.filename();
while (fileformats.find(p_stem.extension().string()) != fileformats.end()) {
p_stem = p_stem.stem();
logger::trace("Stemmed to {}", p_stem.string());
logger::trace("Finished stemming {}", p_stem.string());
return p_stem;
/** Get the archive extension of a path.
* The archive extension may be a combination of supported fileformats in which
* case all of them are returned.
* @returns Archive extension of the archive or path() if no (known) extension
* exists.
inline std::filesystem::path ext(const std::filesystem::path& path) {
std::filesystem::path p_ext{path};
logger::trace("Extracting extension of {}", p_ext.string());
std::filesystem::path p_ext_collector;
while (fileformats.find(p_ext.extension().string()) != fileformats.end()) {
// path extension() const
p_ext_collector = p_ext.extension().concat(p_ext_collector.string());
return p_ext_collector;
} // namespace xwim

View file

@ -1,26 +1,56 @@
#include <spdlog/common.h> #include <spdlog/common.h>
#include <spdlog/spdlog.h>
#include <cstdlib> #include <cstdlib>
#include <filesystem> namespace logger = spdlog;
#include "UserIntent.hpp" #include <iostream>
#include "UserOpt.hpp" #include <ostream>
#include "util/Common.hpp" #include <string>
#include "util/Log.hpp" #include <list>
using namespace xwim; #include "util/log.hpp"
using namespace std; #include "util/argparse.hpp"
#include "archive.hpp"
#include "spec.hpp"
#include "fileformats.hpp"
int main(int argc, char** argv) { int main(int argc, char** argv) {
log::init(); xwim::log::init();
UserOpt user_opt = UserOpt{argc, argv};
log::init(user_opt.verbosity); xwim::argparse::XwimPath xwim_path;
try { try {
unique_ptr<UserIntent> user_intent = make_intent(user_opt); xwim_path = xwim::argparse::parse(argc, argv);
user_intent->execute(); } catch (xwim::argparse::ArgParseException& ex) {
} catch (XwimError& e) { logger::error("{}\n", ex.what());
spdlog::error(e.what()); std::cout << xwim::argparse::usage();
} }
try {
xwim::Archive archive{xwim_path.path()};
xwim::ArchiveSpec archive_spec = archive.check();
logger::info("{}", archive_spec);
xwim::ExtractSpec extract_spec{};
if (!archive_spec.has_single_root || !archive_spec.is_root_filename) {
extract_spec.make_dir = true;
std::filesystem::path stem = xwim::stem(xwim_path.path());
extract_spec.dirname = stem;
if (archive_spec.has_subarchive) {
extract_spec.extract_subarchive = true;
logger::info("{}", extract_spec);
} catch (xwim::ArchiveException& ae) {
logger::error("{}", ae.what());

View file

@ -1,12 +1,10 @@
xwim_src = ['main.cpp', 'Archiver.cpp', 'UserOpt.cpp', 'UserIntent.cpp'] xwim_src = ['main.cpp',
xwim_archiver = ['archiver/LibArchiver.cpp'] xwim_libs = [dependency('libarchive', required: true),
dependency('fmt', required: true),
dependency('spdlog', required: true)]
is_static = get_option('default_library')=='static' executable('xwim', xwim_src, dependencies: xwim_libs)
xwim_libs = [dependency('libarchive', required: true, static: is_static),
dependency('spdlog', required: true, static: is_static),
dependency('fmt', required: true, static: is_static),
dependency('tclap', required: true, static: is_static)]
executable('xwim', xwim_src+xwim_archiver, dependencies: xwim_libs)

src/spec.hpp Normal file
View file

@ -0,0 +1,80 @@
#pragma once
#include <archive.h>
#include <fmt/format.h>
#include <filesystem>
#include <memory>
namespace xwim {
/** Properties of an archive
 *
 * These properties can be retrieved by analyzing the
 * archive. There is no outside-knowledge. All information
 * is in the archive.
 */
struct ArchiveSpec {
  bool has_single_root = false; /** There is only a single file xor a single
                                   folder at the archive's root */
  bool is_root_filename = false; /** the name of the (single) root is the same
                                    as the stemmed archive file name. Cannot be
                                    true if `has_single_root` is false */
  bool is_root_dir = false; /** The (single) root is a folder. Cannot be true if
                               `has_single_root` is false */
  bool has_subarchive = false; /** Whether the archive contains sub-archives */
};
/** Properties influencing the extraction process
 *
 * These properties can be set to influence the extraction
 * process accordingly.
 */
struct ExtractSpec {
  bool make_dir = false; /** Create a new directory for extraction at `dirname` */
  std::filesystem::path dirname{}; /** The path to a directory for extraction */
  bool extract_subarchive = false; /** Recursively extract sub-archives */
};
} // namespace xwim
#if FMT_VERSION < 50300
typedef fmt::basic_parse_context<char> format_parse_context;
template <>
struct fmt::formatter<xwim::ArchiveSpec> {
constexpr auto parse(format_parse_context & ctx) {
return ctx.begin();
template <typename FormatContext>
auto format(const xwim::ArchiveSpec& spec, FormatContext& ctx) {
return format_to(ctx.out(),
" .has_single_root={},"
" .is_root_filename={}"
" .is_root_dir={}"
" .has_subarchive={}"
" ]",
spec.has_single_root, spec.is_root_filename,
spec.is_root_dir, spec.has_subarchive);
template <>
struct fmt::formatter<xwim::ExtractSpec> {
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
template <typename FormatContext>
auto format(const xwim::ExtractSpec& spec, FormatContext& ctx) {
return format_to(ctx.out(),
" .make_dir={},"
" .dirname={}"
" .extract_subarchive={}"
" ]",
spec.make_dir, spec.dirname.string(),

View file

@ -1,30 +0,0 @@
#pragma once
#include <fmt/core.h>
#include <filesystem>
#include <string>
#include <random>
template <>
struct fmt::formatter<std::filesystem::path> {
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
template <typename FormatContext>
auto format(const std::filesystem::path& path, FormatContext& ctx) {
return format_to(ctx.out(), path.string());
class XwimError : public std::runtime_error {
template <typename... Args>
XwimError(const std::string& fmt, const Args... args)
: std::runtime_error(fmt::format(fmt, args...)){}
inline int rand_int(int from, int to) {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<> distrib(from, to);
return distrib(gen);

src/util/argparse.cpp Normal file
View file

@ -0,0 +1,29 @@
#include "argparse.hpp"
namespace xwim {
namespace argparse {
/**
 * Parse the command line into an XwimPath.
 *
 * @throws ArgParseException whenever the XwimPath constructor rejects the
 *         arguments
 */
XwimPath parse(int argc, char** argv) {
  return XwimPath{argc, argv};
}
/**
 * Build an XwimPath from the command line; exactly one argument is accepted.
 *
 * Delegates to XwimPath{} first, so the object counts as fully constructed and
 * its destructor still runs if one of the checks below throws.
 *
 * @throws ArgParseException if there is no argument, more than one argument,
 *         or the argument has no known archive extension
 */
XwimPath::XwimPath(int argc, char** argv) : XwimPath{} {
  if (argc < 2) throw ArgParseException{"No argument provided"};
  if (argc > 2) throw ArgParseException{"Too many arguments provided"};

  this->_path = std::filesystem::path{argv[1]};

  // Remove when compression in place
  if (!is_archive()) throw ArgParseException{"Not a known archive format"};
}
/** @returns true when the stored path ends in a known archive extension */
bool XwimPath::is_archive() {
  return !xwim::ext(_path).empty();
}
/** @returns a copy of the stored path */
std::filesystem::path XwimPath::path() const {
  return _path;
}

src/util/argparse.hpp Normal file
View file

@ -0,0 +1,79 @@
#pragma once
#include <exception>
#include <filesystem>
#include <iomanip>
#include <ostream>
#include <sstream>
#include <string>

#include "../fileformats.hpp"
namespace xwim {
/**
 * xwim allows for
 * 1. an archive
 * 2. a file or folder
 *
 * In case of (1) the archive will be extracted according to the xwim
 * do-what-i-mean rules.
 *
 * In case of (2) the file or folder will be compressed into a "platform native"
 * format, i.e. what appears to be the most widely used format on that platform.
 * In case of unix this is tar.gz. In case of windows this is zip. The archive
 * gets the same name as the file or folder and a proper extension.
 *
 * A list of files or folders is unsupported as it would be too ambiguous to
 * choose a name for the archive. A list of archives is unsupported for
 * consistency reasons. Any mixture is unsupported as it would be too ambiguous
 * what the user wants. This is subject to change in the future.
 */
namespace argparse {
/** The single path argument handed to xwim. */
class XwimPath {
 private:
  std::filesystem::path _path;

 public:
  /** Empty path. */
  XwimPath() : _path{} {}
  /** Builds the path from the command line (defined in the .cpp). */
  XwimPath(int argc, char** argv);

  /** @returns true when the path has a known archive extension */
  bool is_archive();
  /** @returns a copy of the stored path */
  std::filesystem::path path() const;
};
/** Raised when the command line cannot be turned into an XwimPath. */
class ArgParseException : public std::exception {
 private:
  std::string _what;

 public:
  /** @param what message later returned by what() */
  ArgParseException(std::string what) : _what{what} {}

  virtual const char* what() const noexcept override {
    return this->_what.c_str();
  }
};
XwimPath parse(int argc, char** argv);
/**
 * Build the usage text shown when argument parsing fails.
 *
 * @returns the complete multi-line help text
 */
inline std::string usage() {
  std::stringstream s;
  s << "USAGE:"
    << "\t xwim <path>\n"
    << "\n"
    // The width applies to the next insertion only, i.e. "\t path ", which is
    // padded on the right with '.' up to ten characters.
    << "PARAMS:" << std::left << std::setfill('.') << std::setw(10)
    << "\t path "
    << " Archive\n"
    << "\n"
    << "FORMATS:\n"
    << "\t .7z, .7zip, .jar, .tgz, .bz2, .bzip2\n"
    << "\t .gz, .gzip, .rar, .tar, .xz, .zip\n"
    << "\n"
    << "EXAMPLES:\n"
    << "\t Extract archive archive.tar.gz:\n"
    << "\t xwim archive.tar.gz\n"
    << std::endl;
  return s.str();
}
} // namespace argparse
} // namespace xwim

View file

@ -1,15 +1,17 @@
#pragma once #pragma once
#include <spdlog/common.h> #include <spdlog/common.h>
#include <spdlog/spdlog.h> #include <spdlog/spdlog.h>
#include <cstdlib> #include <cstdlib>
#ifdef NDEBUG #ifdef NDEBUG
#else #else
#endif #endif
namespace xwim::log { namespace xwim {
namespace log {
/** /**
* Get log level from XWIM_LOGLEVEL environment variable. * Get log level from XWIM_LOGLEVEL environment variable.
@ -58,27 +60,7 @@ spdlog::level::level_enum _init_from_compile() {
* The determined level is then set for the default logger via * The determined level is then set for the default logger via
* `spdlog::set_level`. * `spdlog::set_level`.
*/ */
void init(int verbosity = -1, void init(spdlog::level::level_enum level = spdlog::level::level_enum::off) {
spdlog::level::level_enum level = spdlog::level::level_enum::off) {
if (verbosity != -1) {
switch (verbosity) {
case 0:
case 1:
case 2:
case 3:
if (spdlog::level::level_enum::off != level) { if (spdlog::level::level_enum::off != level) {
spdlog::set_level(level); spdlog::set_level(level);
return; return;
@ -90,7 +72,8 @@ void init(int verbosity = -1,
return; return;
} }
spdlog::set_level(_init_from_compile()); return spdlog::set_level(_init_from_compile());
} }
} // namespace xwim::log } // namespace log
} // namespace xwim

test/archive_test.cpp Normal file
View file

@ -0,0 +1,11 @@
#include <gtest/gtest.h>
#include <archive.hpp>
#include <spec.hpp>
// Checks that check() reports a single root for the root.tar.gz fixture.
// NOTE(review): the expectation is taken from the test's name - confirm it
// against the actual contents of test/archives/root.tar.gz.
TEST(ArchiveTest, ArchiveSpecDetectsSingleRoot) {
  xwim::Archive archive("test/archives/root.tar.gz");
  xwim::ArchiveSpec spec = archive.check();
  ASSERT_TRUE(spec.has_single_root);
}

test/fileformats_test.cpp Normal file
View file

@ -0,0 +1,64 @@
#include <gtest/gtest.h>
#include <fileformats.hpp>
#include <string>
// stem(): a single known extension is removed.
TEST(FileformatsTest, StemStripsSingleKnownExtension) {
  std::filesystem::path archive_path{"/some/path/to/file.rar"};
  ASSERT_EQ(xwim::stem(archive_path), std::filesystem::path{"file"});
}

// stem(): a chain of known extensions is removed completely.
TEST(FileformatsTest, StemStripsMultipleKnownExtensions) {
  std::filesystem::path archive_path{"/some/path/to/file.tar.rar.gz.7z.rar"};
  ASSERT_EQ(xwim::stem(archive_path), std::filesystem::path{"file"});
}

// stem(): stripping stops at the first unknown extension from the right.
TEST(FileformatsTest, StemStripsOnlyKnownExtension) {
  std::filesystem::path archive_path{"/some/path/to/file.ukn.rar"};
  ASSERT_EQ(xwim::stem(archive_path), std::filesystem::path{"file.ukn"});
}

// stem(): an unknown extension is left alone.
TEST(FileformatsTest, StemStripsNothingWithoutKnownExtension) {
  std::filesystem::path archive_path{"/some/path/to/file.ukn"};
  ASSERT_EQ(xwim::stem(archive_path), std::filesystem::path{"file.ukn"});
}

// stem(): a format name without a leading dot is not an extension.
TEST(FileformatsTest, StemStripsNothingWithoutExtension) {
  std::filesystem::path archive_path{"/some/path/to/filerar"};
  ASSERT_EQ(xwim::stem(archive_path), std::filesystem::path{"filerar"});
}

// ext(): a single known extension is returned.
TEST(FileExtTest, ExtGetsKnownExtension) {
  std::filesystem::path archive_path{"/some/path/to/file.rar"};
  ASSERT_EQ(xwim::ext(archive_path), std::filesystem::path{".rar"});
}

// ext(): combined known extensions are returned in their original order.
TEST(FileExtTest, CombinedExtensionGetsAll) {
  std::filesystem::path archive_path{"/some/path/to/file.tar.gz"};
  ASSERT_EQ(xwim::ext(archive_path), std::filesystem::path{".tar.gz"});
}

// ext(): an unknown extension yields an empty path.
TEST(FileExtTest, ExtEmptyForUnknownExtension) {
  std::filesystem::path archive_path{"/some/path/to/file.ukn"};
  ASSERT_TRUE(xwim::ext(archive_path).empty());
}

// ext(): collection stops at the first unknown extension from the right.
TEST(FileExtTest, CombinedExtensionGetsKnown) {
  std::filesystem::path archive_path{"/some/path/to/file.ukn.tar.gz"};
  ASSERT_EQ(xwim::ext(archive_path), std::filesystem::path{".tar.gz"});
}

// ext(): a trailing unknown extension hides the known ones before it.
TEST(FileExtTest, CombinedExtensionLastUnknownEmpty) {
  std::filesystem::path archive_path{"/some/path/to/file.tar.gz.ukn"};
  ASSERT_TRUE(xwim::ext(archive_path).empty());
}

View file

@ -2,10 +2,22 @@
gtest_proj = subproject('gtest') gtest_proj = subproject('gtest')
gtest_dep = gtest_proj.get_variable('gtest_main_dep') gtest_dep = gtest_proj.get_variable('gtest_main_dep')
# subdir('archives') xwim_src = ['../src/archive.cpp',
user_opt_test_exe = executable('user_opt_test_exe', '../src/archive_sys.cpp']
sources: ['user_opt_test.cpp', '../src/UserOpt.cpp'],
include_directories: ['../src'],
dependencies: [gtest_dep])
test('user opt parsing test', user_opt_test_exe) subdir('archives')
archive_test_exe = executable('archive_test_exe',
sources: ['archive_test.cpp', xwim_src],
include_directories: ['../src'],
dependencies: [gtest_dep, xwim_libs])
test('archive test', archive_test_exe)
fileformats_test_exe = executable('fileformats_test_exe',
sources: ['fileformats_test.cpp', xwim_src],
include_directories: ['../src'],
dependencies: [gtest_dep, xwim_libs])
test('fileformats test', fileformats_test_exe)

View file

@ -1,90 +0,0 @@
#include <gtest/gtest-death-test.h>
#include "gtest/gtest.h"
#include <filesystem>
#include <string>
#include "UserOpt.hpp"
TEST(UserOpt, compress) {
using namespace xwim;
// clang-format off
char* args[] = {
// clang-format on
UserOpt uo = UserOpt{3, args};
TEST(UserOpt, exclusive_actions) {
using namespace xwim;
// clang-format off
char* args[] = {
// clang-format on
UserOpt uo = UserOpt{4, args};
TEST(UserOpt, whitespace_in_path) {
using namespace xwim;
// clang-format off
char* args[] = {
const_cast<char*>("/foo/bar baz/a file"),
// clang-format on
UserOpt uo = UserOpt{3, args};
ASSERT_TRUE(uo.paths.find(std::filesystem::path("/foo/bar baz/a file")) !=
TEST(UserOpt, mixed_output_and_paths) {
using namespace xwim;
// clang-format off
char* args[] = {
const_cast<char*>("/foo/bar baz/output"),
const_cast<char*>("/foo/bar baz/a path"),
const_cast<char*>("/foo/bar baz/another path"),
// clang-format on
UserOpt uo = UserOpt{5, args};
ASSERT_TRUE(uo.paths.find(std::filesystem::path("/foo/bar baz/a path")) !=
ASSERT_TRUE(uo.paths.find(std::filesystem::path("/foo/bar baz/another path")) !=
ASSERT_TRUE(uo.out == std::filesystem::path("/foo/bar baz/output"));
TEST(UserOpt, output_defaults_to_nullopt) {
using namespace xwim;
// clang-format off
char* args[] = {
// clang-format on
UserOpt uo = UserOpt{2, args};

