Compare commits

..

No commits in common. "master" and "dynamic-loglevel" have entirely different histories.

63 changed files with 715 additions and 1556 deletions

View file

@ -1,168 +1 @@
---
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: false
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: WithoutElse
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: true
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<ext/.*\.h>'
Priority: 2
SortPriority: 0
- Regex: '^<.*\.h>'
Priority: 1
SortPriority: 0
- Regex: '^<.*'
Priority: 2
SortPriority: 0
- Regex: '.*'
Priority: 3
SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentCaseLabels: true
IndentGotoLabels: true
IndentPPDirectives: None
IndentWidth: 2
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
- Cpp
- CPP
- 'c++'
- 'C++'
CanonicalDelimiter: ''
BasedOnStyle: google
- Language: TextProto
Delimiters:
- pb
- PB
- proto
- PROTO
EnclosingFunctions:
- EqualsProto
- EquivToProto
- PARSE_PARTIAL_TEXT_PROTO
- PARSE_TEST_PROTO
- PARSE_TEXT_PROTO
- ParseTextOrDie
- ParseTextProtoOrDie
CanonicalDelimiter: ''
BasedOnStyle: google
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: Auto
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 8
UseCRLF: false
UseTab: Never
...
BasedOnStyle: Chromium

View file

@ -3,38 +3,17 @@ type: docker
name: default
steps:
- name: build-shared
image: arminfriedl/xwim-build:shared
- name: build
image: arminfriedl/xwim-build
commands:
- meson wrap install gtest || true
- meson target/shared
- ninja -C target/shared
- mv target/shared/src/xwim xwim-x86_64-glibc-linux-shared
- name: build-static
image: arminfriedl/xwim-build:static
commands:
- meson wrap install gtest || true
- meson --default-library=static target/static
- ninja -C target/static
- mv target/static/src/xwim xwim-x86_64-musl-linux-static
- name: publish-binaries
image: appleboy/drone-scp
settings:
host: friedl.net
username:
from_secret: deploy_user
password:
from_secret: deploy_password
port: 22
target: /var/services/dirlist/repo/cicd/xwim/${DRONE_COMMIT_SHA:0:8}/
source:
- xwim-x86_64-glibc-linux-shared
- xwim-x86_64-musl-linux-static
depends_on:
- build-shared
- build-static
- meson wrap install gtest
- meson build
- ninja -C build
- ninja -C build test && ninja -C build coverage
- echo "******** TEST LOGS ***********"
- cat build/meson-logs/testlog.txt
- echo "****** COVERAGE LOGS *********"
- cat build/meson-logs/coverage.txt
trigger:
event:
@ -47,34 +26,21 @@ type: docker
name: release
steps:
- name: build-shared
image: arminfriedl/xwim-build:shared
- name: build
image: arminfriedl/xwim-build
commands:
- meson wrap install gtest || true
- meson --buildtype=release target/shared
- ninja -C target/shared
- strip target/shared/src/xwim
- mkdir xwim-${DRONE_TAG}-x86_64-glibc-linux-shared
- mv target/shared/src/xwim xwim-${DRONE_TAG}-x86_64-glibc-linux-shared
- name: build-static
image: arminfriedl/xwim-build:static
commands:
- meson wrap install gtest || true
- meson --buildtype=release --default-library=static target/static
- ninja -C target/static
- strip target/static/src/xwim
- mkdir xwim-${DRONE_TAG}-x86_64-musl-linux-static
- mv target/static/src/xwim xwim-${DRONE_TAG}-x86_64-musl-linux-static
- meson wrap install gtest
- meson --buildtype=release build
- ninja -C build
- mkdir xwim-${DRONE_TAG}-x86_64-glibc-linux
- mv build/src/xwim xwim-${DRONE_TAG}-x86_64-glibc-linux
- name: package
image: arminfriedl/xwim-build
commands:
- tar czf xwim-${DRONE_TAG}-x86_64-glibc-linux-shared.tar.gz xwim-${DRONE_TAG}-x86_64-glibc-linux-shared/xwim
- tar czf xwim-${DRONE_TAG}-x86_64-musl-linux-static.tar.gz xwim-${DRONE_TAG}-x86_64-musl-linux-static/xwim
depends_on:
- build-shared
- build-static
- tar cjf xwim-${DRONE_TAG}-x86_64-glibc-linux.tar.bz2 xwim-${DRONE_TAG}-x86_64-glibc-linux/xwim
- tar czf xwim-${DRONE_TAG}-x86_64-glibc-linux.tar.gz xwim-${DRONE_TAG}-x86_64-glibc-linux/xwim
- zip -r xwim-${DRONE_TAG}-x86_64-glibc-linux.zip xwim-${DRONE_TAG}-x86_64-glibc-linux
- name: publish
image: plugins/gitea-release
@ -83,14 +49,13 @@ steps:
api_key:
from_secret: gitea_token
files:
- xwim-${DRONE_TAG}-x86_64-glibc-linux-shared.tar.gz
- xwim-${DRONE_TAG}-x86_64-musl-linux-static.tar.gz
- xwim-${DRONE_TAG}-x86_64-glibc-linux.tar.bz2
- xwim-${DRONE_TAG}-x86_64-glibc-linux.tar.gz
- xwim-${DRONE_TAG}-x86_64-glibc-linux.zip
title: xwim ${DRONE_TAG}
checksum:
- md5
- sha256
depends_on:
- package
trigger:
event:

247
.gitignore vendored
View file

@ -2,11 +2,11 @@
build/
target/
compile_commands.json
.vscode
.ccls-cache
.idea/codeStyles/**
# Created by https://www.toptal.com/developers/gitignore/api/c++,vim,emacs,linux,macos,ninja,windows,jetbrains+all,clion+all,visualstudiocode
# Edit at https://www.toptal.com/developers/gitignore?templates=c++,vim,emacs,linux,macos,ninja,windows,jetbrains+all,clion+all,visualstudiocode
# Created by https://www.gitignore.io/api/vim,c++,emacs,ninja
# Edit at https://www.gitignore.io/?templates=vim,c++,emacs,ninja
### C++ ###
# Prerequisites
@ -42,94 +42,6 @@ compile_commands.json
*.out
*.app
### CLion+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
### CLion+all Patch ###
# Ignore everything but code style settings and run configurations
# that are supposed to be shared within teams.
.idea/*
!.idea/codeStyles
!.idea/runConfigurations
### Emacs ###
# -*- mode: gitignore; -*-
*~
@ -181,108 +93,6 @@ flycheck_*.el
/network-security.data
### JetBrains+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
# AWS User-specific
# Generated files
# Sensitive or high-churn files
# Gradle
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
# Mongo Explorer plugin
# File-based project format
# IntelliJ
# mpeltonen/sbt-idea plugin
# JIRA plugin
# Cursive Clojure plugin
# SonarLint plugin
# Crashlytics plugin (for Android Studio and IntelliJ)
# Editor-based Rest Client
# Android studio 3.1+ serialized cache file
### JetBrains+all Patch ###
# Ignore everything but code style settings and run configurations
# that are supposed to be shared within teams.
### Linux ###
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### macOS ###
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### macOS Patch ###
# iCloud generated files
*.icloud
### Ninja ###
.ninja_deps
.ninja_log
@ -290,7 +100,6 @@ Temporary Items
### Vim ###
# Swap
[._]*.s[a-v][a-z]
!*.svg # comment out if you don't need vector files
[._]*.sw[a-p]
[._]s[a-rt-v][a-z]
[._]ss[a-gi-z]
@ -302,54 +111,14 @@ Sessionx.vim
# Temporary
.netrwhist
# Auto-generated tag files
tags
# Persistent undo
[._]*.un~
### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
# Coc configuration directory
.vim
# Local History for Visual Studio Code
.history/
# Built Visual Studio Code Extensions
*.vsix
### VisualStudioCode Patch ###
# Ignore all local history of files
.history
.ionide
### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp
# Windows shortcuts
*.lnk
# End of https://www.toptal.com/developers/gitignore/api/c++,vim,emacs,linux,macos,ninja,windows,jetbrains+all,clion+all,visualstudiocode
# End of https://www.gitignore.io/api/vim,c++,emacs,ninja

View file

@ -1,12 +0,0 @@
all: compile_commands.json
cd build && ninja
compile_commands.json:
cd build && ninja -t compdb > compile_commands.json
clean:
cd build && ninja -t clean
.PHONY:
compile_commands.json
clean

View file

@ -5,9 +5,7 @@ Do What I Mean Extractor
![https://xkcd.com/1168/](https://imgs.xkcd.com/comics/tar.png)
[xkcd-1168](https://xkcd.com/1168/)
Continuing the emacs tradition of "Do What I Mean" tools, xwim is replacement
Continuing the emacs tradition of "Do What I Mean" tools, xwim is a replacement
for the excellent, but unfortunately unmaintained,
[dtrx](https://github.com/brettcs/dtrx). xwim is a command line tool that
targets two problems with archives:
@ -17,27 +15,6 @@ considerably between formats
- Inconsiderately packaged archives tend to spill their content over the
directory they are extracted to
`dtrx` is a Python script that sets up the command line and calls appropriate
archiving binaries (if installed). In contrast `xwim` is a compiled binary based
directly on archiving libraries, which some may appreciate. It can optionally be
statically linked if you want it entirely self-contained.
# Install
`xwim` is currently released for Linux only. There are two flavors: statically
linked and dynamically linked. The releases can be downloaded from
https://git.friedl.net/incubator/xwim/releases and should run on most 64-bit
GNU/Linux distributions.
For the dynamically linked version, the following dependencies have to be
installed:
- [spdlog](https://github.com/gabime/spdlog)
- [fmt](https://github.com/fmtlib/fmt)
- [libarchive](https://github.com/libarchive/libarchive)
Windows support is planned for the first stable release. Packaging for various
distributions is also planned once `xwim` stabilizes. Please reach out if you
can help.
# Usage
Invoking `xwim` is as simple as:
@ -46,25 +23,8 @@ xwim archive.tar.gz
```
This will extract the archive to the current folder. If the archive contains a
single root folder it is just extracted as is. Otherwise xwim creates a folder
named after the archive and extracts the contents there.
```shell
xwim /home/user/
```
This will create an archive in the "platform native" format (zip on windows,
tar.gz on unix) in the current working directory. The archive contains a single
root folder `user` and is itself named `user.zip` or `user.tar.gz`.
```shell
xwim /home/user/file.txt
```
This will create an archive in the "platform native" format (zip on windows,
tar.gz on unix) in the current working directory. The archive contains a single
entry `file.txt` and is itself named `file.zip` or `file.tar.gz`.
single root folder it is just extracted as is. Otherwise xwim first creates a
folder named after the archive and extracts the contents there.
# Examples
@ -98,13 +58,26 @@ xwim will create a folder `archive` in the current directory and extract the
archive contents there.
# Supported formats
Currently `xwim` supports `tar.gz` and `zip` archives. However, this will
rapidly expand to many more formats until a stable release is officially
announced.
xwim supports most formats supported by [libarchive](https://libarchive.org/):
Take a look at `Archiver.hpp` if you want to help and have some time for testing.
Most formats can readily be added if they are supported by libarchive. For other
formats you have to add an `Archiver` implementation.
- 7-zip: 7z, 7zip
- zip: jar, zip
- bzip2: bz2, bzip2
- gzip: gz, gzip
- xzip: xz
- rar: rar
- tar with compression: tgz, tar.gz, tar.bz2, tar.xz
# Install
xwim is currently released as a dynamically linked glibc binary only. The
releases can be downloaded from https://git.friedl.net/incubator/xwim/releases
and should run on most glibc based GNU/Linux distributions. The following
dependencies have to be installed:
- [spdlog](https://github.com/gabime/spdlog)
- [fmt](https://github.com/fmtlib/fmt)
- [libarchive](https://github.com/libarchive/libarchive)
Approaching the first stable release we will release for more platforms.
# Build
xwim is built with [meson](https://mesonbuild.com/). To compile xwim from source
@ -153,28 +126,14 @@ Per default xwim chooses an appropriate log level according to your build type
- off
# Contributing
While xwim is still in incubator phase (i.e. before version 1.0) its main
While xwim is still in incubator phase (i.e. before version 1.0) its main
repository is hosted on https://git.friedl.net/incubator/xwim with a mirror on
https://github.com/arminfriedl/xwim. With the first stable release it will most
likely move to GitHub as its main repository.
likely move to GitHub as its main repository.
If you want to contribute, you can either issue a pull request on its Github
If you want to contribute, you can either issue a pull request on its GitHub
mirror (will be cherry picked into the main repository) or send patches to
dev[at]friedl[dot]net.
If you are interested in a long-term co-maintainership you can also drop me a
mail for an account on https://git.friedl.net.
# Known Issues
- <strong>Parsing filters is unsupported</strong>
There is a somewhat long standing
[bug](https://github.com/libarchive/libarchive/issues/373) in libarchive. rar
files might fail with `Parsing filters is unsupported`. This is because `rar`
is a proprietary format and `libarchive` does not implement the full machinery
necessary to support `rar` completely. `xwim` is all about convenience. If you
want to help with supporting `rar`, please keep in mind that this means we
would want to use the [official `unrar`
library](https://www.rarlab.com/rar_add.htm) if possible. This is also a
licensing issue as `unrar` is proprietary and its license seemingly not GPL
compatible.

View file

@ -1,12 +1,8 @@
project('xwim', 'cpp',
version: '0.4',
version: '0.2',
default_options: ['cpp_std=c++17',
'warning_level=3',
'b_ndebug=if-release'])
# Expose the *project* version (the `version:` given to project()) to the C++
# sources. `meson.version()` would instead yield the version of the Meson
# tool that happens to run the build.
add_global_arguments('-DVERSION='+meson.project_version(), language: 'cpp')
# Make spdlog use the external fmt library instead of its bundled copy.
add_global_arguments('-DSPDLOG_FMT_EXTERNAL', language: 'cpp')
# Use fmt in header-only mode.
add_global_arguments('-DFMT_HEADER_ONLY', language: 'cpp')
'b_coverage=true'])
subdir('src')
subdir('doc')

View file

@ -1,147 +0,0 @@
#include "Archiver.hpp"
#include "Formats.hpp"
#include <spdlog/spdlog.h>
#include <filesystem>
#include <map>
#include <memory>
#include "util/Common.hpp"
// Extension appended by `default_archive()` when the user does not name an
// output archive: `.tar.gz` on unix-like platforms, `.zip` everywhere else.
//
// The Windows branch tests the macros compilers actually predefine
// (`_WIN32`, `__WIN32__`, `__WINDOWS__`); the previous lower-case spellings
// are never defined, which made that branch unreachable.
// NOTE(review): some unix-like toolchains (e.g. macOS) may not define any of
// the `unix` macros and would therefore fall through to `.zip` - confirm
// whether that is intended.
#if defined(unix) || defined(__unix__) || defined(__unix)
std::string default_extension = ".tar.gz";
#elif defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__)
std::string default_extension = ".zip";
#else
std::string default_extension = ".zip";
#endif
namespace xwim {
using namespace std;
namespace fs = std::filesystem;
// Return the longest (possibly combined, e.g. `.tar.gz`) extension of `path`
// that maps to a known `Format`. Returns an empty path if no suffix of the
// file name is a known extension.
fs::path archive_extension(const fs::path& path) {
  // Re-assemble the path from its non-empty elements: a trailing `/` shows up
  // as an empty element during iteration and would otherwise hide the name.
  fs::path remaining;
  for (const fs::path& element : path) {
    if (element.empty()) continue;
    remaining /= element;
  }

  fs::path longest_known;  // best match found so far
  fs::path candidate;      // extensions peeled off so far, right to left

  // Peel one extension per iteration and test the accumulated suffix. An
  // unknown intermediate suffix does not stop the search, a longer one may
  // still be known.
  for (; remaining.has_extension(); remaining = remaining.stem()) {
    candidate = remaining.extension() += candidate;
    if (find_extension_format(candidate) != Format::UNKNOWN) {
      longest_known = candidate;
    }
  }

  return longest_known;
}
// Remove the longest known archive extension from `path`.
//
// The number of dot-separated extensions making up the longest known suffix
// is counted first; `stem()` is then applied that many times to the
// normalized path. If no known extension is found the normalized path is
// returned unchanged.
fs::path strip_archive_extension(const fs::path& path) {
  // Drop empty elements (a trailing `/` iterates as an empty element).
  fs::path normalized;
  for (const fs::path& element : path) {
    if (!element.empty()) normalized /= element;
  }

  spdlog::debug("Checking {} extensions", normalized);

  int known_depth = 0;  // extensions in the longest known suffix
  int depth = 0;        // extensions peeled off so far
  fs::path candidate;   // accumulated suffix, built right to left
  fs::path probe = normalized;

  while (probe.has_extension()) {
    candidate = probe.extension() += candidate;
    spdlog::debug("Looking for {} in known extensions", candidate);

    ++depth;
    if (find_extension_format(candidate) != Format::UNKNOWN) {
      // Known (combined) extension; keep looking for an even longer one.
      known_depth = depth;
    }

    spdlog::debug("Stemming {} to {}", probe, probe.stem());
    probe = probe.stem();
  }

  spdlog::debug("Found {} extensions", known_depth);

  fs::path stripped = normalized;
  for (int left = known_depth; left > 0; --left) {
    stripped = stripped.stem();
  }

  spdlog::debug("Stripped path is {} ", stripped);
  return stripped;
}
std::filesystem::path default_archive(const std::filesystem::path& base) {
string base_s = base.string();
string ext_s = default_extension;
return fs::path{fmt::format("{}{}", base_s, ext_s)};
}
// Tell whether `path` carries an extension listed in `format_extensions`.
bool can_handle_archive(const fs::path& path) {
  const fs::path ext = archive_extension(path);
  const bool known = format_extensions.count(ext.string()) > 0;

  if (!known) {
    spdlog::debug("Could not find {} in known formats", ext);
    return false;
  }

  spdlog::debug("Found {} in known formats", ext);
  return true;
}
// Map `path` to its archive `Format` based on its longest known extension.
// Throws `XwimError` if the extension is not associated with any format.
Format parse_format(const fs::path& path) {
  spdlog::debug("Looking for path {}", path);

  const fs::path ext = archive_extension(path);
  spdlog::debug("Looking for ext {}", ext);

  const Format format = find_extension_format(ext);
  if (format != Format::UNKNOWN) {
    return format;
  }

  throw XwimError{"No known archiver for {}", path};
}
// Create the archiver responsible for `archive_name`.
//
// Every format currently listed is served by `LibArchiver`. `parse_format`
// throws for unknown extensions, so the fall-through error below is only
// reached for a `Format` value that has no case here.
unique_ptr<Archiver> make_archiver(const string& archive_name) {
  const Format format = parse_format(archive_name);

  switch (format) {
    case Format::TAR_GZIP:
    case Format::TAR_BZIP2:
    case Format::TAR_COMPRESS:
    case Format::TAR_LZIP:
    case Format::TAR_XZ:
    case Format::TAR_ZSTD:
    case Format::ZIP:
      return make_unique<LibArchiver>();
    default:
      break;
  }

  throw XwimError{
      "Cannot construct archiver for {}. `extension_format` surjection "
      "invariant violated?",
      archive_name};
}
} // namespace xwim

View file

@ -1,43 +0,0 @@
#pragma once
#include <fmt/core.h>
#include <filesystem>
#include <map>
#include <memory>
#include <set>
#include "util/Common.hpp"
#include "Formats.hpp"
namespace xwim {

/**
 * Abstract interface of an archive backend.
 *
 * Implementations provide both directions: packing a set of input paths into
 * one archive and unpacking an archive into an output path.
 */
class Archiver {
 public:
  /** Pack all paths in `ins` into the archive `archive_out`. */
  virtual void compress(std::set<std::filesystem::path> ins,
                        std::filesystem::path archive_out) = 0;

  /** Unpack the archive `archive_in` into `out`. */
  virtual void extract(std::filesystem::path archive_in,
                       std::filesystem::path out) = 0;

  virtual ~Archiver() = default;
};

/**
 * Concrete `Archiver`; `make_archiver` returns it for all tar and zip formats.
 * NOTE(review): presumably backed by libarchive, the implementation is not
 * part of this header.
 */
class LibArchiver : public Archiver {
 public:
  // `override` lets the compiler verify these keep matching the base
  // interface.
  void compress(std::set<std::filesystem::path> ins,
                std::filesystem::path archive_out) override;

  void extract(std::filesystem::path archive_in,
               std::filesystem::path out) override;
};

/** Longest known archive extension of `path` (empty path if none is known). */
std::filesystem::path archive_extension(const std::filesystem::path& path);

/** `path` with its longest known archive extension removed. */
std::filesystem::path strip_archive_extension(const std::filesystem::path& path);

/** `base` with the platform default archive extension appended. */
std::filesystem::path default_archive(const std::filesystem::path& base);

/** Archive format of `path`; throws `XwimError` if the format is unknown. */
Format parse_format(const std::filesystem::path& path);

/** Whether the extension of `path` belongs to a known archive format. */
bool can_handle_archive(const std::filesystem::path& path);

/** Archiver able to process `archive_name`; throws `XwimError` otherwise. */
std::unique_ptr<Archiver> make_archiver(const std::string& archive_name);

}  // namespace xwim

View file

@ -1,49 +0,0 @@
#pragma once
namespace xwim {
// NOTE(review): a using-directive in a header leaks into every includer; it is
// kept because code including this header may rely on the unqualified names.
using namespace std;

// Invariant:
// `extensions_format` defines a surjection from `format_extensions`
// to `Formats`
//
// Archive formats that can be derived from a file extension. `UNKNOWN` is the
// result for extensions that are not listed in `extensions_format`.
enum class Format {
  UNKNOWN,
  TAR_BZIP2, TAR_GZIP, TAR_LZIP, TAR_XZ, TAR_COMPRESS, TAR_ZSTD,
  ZIP
};

// Flat set of every known archive extension (including the leading dot).
// Must list exactly the extensions that appear in `extensions_format`.
const set<string> format_extensions{
  // tar formats see: https://en.wikipedia.org/wiki/Tar_(computing)#Suffixes_for_compressed_files
  /* bzip2 */ ".tar.bz2", ".tb2", ".tbz", ".tbz2", ".tz2",
  /* gzip */ ".tar.gz", ".taz", ".tgz",
  /* lzip */ ".tar.lz",
  /* xz */ ".tar.xz", ".txz",
  /* compress */ ".tar.Z", ".tZ", ".taZ",
  /* zstd */ ".tar.zst", ".tzst",
  /* zip */ ".zip"
};

// Groups of equivalent extensions and the format each group denotes.
const map<set<string>, Format> extensions_format{
  {{".tar.bz2", ".tb2", ".tbz", ".tbz2", ".tz2"}, Format::TAR_BZIP2},
  {{".tar.gz", ".taz", ".tgz"}, Format::TAR_GZIP},
  {{".tar.lz"}, Format::TAR_LZIP},
  {{".tar.xz", ".txz"}, Format::TAR_XZ},
  {{".tar.Z", ".tZ", ".taZ"}, Format::TAR_COMPRESS},
  {{".tar.zst", ".tzst"}, Format::TAR_ZSTD},
  {{".zip"}, Format::ZIP}
};

// Look up the format for extension `ext` (case-sensitive, leading dot
// included). Returns `Format::UNKNOWN` if no group contains `ext`.
inline Format find_extension_format(const string& ext) {
  // Iterate by const reference: copying an entry would duplicate a whole
  // set of strings on every step of the lookup.
  for (const auto& entry : extensions_format) {
    if (entry.first.count(ext) != 0) {
      return entry.second;
    }
  }

  return Format::UNKNOWN;
}
}  // namespace xwim

View file

@ -1,226 +0,0 @@
#include "UserIntent.hpp"
#include <spdlog/spdlog.h>
#include <algorithm>
#include <filesystem>
#include "Archiver.hpp"
namespace xwim {
// Build the compression intent for an explicit compress request.
//
// Exactly one input path yields a `CompressSingleIntent` (output optional).
// Any other number of inputs requires an explicit output and yields a
// `CompressManyIntent`; without an output an `XwimError` is thrown.
unique_ptr<UserIntent> make_compress_intent(const UserOpt &userOpt) {
  const bool single_input = userOpt.paths.size() == 1;

  if (!single_input) {
    if (!userOpt.out.has_value()) {
      throw XwimError("Cannot guess output for multiple targets");
    }
    return make_unique<CompressManyIntent>(userOpt.paths, userOpt.out.value());
  }

  return make_unique<CompressSingleIntent>(*userOpt.paths.begin(), userOpt.out);
}
// Build the extraction intent for an explicit extract request. Throws
// `XwimError` naming the first input whose archive format is not known.
unique_ptr<UserIntent> make_extract_intent(const UserOpt &userOpt) {
  const auto unsupported = std::find_if(
      userOpt.paths.begin(), userOpt.paths.end(),
      [](const path &candidate) { return !can_handle_archive(candidate); });

  if (unsupported != userOpt.paths.end()) {
    throw XwimError("Cannot extract path {}", *unsupported);
  }

  return make_unique<ExtractIntent>(userOpt.paths, userOpt.out);
}
// Try to interpret the options as a compression request.
//
// Returns a compression intent if either an output with a known archive
// extension was given, or no output was given and there is exactly one input.
// Returns nullptr if compression cannot be inferred.
unique_ptr<UserIntent> try_infer_compress_intent(const UserOpt &userOpt) {
  if (userOpt.out.has_value()) {
    spdlog::debug("<out> provided: {}", userOpt.out.value());

    if (!can_handle_archive(userOpt.out.value())) {
      spdlog::debug(
          "Cannot compress multiple paths without a user-provided output archive");
      return nullptr;
    }

    spdlog::debug("{} given and a known archive format, assume compression",
                  userOpt.out.value());
    return make_compress_intent(userOpt);
  }

  spdlog::debug("No <out> provided");

  if (userOpt.paths.size() == 1) {
    spdlog::debug("Only one <path> provided. Assume single-path compression.");
    return make_unique<CompressSingleIntent>(*userOpt.paths.begin(),
                                             userOpt.out);
  }

  spdlog::debug(
      "Not a single-path compression. Cannot guess <out> for many-path "
      "compression");
  return nullptr;
}
// Try to interpret the options as an extraction request.
//
// Extraction is inferred only if every input has a known archive extension
// and the output (if any) does not itself look like an archive; the latter
// case is ambiguous and left for compression inference. Returns nullptr if
// extraction cannot be inferred.
unique_ptr<UserIntent> try_infer_extract_intent(const UserOpt &userOpt) {
  bool can_extract_all =
      std::all_of(userOpt.paths.begin(), userOpt.paths.end(),
                  [](const path &path) { return can_handle_archive(path); });

  if (!can_extract_all) {
    spdlog::debug(
        "Cannot extract all provided <paths>. Assume this is not an "
        "extraction.");
    // List every offending input to make the decision traceable.
    for (const path &p : userOpt.paths) {
      if (!can_handle_archive(p)) {
        spdlog::debug("Cannot handle {}", p);
      }
    }
    return nullptr;
  }

  if (userOpt.out.has_value() && can_handle_archive(userOpt.out.value())) {
    spdlog::debug(
        "Could extract all provided <paths>. But also {} looks like an "
        "archive. Ambiguous intent. Assume this is not an extraction.",
        userOpt.out.value());
    return nullptr;
  }

  // This is the success path: the message previously repeated the
  // "ambiguous intent" text from the branch above, contradicting the result.
  spdlog::debug(
      "Could extract all provided <paths> and <out> does not look like an "
      "archive. Assume this is an extraction.");
  return make_extract_intent(userOpt);
}
// Turn parsed user options into an executable intent.
//
// An explicitly requested action wins. Otherwise extraction is tried first,
// then compression. Throws `XwimError` for contradictory options, missing
// input, or when no intent can be inferred.
unique_ptr<UserIntent> make_intent(const UserOpt &userOpt) {
  if (userOpt.wants_compress() && userOpt.wants_extract()) {
    throw XwimError("Cannot compress and extract simultaneously");
  }

  if (userOpt.paths.empty()) {
    throw XwimError("No input given...");
  }

  // Explicitly specified intent.
  if (userOpt.wants_compress()) {
    return make_compress_intent(userOpt);
  }
  if (userOpt.wants_extract()) {
    return make_extract_intent(userOpt);
  }

  spdlog::info("Intent not explicitly provided, trying to infer intent");

  unique_ptr<UserIntent> inferred = try_infer_extract_intent(userOpt);
  if (inferred != nullptr) {
    spdlog::info("Extraction intent inferred");
    return inferred;
  }
  spdlog::info("Cannot infer extraction intent");

  inferred = try_infer_compress_intent(userOpt);
  if (inferred != nullptr) {
    spdlog::info("Compression intent inferred");
    return inferred;
  }
  spdlog::info("Cannot infer compression intent");

  throw XwimError("Cannot guess intent");
}
void ExtractIntent::dwim_reparent(const path &out) {
// move extraction if extraction resulted in only one entry and that entries
// name is already the stripped archive name, i.e. reduce unnecessary nesting
auto dit = std::filesystem::directory_iterator(out);
auto dit_path = dit->path();
if (dit == std::filesystem::directory_iterator()) {
spdlog::debug(
"Cannot flatten extraction folder: extraction folder is empty");
return;
}
if (!is_directory(dit_path)) {
spdlog::debug("Cannot flatten extraction folder: {} is not a directory",
dit_path);
return;
}
if (next(dit) != std::filesystem::directory_iterator()) {
spdlog::debug("Cannot flatten extraction folder: multiple items extracted");
return;
}
if (!std::filesystem::equivalent(dit_path.filename(), out.filename())) {
spdlog::debug(
"Cannot flatten extraction folder: archive entry differs from archive "
"name [extraction folder: {}, archive entry: {}]",
out.filename(), dit_path.filename());
return;
}
spdlog::debug("Output folder [{}] is equivalent to archive entry [{}]", out,
dit_path);
spdlog::info("Flattening extraction folder");
int i = rand_int(0, 100000);
path tmp_out = path{out};
tmp_out.concat(fmt::format(".xwim{}", i));
spdlog::debug("Move {} to {}", dit_path, tmp_out);
std::filesystem::rename(dit_path, tmp_out);
spdlog::debug("Remove parent path {}", out);
std::filesystem::remove(out);
spdlog::debug("Moving {} to {}", tmp_out, out);
std::filesystem::rename(tmp_out, out);
}
// Determine (and create) the directory archive `p` is extracted into.
//
// - no `out` given: `<cwd>/<archive name without archive extension>`
// - `out` given, single archive: `out` itself
// - `out` given, several archives: `out/<archive name without extension>`
//
// The returned directory exists in all three cases. Previously the
// per-archive subfolder of the last case was returned without being created,
// unlike the other two cases.
path ExtractIntent::out_path(const path &p) {
  path target;

  if (!this->out.has_value()) {
    // no out path given, create from archive name
    target = std::filesystem::current_path() / strip_archive_extension(p);
  } else if (this->archives.size() == 1) {
    // out given and only one archive to extract, just extract into `out`
    target = this->out.value();
  } else {
    // out given and multiple archives to extract, use one subfolder
    // per archive
    target = this->out.value() / strip_archive_extension(p);
  }

  // Creates missing parents as well, so `out` itself is covered too.
  create_directories(target);
  return target;
}
// Extract every archive into its output directory, then flatten the result
// where the extraction produced redundant nesting.
void ExtractIntent::execute() {
  for (const path &archive : this->archives) {
    // Resolve the archiver first so an unsupported archive fails before any
    // output directory is created.
    const std::unique_ptr<Archiver> archiver = make_archiver(archive);
    const path destination = this->out_path(archive);

    archiver->extract(archive, destination);
    this->dwim_reparent(destination);
  }
}
// Resolve the archive to write.
//
// Without a user-provided `out`, the name is derived from the input: its
// archive extension is stripped, the stem is taken and the platform default
// extension is appended. A user-provided `out` must have a known archive
// extension, otherwise `XwimError` is thrown.
path CompressSingleIntent::out_path() {
  if (!this->out.has_value()) {
    return default_archive(strip_archive_extension(this->in).stem());
  }

  if (can_handle_archive(this->out.value())) {
    return this->out.value();
  }

  throw XwimError("Unknown archive format {}", this->out.value());
}
void CompressSingleIntent::execute() {
path out = this->out_path();
unique_ptr<Archiver> archiver = make_archiver(out);
set<path> ins{this->in};
archiver->compress(ins, out);
};
void CompressManyIntent::execute() {
if (!can_handle_archive(this->out)) {
throw XwimError("Unknown archive format {}", this->out);
}
unique_ptr<Archiver> archiver = make_archiver(this->out);
archiver->compress(this->in_paths, this->out);
}
} // namespace xwim

View file

@ -1,93 +0,0 @@
#pragma once
#include <optional>
#include <set>
#include "util/Common.hpp"
#include "UserOpt.hpp"
namespace xwim {
using namespace std;
using std::filesystem::path;
class UserIntent {
public:
virtual void execute() = 0;
virtual ~UserIntent() = default;
};
/* Factory method to construct a UserIntent which implements `execute()` */
unique_ptr<UserIntent> make_intent(const UserOpt& userOpt);
/**
* Extraction intent
*
* Extracts one or multiple archives. Optionally extracts them to given `out` folder. Otherwise extracts them to the
* current working directory.
*/
class ExtractIntent : public UserIntent {
 private:
  set<path> archives;   // archives to extract
  optional<path> out;   // optional target folder

  void dwim_reparent(const path& out);
  /* Resolves (and creates) the output directory for archive `p`. */
  path out_path(const path& p);

 public:
  /* Both arguments are taken by value and moved into the members, so
   * callers passing temporaries pay for no extra copy. */
  ExtractIntent(set<path> archives, optional<path> out)
      : archives(std::move(archives)), out(std::move(out)) {}
  ~ExtractIntent() override = default;

  void execute() override;
};
/**
* Compress intent for a single file or folder.
*
* Compresses a single path which may be a file or a folder.
*
* No `out` path given:
* - derives the archive name from the input path
* - uses the default archive format for the platform
*
* `out` path given:
* - `out` path must be a path with a valid archive name (including extension)
* - tries to compress the input to the out archive
* - if the `out` base name is different from the input base name, puts the input into a new folder
* with base name inside the archive (archive base name is always the name of the archive content)
*/
class CompressSingleIntent : public UserIntent {
 private:
  path in;              // file or folder to compress
  optional<path> out;   // optional archive path

  /* Resolves the archive path to write to. */
  path out_path();

 public:
  /* Both arguments are taken by value and moved into the members. */
  CompressSingleIntent(path in, optional<path> out)
      : UserIntent(), in(std::move(in)), out(std::move(out)) {}
  ~CompressSingleIntent() override = default;

  void execute() override;
};
/**
* Compress intent for multiple files and/or folders.
*
* Compresses multiple files and/or folders to a single archive as given by the `out` path. Since `out` cannot be
* guessed from the input in this case it is mandatory.
*
* A new, single root folder with base name equal to base name of the `out` archive is created inside the archive. All
* input files are put into this root folder.
*/
class CompressManyIntent : public UserIntent {
 private:
  set<path> in_paths;   // files and/or folders to compress
  path out;             // mandatory archive path

 public:
  /* Both arguments are taken by value and moved into the members. */
  CompressManyIntent(set<path> in_paths, path out)
      : UserIntent(), in_paths(std::move(in_paths)), out(std::move(out)) {}
  ~CompressManyIntent() override = default;

  void execute() override;
};
} // namespace xwim

View file

@ -1,52 +0,0 @@
#include "UserOpt.hpp"
#include <tclap/CmdLine.h>
// Tells TCLAP to build `std::filesystem::path` arguments via `operator=`
// (`StringLike`). The `operator>>` route (`ValueLike`) would split the
// argument at whitespace.
template <>
struct TCLAP::ArgTraits<std::filesystem::path> {
  using ValueCategory = StringLike;
};
namespace xwim {
/* Parses the command line with TCLAP and stores the result in this object.
 *
 * `compress`, `extract` and `out` are only assigned when the corresponding
 * argument was given; `verbosity` and `interactive` are always assigned. */
UserOpt::UserOpt(int argc, char** argv) {
  // clang-format off
  TCLAP::CmdLine cmd
    {"xwim - Do What I Mean Extractor", ' ', "0.3.0"};

  TCLAP::SwitchArg compress_flag
    {"c", "compress", "Compress <files>", cmd, false};
  TCLAP::SwitchArg extract_flag
    {"x", "extract", "Extract <file>", cmd, false};
  TCLAP::SwitchArg noninteractive_flag
    {"i", "non-interactive", "Non-interactive, fail on ambiguity", cmd, false};
  TCLAP::ValueArg<fs::path> out_arg
    {"o", "out", "Out <file-or-path>", false, fs::path{}, "A path on the filesystem", cmd};
  TCLAP::MultiSwitchArg verbose_count
    {"v", "verbose", "Verbosity level", cmd, 0};
  TCLAP::UnlabeledMultiArg<fs::path> path_args
    {"files", "Archive(s) to extract or file(s) to compress", true, "A path on the filesystem", cmd};
  // clang-format on

  cmd.parse(argc, argv);

  // Always-present values.
  this->verbosity = verbose_count.getValue();
  this->interactive = !noninteractive_flag.getValue();

  // Optional values stay empty unless given on the command line.
  if (compress_flag.isSet()) {
    this->compress = compress_flag.getValue();
  }
  if (extract_flag.isSet()) {
    this->extract = extract_flag.getValue();
  }
  if (out_arg.isSet()) {
    this->out = out_arg.getValue();
  }

  if (path_args.isSet()) {
    const auto& given = path_args.getValue();
    this->paths = set<fs::path>(given.begin(), given.end());
  }
}
} // namespace xwim

View file

@ -1,31 +0,0 @@
#pragma once
#include <optional>
#include <set>
#include "util/Common.hpp"
namespace xwim {
using namespace std;
namespace fs = std::filesystem;
/* Options the user passed on the command line. */
struct UserOpt {
  optional<bool> compress;       // empty unless `-c` was given
  optional<bool> extract;        // empty unless `-x` was given
  bool interactive;
  int verbosity;
  std::optional<fs::path> out;   // empty unless `-o` was given
  std::set<fs::path> paths;

  UserOpt(int argc, char** argv);

  /* True only when `compress` is set and holds `true`. */
  bool wants_compress() const { return this->compress.value_or(false); }

  /* True only when `extract` is set and holds `true`. */
  bool wants_extract() const { return this->extract.value_or(false); }
};
} // namespace xwim

121
src/archive.cpp Normal file
View file

@ -0,0 +1,121 @@
#include <spdlog/spdlog.h>
#include <sys/stat.h>
namespace logger = spdlog;
#include <archive.h>
#include <archive_entry.h>
#include <algorithm>
#include <filesystem>
#include <iostream>
#include <stdexcept>
#include "archive_sys.hpp"
#include "archive.hpp"
#include "spec.hpp"
#include "fileformats.hpp"
namespace xwim {
/* Sets `spec->is_root_filename` to whether the first path component of
 * `entry` equals the archive file name with its archive extensions stripped
 * (see `xwim::stem`).
 *
 * An entry with an empty path has no first component to compare; in that
 * case the flag is set to false instead of dereferencing `begin()` of an
 * empty path, which would be undefined behaviour.
 */
static void _spec_is_root_filename(ArchiveSpec* spec,
                                   ArchiveEntryView entry,
                                   std::filesystem::path* filepath) {
  const std::filesystem::path entry_path = entry.path();
  const std::filesystem::path norm_stem = xwim::stem(filepath->filename());

  if (entry_path.empty()) {
    logger::debug("Archive root entry has an empty path");
    spec->is_root_filename = false;
    logger::debug("\t-> Archive stem: {}", norm_stem.string());
    return;
  }

  const std::filesystem::path entry_root = *entry_path.begin();
  if (entry_root != norm_stem) {
    logger::debug("Archive root does not match archive name");
    spec->is_root_filename = false;
  } else {
    logger::debug("Archive root matches archive name");
    spec->is_root_filename = true;
  }
  logger::debug("\t-> Archive root: {}", entry_root.string());
  logger::debug("\t-> Archive stem: {}", norm_stem.string());
}
/* Sets `spec->is_root_dir` to whether `entry` is a directory and logs the
 * entry's file type in octal. */
static void _spec_is_root_dir(ArchiveSpec* spec, ArchiveEntryView entry) {
  const bool root_is_directory = entry.is_directory();
  spec->is_root_dir = root_is_directory;

  if (root_is_directory) {
    logger::debug("Archive root is directory");
  } else {
    logger::debug("Archive root is not a directory");
  }
  logger::debug("\t-> Archive mode_t: {0:o}", entry.file_type());
}
/* Sets `spec->has_single_root` to whether every remaining entry of
 * `archive_reader` shares its first path component with `first_entry`.
 *
 * The reader is advanced until a differing root is found or the archive
 * ends.
 */
static void _spec_has_single_root(ArchiveSpec* spec,
                                  ArchiveEntryView first_entry,
                                  ArchiveReaderSys& archive_reader) {
  // Capture the root before advancing: the view is non-owning.
  std::filesystem::path first_entry_root = *(first_entry.path().begin());
  logger::trace("Testing roots");

  spec->has_single_root = true;
  while (spec->has_single_root && archive_reader.advance()) {
    ArchiveEntryView current = archive_reader.cur();
    auto current_path = current.path();
    logger::trace("Path: {}, Root: {}", current_path.string(),
                  current_path.begin()->string());

    if (first_entry_root == *current_path.begin()) {
      continue;
    }

    logger::debug("Archive has multiple roots");
    logger::debug("\t-> Archive root I: {}",
                  first_entry_root.begin()->string());
    logger::debug("\t-> Archive root II: {}", current_path.begin()->string());
    spec->has_single_root = false;
  }

  if (spec->has_single_root) {
    logger::debug("Archive has single root: {}", first_entry_root.string());
  }
}
/* Remembers the location of the archive; the file is not opened here. */
Archive::Archive(std::filesystem::path path)
    : path(path) {}
/* Builds an ArchiveSpec by reading the entries of the archive at `path`.
 *
 * For an archive without entries the first three flags are returned as
 * false. Otherwise the first entry determines `is_root_filename` and
 * `is_root_dir`, and the remaining entries determine `has_single_root`.
 */
ArchiveSpec Archive::check() {
  logger::trace("Creating archive spec for {}", this->path.string());
  ArchiveReaderSys archive_reader{this->path};

  const bool has_entries = archive_reader.advance();
  if (!has_entries) {  // can't advance even once, archive is empty
    logger::debug("Archive is empty");
    return ArchiveSpec{false, false, false};
  }

  ArchiveEntryView first_entry = archive_reader.cur();
  logger::trace("Found archive entry {}", first_entry.path_name());

  ArchiveSpec result;
  _spec_is_root_filename(&result, first_entry, &this->path);
  _spec_is_root_dir(&result, first_entry);
  _spec_has_single_root(&result, first_entry, archive_reader);
  return result;
}
/* Extracts every entry of the archive at `path`.
 *
 * When `extract_spec.make_dir` is set, the extractor is constructed with
 * `extract_spec.dirname`; otherwise it is constructed without a root.
 */
void Archive::extract(ExtractSpec extract_spec) {
  // Resolve the archive location first: the extractor constructed with a
  // root changes the current working directory.
  const std::filesystem::path archive_location =
      std::filesystem::absolute(this->path);

  std::unique_ptr<ArchiveExtractorSys> extractor;
  if (!extract_spec.make_dir) {
    extractor = std::make_unique<ArchiveExtractorSys>();
  } else {
    logger::trace("Creating extract directory {}", extract_spec.dirname.string());
    extractor = std::make_unique<ArchiveExtractorSys>(extract_spec.dirname);
  }

  std::filesystem::path reader_path = archive_location;
  ArchiveReaderSys reader{reader_path};
  extractor->extract_all(reader);
}
} // namespace xwim

50
src/archive.hpp Normal file
View file

@ -0,0 +1,50 @@
#pragma once
#include <archive.h>
#include <fmt/format.h>
#include <filesystem>
#include <stdexcept>
#include <string>
#include <string_view>
#include "spec.hpp"
namespace xwim {
/** Class for interacting with archives */
class Archive {
 private:
  /** Location of the archive file on disk */
  std::filesystem::path path;

 public:
  explicit Archive(std::filesystem::path path);

  /** Generate an ArchiveSpec by analysing the archive at `path`
   *
   * @returns ArchiveSpec for the archive
   */
  ArchiveSpec check();

  /** Extract the archive at `path` according to given ExtractSpec
   *
   * @param extract_spec settings that steer the extraction
   */
  void extract(ExtractSpec extract_spec);
};
/** Exception carrying a message and, when available, the libarchive error
 * text of the given archive handle. */
class ArchiveException : public std::exception {
 private:
  std::string _what;

 public:
  /**
   * @param what    description of the failed operation
   * @param archive handle whose error text is appended; may be null, in
   *                which case only `what` is used
   */
  ArchiveException(std::string what, archive* archive) {
    // Query the error text once; a null handle must not be passed on to
    // libarchive.
    const char* detail = archive ? archive_error_string(archive) : nullptr;
    if (detail) {
      _what = fmt::format("{}: {}", what, detail);
    } else {
      _what = fmt::format("{}", what);
    }
  }

  const char* what() const noexcept override { return this->_what.c_str(); }
};
} // namespace xwim

142
src/archive_sys.cpp Normal file
View file

@ -0,0 +1,142 @@
#include <archive_entry.h>
#include <spdlog/spdlog.h>
namespace logger = spdlog;
#include "archive_sys.hpp"
#include <archive.h>
#include <filesystem>
#include <memory>
// True when this view does not refer to any archive entry.
bool xwim::ArchiveEntryView::is_empty() {
  const bool has_entry = (this->ae != nullptr);
  return !has_entry;
}
// Returns the path name of the viewed entry as reported by libarchive.
//
// Throws ArchiveSysException when the view is empty or when libarchive
// reports no path name for the entry (constructing a std::string from a
// null pointer would be undefined behaviour).
std::string xwim::ArchiveEntryView::path_name() {
  if (!this->ae) throw ArchiveSysException{"Access to invalid archive entry"};

  const char* name = archive_entry_pathname(this->ae);
  if (!name) throw ArchiveSysException{"Archive entry has no path name"};
  return name;
}
// Returns the entry's path name wrapped in a std::filesystem::path.
// Throws ArchiveSysException when the view is empty.
std::filesystem::path xwim::ArchiveEntryView::path() {
  if (this->ae == nullptr) {
    throw ArchiveSysException{"Access to invalid archive entry"};
  }
  std::filesystem::path entry_path{this->path_name()};
  return entry_path;
}
// Returns the file type bits of the viewed entry as reported by libarchive.
// Throws ArchiveSysException when the view is empty.
mode_t xwim::ArchiveEntryView::file_type() {
  if (this->ae == nullptr) {
    throw ArchiveSysException{"Access to invalid archive entry"};
  }
  const mode_t type = archive_entry_filetype(this->ae);
  return type;
}
// True when the entry's file type denotes a directory.
bool xwim::ArchiveEntryView::is_directory() {
  const mode_t type = this->file_type();
  return S_ISDIR(type);
}
// Opens the archive at `path` for reading with all libarchive filters and
// formats enabled.
//
// Throws ArchiveSysException when the reader cannot be allocated or the file
// cannot be opened. The destructor does not run when a constructor throws,
// so the libarchive handle is released here before throwing.
xwim::ArchiveReaderSys::ArchiveReaderSys(std::filesystem::path& path) {
  int r;  // libarchive error handling
  logger::trace("Setting up archive reader");

  // No entry has been read yet; `cur()` must not expose an indeterminate
  // pointer.
  this->ae = nullptr;
  this->ar = archive_read_new();
  if (!this->ar) {
    throw ArchiveSysException{"Could not allocate archive reader"};
  }
  archive_read_support_filter_all(this->ar);
  archive_read_support_format_all(this->ar);

  logger::trace("Reading archive at {}", path.c_str());
  r = archive_read_open_filename(this->ar, path.c_str(), 10240);
  if (r != ARCHIVE_OK) {
    // Build the exception first: it copies the error text from the handle.
    ArchiveSysException error{"Could not open archive file", this->ar};
    archive_read_free(this->ar);
    this->ar = nullptr;
    throw error;
  }
  logger::trace("Archive read succesfully");
}
// Releases the libarchive read handle, if there is one.
xwim::ArchiveReaderSys::~ArchiveReaderSys() {
  logger::trace("Destructing ArchiveReaderSys");
  if (this->ar != nullptr) {
    archive_read_free(this->ar);
  }
}
// Moves the reader to the next entry header.
//
// Returns false (and clears the current entry) at the end of the archive,
// true when a new entry is available. Any other libarchive status raises
// ArchiveSysException.
bool xwim::ArchiveReaderSys::advance() {
  logger::trace("Advancing reader to next archive entry");

  const int status = archive_read_next_header(this->ar, &this->ae);
  switch (status) {
    case ARCHIVE_EOF:
      this->ae = nullptr;
      return false;
    case ARCHIVE_OK:
      break;
    default:
      throw(ArchiveSysException{"Could not list archive", this->ar});
  }

  logger::trace("Got entry {}", archive_entry_pathname(ae));
  return true;
}
// Wraps the reader's current entry pointer in a non-owning view.
const xwim::ArchiveEntryView xwim::ArchiveReaderSys::cur() {
  ArchiveEntryView current_view{this->ae};
  return current_view;
}
// Creates `root` (including parents), makes it the process' current working
// directory and sets up a libarchive disk writer with standard lookup.
// NOTE(review): the previous working directory is not restored anywhere in
// this class.
xwim::ArchiveExtractorSys::ArchiveExtractorSys(std::filesystem::path& root) {
  logger::trace("Constructing ArchiveExtractorSys with path {}", root.string());

  namespace fs = std::filesystem;
  fs::create_directories(root);
  fs::current_path(root);

  this->writer = archive_write_disk_new();
  archive_write_disk_set_standard_lookup(this->writer);
  logger::trace("Constructed ArchiveExtractorSys at {:p}",
                static_cast<void*>(this->writer));
}
// Sets up a libarchive disk writer with standard lookup; the current working
// directory is left untouched.
xwim::ArchiveExtractorSys::ArchiveExtractorSys() {
  logger::trace("Construction ArchiveExtractorSys without root");

  this->writer = archive_write_disk_new();
  archive_write_disk_set_standard_lookup(this->writer);

  logger::trace("Constructed ArchiveExtractorSys at {:p}",
                static_cast<void*>(this->writer));
}
// Extracts every entry that `reader` still yields.
void xwim::ArchiveExtractorSys::extract_all(xwim::ArchiveReaderSys& reader) {
  for (;;) {
    const bool has_entry = reader.advance();
    if (!has_entry) {
      break;
    }
    this->extract_entry(reader);
  }
}
// forward declared
static int copy_data(struct archive* ar, struct archive* aw);
// Writes the reader's current entry to disk: header, data blocks, then the
// per-entry finish step.
//
// Throws ArchiveSysException when any step does not return ARCHIVE_OK.
void xwim::ArchiveExtractorSys::extract_entry(xwim::ArchiveReaderSys& reader) {
  int r;

  r = archive_write_header(this->writer, reader.ae);
  if (r != ARCHIVE_OK) {
    // The header is written by the disk writer, so its handle holds the
    // error text.
    throw(ArchiveSysException("Could not extract entry", this->writer));
  }

  r = copy_data(reader.ar, this->writer);
  if (r != ARCHIVE_OK) {
    throw(ArchiveSysException("Could not extract entry", reader.ar));
  }

  // Close out the entry explicitly so that failures are reported for this
  // entry rather than surfacing on a later call.
  r = archive_write_finish_entry(this->writer);
  if (r != ARCHIVE_OK) {
    throw(ArchiveSysException("Could not finish entry", this->writer));
  }
}
// Closes and releases the libarchive disk writer, if there is one.
xwim::ArchiveExtractorSys::~ArchiveExtractorSys() {
  logger::trace("Destructing ArchiveExtractorSys at {:p}",
                static_cast<void*>(this->writer));
  if (this->writer == nullptr) {
    return;
  }
  archive_write_close(this->writer);
  archive_write_free(this->writer);
}
// Copies the data blocks of the reader's current entry to the writer.
//
// Returns ARCHIVE_OK once the reader signals the end of the entry's data,
// otherwise the first non-OK status returned by either side.
static int copy_data(struct archive* ar, struct archive* aw) {
  const void* block;
  size_t block_size;
  int64_t block_offset;

  while (true) {
    int status = archive_read_data_block(ar, &block, &block_size, &block_offset);
    if (status == ARCHIVE_EOF) {
      return ARCHIVE_OK;
    }
    if (status != ARCHIVE_OK) {
      return status;
    }

    status = archive_write_data_block(aw, block, block_size, block_offset);
    if (status != ARCHIVE_OK) {
      return status;
    }
  }
}

101
src/archive_sys.hpp Normal file
View file

@ -0,0 +1,101 @@
#pragma once
#include <archive.h>
#include <filesystem>
#include <memory>
#include <fmt/format.h>
namespace xwim {
/** A view into an archive entry
 *
 * The view is non-owning and the caller must guarantee
 * that the parent archive entry is valid when the view
 * is accessed.
 *
 * A default-constructed view is empty (`is_empty()` returns true).
 */
class ArchiveEntryView {
 private:
  // Initialised to null so that a default-constructed view is well defined.
  archive_entry* ae = nullptr;

 public:
  ArchiveEntryView() = default;
  ArchiveEntryView(archive_entry* entry) : ae{entry} {}

  /** @return true if the view refers to no entry */
  bool is_empty();
  /** @return the entry's path name as a string */
  std::string path_name();
  /** @return the entry's path name as a filesystem path */
  std::filesystem::path path();
  /** @return the entry's file type bits */
  mode_t file_type();
  /** @return true if the entry is a directory */
  bool is_directory();
};
/** A reader for archive files
 *
 * Shim for `libarchive`. Iterates through
 * entries of an archive with `advance()`.
 *
 * The reader owns its libarchive handle and frees it on destruction, so it
 * is not copyable: a copy would free the same handle a second time.
 */
class ArchiveReaderSys {
 private:
  archive* ar = nullptr;
  archive_entry* ae = nullptr;
  friend class ArchiveExtractorSys;

 public:
  ArchiveReaderSys(std::filesystem::path& path);
  ~ArchiveReaderSys();

  ArchiveReaderSys(const ArchiveReaderSys&) = delete;
  ArchiveReaderSys& operator=(const ArchiveReaderSys&) = delete;

  /** Advances the internal entry pointer
   *
   * @return true if the pointer advanced to the next entry
   *         false if the end of the archive was reached
   */
  bool advance();

  /** Returns a non-owning view of the current entry
   *
   * ArchiveEntryView is a non-owning view of the currently
   * active entry in this reader. A retrieved archive entry
   * may not be used after another call to advance in the
   * same reader.
   *
   * @return a view to the archive entry this reader currently
   *         points to
   */
  const ArchiveEntryView cur();
};
/** An extractor for archive files
 *
 * Shim for `libarchive`.
 *
 * The extractor owns its libarchive write handle and frees it on
 * destruction, so it is not copyable: a copy would free the same handle a
 * second time.
 */
class ArchiveExtractorSys {
 private:
  archive* writer = nullptr;

 public:
  /** Extract below `root`, which is created if necessary */
  ArchiveExtractorSys(std::filesystem::path& root);
  /** Extract without a dedicated root directory */
  ArchiveExtractorSys();
  ~ArchiveExtractorSys();

  ArchiveExtractorSys(const ArchiveExtractorSys&) = delete;
  ArchiveExtractorSys& operator=(const ArchiveExtractorSys&) = delete;

  /** Extract every entry the reader still yields */
  void extract_all(ArchiveReaderSys& reader);
  /** Extract the entry the reader currently points to */
  void extract_entry(ArchiveReaderSys& reader);
};
/** Exception raised by the libarchive shim classes.
 *
 * Carries a message and, when available, the libarchive error text of the
 * given archive handle.
 */
class ArchiveSysException : public std::exception {
 private:
  std::string _what;

 public:
  /**
   * @param what    description of the failed operation
   * @param archive handle whose error text is appended; may be null, in
   *                which case only `what` is used
   */
  ArchiveSysException(std::string what, archive* archive) {
    // Query the error text once; a null handle must not be passed on to
    // libarchive.
    const char* detail = archive ? archive_error_string(archive) : nullptr;
    if (detail) {
      _what = fmt::format("{}: {}", what, detail);
    } else {
      _what = fmt::format("{}", what);
    }
  }

  /** @param what description of the failed operation */
  ArchiveSysException(std::string what) { _what = fmt::format("{}", what); }

  const char* what() const noexcept override { return this->_what.c_str(); }
};
} // namespace xwim

View file

@ -1,167 +0,0 @@
#include <archive.h>
#include <archive_entry.h>
#include <fcntl.h>
#include <fmt/core.h>
#include <spdlog/spdlog.h>
#include <sys/stat.h>
#include <filesystem>
#include <iostream>
#include <memory>
#include "../Archiver.hpp"
#include "../util/Common.hpp"
namespace xwim {
using namespace std;
namespace fs = std::filesystem;
static int copy_data(shared_ptr<archive> reader, shared_ptr<archive> writer);
/* Compresses all paths in `ins` into the archive `archive_out`.
 *
 * The archive format and filter are derived from the extension of
 * `archive_out`. Each input is traversed with libarchive's disk reader;
 * every entry found is written with its header followed by the file
 * contents.
 *
 * Throws XwimError when the output cannot be set up or opened, when an input
 * cannot be read, or when writing to the archive fails.
 */
void LibArchiver::compress(set<fs::path> ins, fs::path archive_out) {
  spdlog::debug("Compressing to {}", archive_out);
  int r;                    // libarchive error handling
  static char buff[16384];  // read buffer

  // archive_error_string() may return NULL; never hand that to fmt.
  auto error_of = [](archive* a) -> string {
    const char* msg = archive_error_string(a);
    return msg ? msg : "unknown error";
  };

  // cannot use unique_ptr here since unique_ptr requires a
  // complete type. `archive` is forward declared only.
  shared_ptr<archive> writer(archive_write_new(), archive_write_free);

  r = archive_write_set_format_filter_by_ext(writer.get(), archive_out.c_str());
  if (r != ARCHIVE_OK) {
    throw XwimError{"Failed selecting archive format for {}. {}", archive_out,
                    error_of(writer.get())};
  }
  r = archive_write_open_filename(writer.get(), archive_out.c_str());
  if (r != ARCHIVE_OK) {
    throw XwimError{"Failed opening {} for writing. {}", archive_out,
                    error_of(writer.get())};
  }

  shared_ptr<archive> reader;
  shared_ptr<archive_entry> entry(archive_entry_new(), archive_entry_free);

  for (auto in : ins) {
    spdlog::debug("Compressing {}", in);
    reader = shared_ptr<archive>(archive_read_disk_new(), archive_read_free);
    archive_read_disk_set_standard_lookup(reader.get());

    r = archive_read_disk_open(reader.get(), in.c_str());
    if (r != ARCHIVE_OK) {
      throw XwimError{"Failed opening {}. {}", in, error_of(reader.get())};
    }

    for (;;) {
      r = archive_read_next_header2(reader.get(), entry.get());
      if (r == ARCHIVE_EOF) break;
      if (r != ARCHIVE_OK) {
        throw XwimError{"Failed compressing archive entry. {}",
                        error_of(reader.get())};
      }

      spdlog::debug("Adding {} to archive", archive_entry_pathname(entry.get()));
      r = archive_write_header(writer.get(), entry.get());
      if (r != ARCHIVE_OK) {
        throw XwimError{"Failed writing archive entry. {}",
                        error_of(writer.get())};
      }

      /* For now, we use a simpler loop to copy data
       * into the target archive. */
      // Only regular files must be readable; directories and other entry
      // types carry no data, so a failing open/read is tolerated for them.
      const bool is_regular = archive_entry_filetype(entry.get()) == AE_IFREG;
      const char* source = archive_entry_sourcepath(entry.get());
      const string source_name = source ? source : "<unknown>";

      int fd = source ? open(source, O_RDONLY) : -1;
      if (fd < 0) {
        if (is_regular) {
          throw XwimError{"Failed opening {} for reading", source_name};
        }
      } else {
        ssize_t len = read(fd, buff, sizeof(buff));
        while (len > 0) {
          if (archive_write_data(writer.get(), buff, len) < 0) {
            close(fd);
            throw XwimError{"Failed writing archive entry data. {}",
                            error_of(writer.get())};
          }
          len = read(fd, buff, sizeof(buff));
        }
        close(fd);
        if (len < 0 && is_regular) {
          throw XwimError{"Failed reading {}", source_name};
        }
      }

      archive_entry_clear(entry.get());
      archive_read_disk_descend(reader.get());
    }
  }

  // Close explicitly: the deleter would also close the archive, but it
  // cannot report a failure while flushing the output.
  r = archive_write_close(writer.get());
  if (r != ARCHIVE_OK) {
    throw XwimError{"Failed finishing archive {}. {}", archive_out,
                    error_of(writer.get())};
  }
}
/* Extracts the archive `archive_in` into the directory `out`.
 *
 * `out` is created if needed and temporarily becomes the current working
 * directory; the previous working directory is restored on return and also
 * when an error is thrown.
 *
 * Throws XwimError when the archive cannot be opened or an entry cannot be
 * read or written.
 */
void LibArchiver::extract(fs::path archive_in, fs::path out) {
  spdlog::debug("Extracting archive {} to {}", archive_in, out);
  int r;  // libarchive error handling

  // archive_error_string() may return NULL; never hand that to fmt.
  auto error_of = [](archive* a) -> string {
    const char* msg = archive_error_string(a);
    return msg ? msg : "unknown error";
  };

  // cannot use unique_ptr here since unique_ptr requires a
  // complete type. `archive` is forward declared only.
  shared_ptr<archive> reader(archive_read_new(), archive_read_free);
  archive_read_support_filter_all(reader.get());
  archive_read_support_format_all(reader.get());
  r = archive_read_open_filename(reader.get(), archive_in.c_str(), 10240);
  if (r != ARCHIVE_OK) {
    throw XwimError{"Failed opening archive {}. {}", archive_in,
                    error_of(reader.get())};
  }

  shared_ptr<archive> writer(archive_write_disk_new(), archive_write_free);
  archive_write_disk_set_standard_lookup(writer.get());

  fs::create_directories(out);
  fs::path cur_path = fs::current_path();

  // Restores the working directory if an exception leaves this function.
  // The regular path restores it explicitly below so that a failure to do so
  // is still reported.
  struct CwdGuard {
    fs::path saved;
    bool active;
    ~CwdGuard() {
      if (active) {
        std::error_code ignored;
        fs::current_path(saved, ignored);
      }
    }
  } cwd_guard{cur_path, true};

  fs::current_path(out);

  archive_entry *entry;
  for (;;) {
    r = archive_read_next_header(reader.get(), &entry);
    if (r == ARCHIVE_EOF) break;
    if (r != ARCHIVE_OK) {
      throw XwimError{"Failed extracting archive entry. {}",
                      error_of(reader.get())};
    }

    r = archive_write_header(writer.get(), entry);
    if (r != ARCHIVE_OK) {
      throw XwimError{"Failed writing archive entry header. {}",
                      error_of(writer.get())};
    }

    if (archive_entry_size(entry) > 0) {
      r = copy_data(reader, writer);
      if (r != ARCHIVE_OK) {
        throw XwimError{"Failed writing archive entry data. {}",
                        error_of(writer.get())};
      }
    }

    r = archive_write_finish_entry(writer.get());
    if (r != ARCHIVE_OK) {
      throw XwimError{"Failed finishing archive entry data. {}",
                      error_of(writer.get())};
    }
  }

  cwd_guard.active = false;
  fs::current_path(cur_path);
}
/* Copies the data blocks of the reader's current entry to the writer.
 *
 * Returns ARCHIVE_OK once the reader signals the end of the entry's data,
 * otherwise the first non-OK status returned by either side. */
static int copy_data(shared_ptr<archive> reader, shared_ptr<archive> writer) {
  const void *block;
  size_t block_size;
  int64_t block_offset;

  while (true) {
    int status = archive_read_data_block(reader.get(), &block, &block_size,
                                         &block_offset);
    if (status == ARCHIVE_EOF) {
      return ARCHIVE_OK;
    }
    if (status != ARCHIVE_OK) {
      return status;
    }

    status = archive_write_data_block(writer.get(), block, block_size,
                                      block_offset);
    if (status != ARCHIVE_OK) {
      return status;
    }
  }
}
} // namespace xwim

46
src/fileformats.hpp Normal file
View file

@ -0,0 +1,46 @@
/** @file fileformats.hpp
* @brief Handle archive extensions
*/
#pragma once
#include <spdlog/spdlog.h>
namespace logger = spdlog;
#include <filesystem>
#include <set>
#include <string>
namespace xwim {
/** Common archive formats understood by xwim
*
* The underlying libarchive backend retrieves format information by a process
* called `bidding`. Hence, this information is mainly used to strip extensions.
*
* Stripping extensions via `std::filesystem::path` does not work reliably since
* it gets easily confused by dots in the regular file name.
*/
// Every entry carries its leading dot because the lookup key is the result
// of `std::filesystem::path::extension()`, which includes the dot.
const std::set<std::string> fileformats{".7z",  ".7zip",  ".jar", ".tgz",
                                        ".bz2", ".bzip2", ".gz",  ".gzip",
                                        ".rar", ".tar",   ".xz",  ".zip"};
/** Strip archive extensions from a path
*
* @returns Base filename without archive extensions
*/
inline std::filesystem::path stem(const std::filesystem::path& path) {
  logger::trace("Stemming {}", path.string());

  // Work on the file name only; any directory part is dropped.
  std::filesystem::path remaining = path.filename();

  // Peel off one known archive extension per iteration.
  for (;;) {
    const std::string extension = remaining.extension().string();
    if (fileformats.count(extension) == 0) {
      break;
    }
    remaining = remaining.stem();
    logger::trace("Stemmed to {}", remaining.string());
  }

  logger::trace("Finished stemming {}", remaining.string());
  return remaining;
}
} // namespace xwim

View file

@ -1,26 +1,45 @@
#include <spdlog/common.h>
#include <spdlog/spdlog.h>
namespace logger = spdlog;
#include <cstdlib>
#include <filesystem>
#include <iostream>
#include <ostream>
#include <string>
#include <list>
#include "UserIntent.hpp"
#include "UserOpt.hpp"
#include "util/Common.hpp"
#include "util/Log.hpp"
using namespace xwim;
using namespace std;
#include "util/log.hpp"
#include "archive.hpp"
#include "spec.hpp"
#include "fileformats.hpp"
int main(int argc, char** argv) {
log::init();
UserOpt user_opt = UserOpt{argc, argv};
log::init(user_opt.verbosity);
xwim::log::init();
try {
unique_ptr<UserIntent> user_intent = make_intent(user_opt);
user_intent->execute();
} catch (XwimError& e) {
spdlog::error(e.what());
std::filesystem::path filepath{argv[1]};
xwim::Archive archive{filepath};
xwim::ArchiveSpec archive_spec = archive.check();
logger::info("{}", archive_spec);
xwim::ExtractSpec extract_spec{};
if (!archive_spec.has_single_root || !archive_spec.is_root_filename) {
extract_spec.make_dir = true;
std::filesystem::path stem = xwim::stem(filepath);
extract_spec.dirname = stem;
}
if (archive_spec.has_subarchive) {
extract_spec.extract_subarchive = true;
}
logger::info("{}", extract_spec);
archive.extract(extract_spec);
} catch (xwim::ArchiveException& ae) {
logger::error("{}", ae.what());
}
}
}

View file

@ -1,12 +1,9 @@
xwim_src = ['main.cpp', 'Archiver.cpp', 'UserOpt.cpp', 'UserIntent.cpp']
xwim_src = ['main.cpp',
'archive.cpp',
'archive_sys.cpp']
xwim_archiver = ['archiver/LibArchiver.cpp']
xwim_libs = [dependency('libarchive', required: true),
dependency('fmt', required: true),
dependency('spdlog', required: true)]
is_static = get_option('default_library')=='static'
xwim_libs = [dependency('libarchive', required: true, static: is_static),
dependency('spdlog', required: true, static: is_static),
dependency('fmt', required: true, static: is_static),
dependency('tclap', required: true, static: is_static)]
executable('xwim', xwim_src+xwim_archiver, dependencies: xwim_libs)
executable('xwim', xwim_src, dependencies: xwim_libs)

80
src/spec.hpp Normal file
View file

@ -0,0 +1,80 @@
#pragma once
#include <archive.h>
#include <fmt/format.h>
#include <filesystem>
#include <memory>
namespace xwim {
/** Properties of an archive
*
* These properties can be retrieved by analyzing the
* archive. There is no outside-knowledge. All information
* is in the archive.
*/
struct ArchiveSpec {
  /** There is only a single file xor a single folder at the archive's root */
  bool has_single_root = false;

  /** The name of the (single) root is the same as the stemmed archive file
   * name. Cannot be true if `has_single_root` is false */
  bool is_root_filename = false;

  /** The (single) root is a folder. Cannot be true if `has_single_root` is
   * false */
  bool is_root_dir = false;

  /** Whether the archive contains sub-archives */
  bool has_subarchive = false;
};
/** Properties influencing the extraction process
*
* These properties can be set to influence the extraction
* process accordingly.
*/
struct ExtractSpec {
  /** Create a new directory for extraction at `dirname` */
  bool make_dir = false;

  /** The path to a directory for extraction */
  std::filesystem::path dirname{};

  /** Recursively extract sub-archives */
  bool extract_subarchive = false;
};
} // namespace xwim
#if FMT_VERSION < 50300
typedef fmt::basic_parse_context<char> format_parse_context;
#endif
/** fmt support for ArchiveSpec: prints every flag of the spec.
 *
 * No format specifiers are supported; `parse` accepts the context as is.
 */
template <>
struct fmt::formatter<xwim::ArchiveSpec> {
  constexpr auto parse(format_parse_context& ctx) {
    return ctx.begin();
  }

  // `format` is const so that the formatter can be used through a const
  // reference, and `format_to` is qualified so that the call does not depend
  // on argument-dependent lookup.
  template <typename FormatContext>
  auto format(const xwim::ArchiveSpec& spec, FormatContext& ctx) const {
    return fmt::format_to(ctx.out(),
                          "Archive["
                          " .has_single_root={},"
                          " .is_root_filename={}"
                          " .is_root_dir={}"
                          " .has_subarchive={}"
                          " ]",
                          spec.has_single_root, spec.is_root_filename,
                          spec.is_root_dir, spec.has_subarchive);
  }
};
/** fmt support for ExtractSpec: prints every field of the spec.
 *
 * No format specifiers are supported; `parse` accepts the context as is.
 */
template <>
struct fmt::formatter<xwim::ExtractSpec> {
  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }

  // `format` is const so that the formatter can be used through a const
  // reference, and `format_to` is qualified so that the call does not depend
  // on argument-dependent lookup.
  template <typename FormatContext>
  auto format(const xwim::ExtractSpec& spec, FormatContext& ctx) const {
    return fmt::format_to(ctx.out(),
                          "Extract["
                          " .make_dir={},"
                          " .dirname={}"
                          " .extract_subarchive={}"
                          " ]",
                          spec.make_dir, spec.dirname.string(),
                          spec.extract_subarchive);
  }
};

View file

@ -1,30 +0,0 @@
#pragma once
#include <fmt/core.h>
#include <filesystem>
#include <string>
#include <random>
// fmt support for std::filesystem::path: prints the path via `string()`.
template <>
struct fmt::formatter<std::filesystem::path> {
  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }

  template <typename FormatContext>
  auto format(const std::filesystem::path& path, FormatContext& ctx) const {
    // The path text is passed as an argument, never as the format string:
    // a path containing `{` or `}` would otherwise be parsed as format
    // syntax.
    return fmt::format_to(ctx.out(), "{}", path.string());
  }
};
// Error type thrown by xwim. The message is produced by passing `fmt` and
// `args` to `fmt::format`.
class XwimError : public std::runtime_error {
 public:
  template <typename... Args>
  XwimError(const std::string& fmt, const Args... args)
      : std::runtime_error(fmt::format(fmt, args...)) {}
};
// Returns a uniformly distributed random integer in the closed range
// [from, to].
//
// The generator is seeded once per thread from `std::random_device` and then
// reused, instead of constructing and seeding a new Mersenne Twister on
// every call.
inline int rand_int(int from, int to) {
  static thread_local std::mt19937 gen{std::random_device{}()};
  std::uniform_int_distribution<> distrib(from, to);
  return distrib(gen);
}

View file

@ -1,15 +1,17 @@
#pragma once
#include <spdlog/common.h>
#include <spdlog/spdlog.h>
#include <cstdlib>
#ifdef NDEBUG
#define XWIM_LOGLEVEL SPDLOG_LEVEL_ERROR
#else
#define XWIM_LOGLEVEL SPDLOG_LEVEL_DEBUG
#endif
namespace xwim::log {
namespace xwim {
namespace log {
/**
* Get log level from XWIM_LOGLEVEL environment variable.
@ -32,7 +34,7 @@ spdlog::level::level_enum _init_from_env() {
}
return lvl;
}
};
/**
* Get log level from compile time definition.
@ -58,27 +60,7 @@ spdlog::level::level_enum _init_from_compile() {
* The determined level is then set for the default logger via
* `spdlog::set_level`.
*/
void init(int verbosity = -1,
spdlog::level::level_enum level = spdlog::level::level_enum::off) {
if (verbosity != -1) {
switch (verbosity) {
case 0:
spdlog::set_level(spdlog::level::off);
break;
case 1:
spdlog::set_level(spdlog::level::info);
break;
case 2:
spdlog::set_level(spdlog::level::debug);
break;
case 3:
default:
spdlog::set_level(spdlog::level::trace);
break;
}
return;
}
void init(spdlog::level::level_enum level = spdlog::level::level_enum::off) {
if (spdlog::level::level_enum::off != level) {
spdlog::set_level(level);
return;
@ -90,7 +72,8 @@ void init(int verbosity = -1,
return;
}
spdlog::set_level(_init_from_compile());
return spdlog::set_level(_init_from_compile());
}
} // namespace xwim::log
} // namespace log
} // namespace xwim

11
test/archive_test.cpp Normal file
View file

@ -0,0 +1,11 @@
#include <gtest/gtest.h>
#include <archive.hpp>
#include <spec.hpp>
// The fixture archive is expected to yield a spec with `has_single_root` set.
TEST(ArchiveTest, ArchiveSpecDetectsSingleRoot) {
  xwim::Archive fixture("test/archives/root.tar.gz");

  const xwim::ArchiveSpec detected = fixture.check();

  ASSERT_TRUE(detected.has_single_root);
}

34
test/fileformats_test.cpp Normal file
View file

@ -0,0 +1,34 @@
#include <gtest/gtest.h>
#include <fileformats.hpp>
#include <string>
// One known archive extension is removed, along with the directory part.
TEST(FileformatsTest, StemStripsSingleKnownExtension) {
  const std::filesystem::path input{"/some/path/to/file.rar"};
  const std::filesystem::path expected{"file"};

  ASSERT_EQ(xwim::stem(input), expected);
}
// A chain of known archive extensions is removed completely.
TEST(FileformatsTest, StemStripsMultipleKnownExtensions) {
  const std::filesystem::path input{"/some/path/to/file.tar.rar.gz.7z.rar"};
  const std::filesystem::path expected{"file"};

  ASSERT_EQ(xwim::stem(input), expected);
}
// Stripping stops at the first extension that is not a known archive format.
TEST(FileformatsTest, StemStripsOnlyKnownExtension) {
  const std::filesystem::path input{"/some/path/to/file.ukn.rar"};
  const std::filesystem::path expected{"file.ukn"};

  ASSERT_EQ(xwim::stem(input), expected);
}
// An unknown extension is left in place.
TEST(FileformatsTest, StemStripsNothingWithoutKnownExtension) {
  const std::filesystem::path input{"/some/path/to/file.ukn"};
  const std::filesystem::path expected{"file.ukn"};

  ASSERT_EQ(xwim::stem(input), expected);
}
// A name that merely ends in the letters of a format, without a dot, is
// returned unchanged.
TEST(FileformatsTest, StemStripsNothingWithoutExtension) {
  const std::filesystem::path input{"/some/path/to/filerar"};
  const std::filesystem::path expected{"filerar"};

  ASSERT_EQ(xwim::stem(input), expected);
}

View file

@ -2,10 +2,22 @@
gtest_proj = subproject('gtest')
gtest_dep = gtest_proj.get_variable('gtest_main_dep')
# subdir('archives')
user_opt_test_exe = executable('user_opt_test_exe',
sources: ['user_opt_test.cpp', '../src/UserOpt.cpp'],
include_directories: ['../src'],
dependencies: [gtest_dep])
xwim_src = ['../src/archive.cpp',
'../src/archive_sys.cpp']
test('user opt parsing test', user_opt_test_exe)
subdir('archives')
archive_test_exe = executable('archive_test_exe',
sources: ['archive_test.cpp', xwim_src],
include_directories: ['../src'],
dependencies: [gtest_dep, xwim_libs])
test('archive test', archive_test_exe)
fileformats_test_exe = executable('fileformats_test_exe',
sources: ['fileformats_test.cpp', xwim_src],
include_directories: ['../src'],
dependencies: [gtest_dep, xwim_libs])
test('fileformats test', fileformats_test_exe)

View file

@ -1,90 +0,0 @@
#include <gtest/gtest-death-test.h>
#include "gtest/gtest.h"
#include <filesystem>
#include <string>
#include "UserOpt.hpp"
// `-c` alone sets `compress` and leaves `extract` empty.
TEST(UserOpt, compress) {
  using namespace xwim;
  // clang-format off
  char* argv[] = {
    const_cast<char*>("xwim"),
    const_cast<char*>("-c"),
    const_cast<char*>("mandator_paths"),
    nullptr};
  // clang-format on
  const int argc = 3;

  UserOpt parsed{argc, argv};

  ASSERT_TRUE(parsed.compress.has_value());
  ASSERT_FALSE(parsed.extract.has_value());
}
// Passing both `-c` and `-x` sets both `compress` and `extract`.
TEST(UserOpt, exclusive_actions) {
  using namespace xwim;
  // clang-format off
  char* argv[] = {
    const_cast<char*>("xwim"),
    const_cast<char*>("-c"),
    const_cast<char*>("-x"),
    const_cast<char*>("mandatory_paths"),
    nullptr};
  // clang-format on
  const int argc = 4;

  UserOpt parsed{argc, argv};

  ASSERT_TRUE(parsed.compress.has_value());
  ASSERT_TRUE(parsed.extract.has_value());
}
// A path argument containing spaces is stored as one path.
TEST(UserOpt, whitespace_in_path) {
  using namespace xwim;
  // clang-format off
  char* argv[] = {
    const_cast<char*>("xwim"),
    const_cast<char*>("-c"),
    const_cast<char*>("/foo/bar baz/a file"),
    nullptr};
  // clang-format on
  const int argc = 3;

  UserOpt parsed{argc, argv};

  const std::filesystem::path expected("/foo/bar baz/a file");
  ASSERT_TRUE(parsed.paths.count(expected) != 0);
}
// `-o <path>` followed by unlabeled paths: the former ends up in `out`, the
// latter in `paths`.
TEST(UserOpt, mixed_output_and_paths) {
  using namespace xwim;
  // clang-format off
  char* argv[] = {
    const_cast<char*>("xwim"),
    const_cast<char*>("-o"),
    const_cast<char*>("/foo/bar baz/output"),
    const_cast<char*>("/foo/bar baz/a path"),
    const_cast<char*>("/foo/bar baz/another path"),
    nullptr};
  // clang-format on
  const int argc = 5;

  UserOpt parsed{argc, argv};

  const std::filesystem::path first_input("/foo/bar baz/a path");
  const std::filesystem::path second_input("/foo/bar baz/another path");
  ASSERT_TRUE(parsed.paths.count(first_input) != 0);
  ASSERT_TRUE(parsed.paths.count(second_input) != 0);
  ASSERT_TRUE(parsed.out == std::filesystem::path("/foo/bar baz/output"));
}
// Without `-o`, `out` stays empty.
TEST(UserOpt, output_defaults_to_nullopt) {
  using namespace xwim;
  // clang-format off
  char* argv[] = {
    const_cast<char*>("xwim"),
    const_cast<char*>("/foo/bar"),
    nullptr};
  // clang-format on
  const int argc = 2;

  UserOpt parsed{argc, argv};

  ASSERT_FALSE(parsed.out.has_value());
}

Binary file not shown.

View file

@ -1 +0,0 @@
äahääm

View file

@ -1 +0,0 @@
äahääm

Binary file not shown.

View file

@ -1,17 +0,0 @@
#!/bin/python3
import os
import sys
import subprocess
from fnmatch import fnmatch

# Runs the xwim binary on every file found below '../../'. When a glob
# pattern is given as the first command line argument, only file names
# matching it are processed.
pattern = sys.argv[1] if len(sys.argv) > 1 else None

for root, _dirs, files in os.walk('../../'):
    for name in files:
        if pattern is not None and not fnmatch(name, pattern):
            continue

        candidate = os.path.join(root, name)
        print(f"Running {name}")
        print(f"{candidate}")

        # stdout and stderr of the child are captured and decoded as UTF-8,
        # then echoed to the matching stream of this script.
        result = subprocess.run(
            ["../../../target/src/xwim", candidate],
            capture_output=True,
            encoding='utf-8',
        )
        print(f"{result.stdout}")
        print(f"{result.stderr}", file=sys.stderr)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.