diff --git a/.dir-locals.el b/.dir-locals.el index 628f512ab..4616d4003 100644 --- a/.dir-locals.el +++ b/.dir-locals.el @@ -1,47 +1,64 @@ ((python-mode . ((indent-tabs-mode . nil))) - (autoconf-mode . ((indent-tabs-mode . t))) - (c++-mode . ((c-file-style . "k&r") - (indent-tabs-mode . nil) - (c-basic-offset . 4) - (c-file-offsets . - ((block-close . 0) - (brace-list-close . 0) - (brace-list-entry . 0) - (brace-list-intro . +) - (case-label . 0) - (class-close . 0) - (defun-block-intro . +) - (defun-close . 0) - (defun-open . 0) - (else-clause . 0) - (inclass . +) - (label . 0) - (statement . 0) - (statement-block-intro . +) - (statement-case-intro . +) - (statement-cont . +) - (substatement . +) - (topmost-intro . 0))))) - (c-mode . ((c-file-style . "stroustrup") - (indent-tabs-mode . t) - (tab-width . 8) + (c++-mode . ((c-file-style . "linux-kernel") + (c-basic-offset . 8) + (c-label-minimum-indentation . 0) + (c-offsets-alist . ( + (arglist-close . c-lineup-arglist-tabs-only) + (arglist-cont-nonempty . + (c-lineup-gcc-asm-reg c-lineup-arglist-tabs-only)) + (arglist-intro . +) + (brace-list-intro . +) + (c . c-lineup-C-comments) + (case-label . 0) + (comment-intro . c-lineup-comment) + (cpp-define-intro . +) + (cpp-macro . -1000) + (cpp-macro-cont . +) + (defun-block-intro . +) + (else-clause . 0) + (func-decl-cont . +) + (inclass . +) + (inher-cont . c-lineup-multi-inher) + (knr-argdecl-intro . 0) + (label . -1000) + (statement . 0) + (statement-block-intro . +) + (statement-case-intro . +) + (statement-cont . +) + (substatement . +) + )) + (indent-tabs-mode . t) + (show-trailing-whitespace . t) + )) + (c-mode . ((c-file-style . "linux-kernel") (c-basic-offset . 8) - (c-file-offsets . - ((block-close . 0) - (brace-list-close . 0) - (brace-list-entry . 0) - (brace-list-intro . +) - (case-label . 0) - (class-close . 0) - (defun-block-intro . +) - (defun-close . 0) - (defun-open . 0) - (else-clause . 0) - (inclass . +) - (label . 0) - (statement . 
0) - (statement-block-intro . +) - (statement-case-intro . +) - (statement-cont . +) - (substatement . +) - (topmost-intro . 0)))))) + (c-label-minimum-indentation . 0) + (c-offsets-alist . ( + (arglist-close . c-lineup-arglist-tabs-only) + (arglist-cont-nonempty . + (c-lineup-gcc-asm-reg c-lineup-arglist-tabs-only)) + (arglist-intro . +) + (brace-list-intro . +) + (c . c-lineup-C-comments) + (case-label . 0) + (comment-intro . c-lineup-comment) + (cpp-define-intro . +) + (cpp-macro . -1000) + (cpp-macro-cont . +) + (defun-block-intro . +) + (else-clause . 0) + (func-decl-cont . +) + (inclass . +) + (inher-cont . c-lineup-multi-inher) + (knr-argdecl-intro . 0) + (label . -1000) + (statement . 0) + (statement-block-intro . +) + (statement-case-intro . +) + (statement-cont . +) + (substatement . +) + )) + (indent-tabs-mode . t) + (show-trailing-whitespace . t) + )) +) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..4677c34d2 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,27 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. Debian] + - Version [e.g. 12] + +**Additional context** +Add any other context about the problem here. 
diff --git a/.github/ISSUE_TEMPLATE/issue-report.md b/.github/ISSUE_TEMPLATE/issue-report.md deleted file mode 100644 index 492ce0a1c..000000000 --- a/.github/ISSUE_TEMPLATE/issue-report.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -name: Issue report -about: Create a report to help us improve -title: '' -labels: '' -assignees: '' - ---- - -PLEASE READ BEFORE REPORTING AN ISSUE - -libfuse does not have any active, regular contributors or developers. The current maintainer continues to apply pull requests and tries to make regular releases, but unfortunately has no capacity to do any development beyond addressing high-impact issues. When reporting bugs, please understand that unless you are including a pull request or are reporting a critical issue, you will probably not get a response. - -To prevent the issue tracker from being flooded with issues that no-one is intending to work on, and to give more visibility to critical issues that users should be aware of and that most urgently need attention, I will also close most bug reports once they've been inactive for a while. - -Please note that this isn't meant to imply that you haven't found a bug - you most likely have and I'm grateful that you took the time to report it. Unfortunately, libfuse is a purely volunteer driven project, and at the moment there simply aren't any volunteers. 
diff --git a/.github/workflows/abicheck.yml b/.github/workflows/abicheck.yml index 297af751e..245b1bbc1 100644 --- a/.github/workflows/abicheck.yml +++ b/.github/workflows/abicheck.yml @@ -26,18 +26,18 @@ jobs: if: runner.os == 'Linux' run: | sudo apt-get update - sudo apt-get -y install abigail-tools clang gcc + sudo apt-get -y install abigail-tools clang gcc liburing-dev libnuma-dev - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: path: current - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: path: previous ref: ${{ github.event.pull_request.base.ref }} - - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 with: python-version: '3.12' @@ -59,7 +59,8 @@ jobs: - name: Run abidiff run: abidiff --no-added-syms - --headers-dir1 previous/include/ - --headers-dir2 current/include/ - previous/build/lib/libfused.so - current/build/lib/libfused.so + --suppressions current/.github/workflows/abidiff_suppressions.abignore + --headers-dir1 previous/include/ + --headers-dir2 current/include/ + previous/build/lib/libfuse3.so + current/build/lib/libfuse3.so diff --git a/.github/workflows/abicheck_prev_release.yml b/.github/workflows/abicheck_prev_release.yml new file mode 100644 index 000000000..235341b55 --- /dev/null +++ b/.github/workflows/abicheck_prev_release.yml @@ -0,0 +1,76 @@ +--- +name: 'libfuse ABI check against previous major release' + +on: + push: + branches: + - master + - 'fuse-[0-9]+.[0-9]+*' # This will match branches like 3.17, 3.18, 4.0, etc. 
+ pull_request: + branches: + - master + - 'fuse-[0-9]+.[0-9]+*' +permissions: + contents: read + +jobs: + abi: + runs-on: '${{ matrix.os }}' + strategy: + matrix: + os: + - ubuntu-latest + + steps: + - name: Install dependencies (Ubuntu) + if: runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get -y install abigail-tools clang gcc liburing-dev libnuma-dev + + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + path: current + fetch-depth: 0 # Fetch all history and tags + + - name: Determine previous major release tag + id: prev_release + run: | + cd current + chmod +x .github/workflows/find_previous_release_tag.sh + PREV_TAG=$(.github/workflows/find_previous_release_tag.sh) + echo "prev_tag=$PREV_TAG" >> $GITHUB_OUTPUT + + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + with: + path: previous + ref: ${{ steps.prev_release.outputs.prev_tag }} + + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + with: + python-version: '3.12' + + - name: Build current + working-directory: current + run: | + pip install -r requirements.txt + meson setup build --buildtype=debug + meson compile -C build + + - name: Build previous + working-directory: previous + run: | + echo "Previous release tag: ${{ steps.prev_release.outputs.prev_tag }}" + echo "Commit-id of previous release: $(git show HEAD)" + pip install -r requirements.txt + meson setup build --buildtype=debug + meson compile -C build + + - name: Run abidiff + run: abidiff + --no-added-syms + --suppressions current/.github/workflows/abidiff_suppressions.abignore + --headers-dir1 previous/include/ + --headers-dir2 current/include/ + previous/build/lib/libfuse3.so + current/build/lib/libfuse3.so diff --git a/.github/workflows/abidiff_suppressions.abignore b/.github/workflows/abidiff_suppressions.abignore new file mode 100644 index 000000000..949efda6b --- /dev/null +++ b/.github/workflows/abidiff_suppressions.abignore @@ -0,0 
+1,36 @@ +# Suppression file for abidiff false positives in libfuse +# This file suppresses ABI changes that are actually compatible but flagged by abidiff + +[suppress_type] +# Suppress the fuse_conn_info reserved array transformation +# This change is ABI-compatible: uint32_t[16] -> uint16_t request_timeout + uint16_t[31] +# Both use exactly 64 bytes (16*4 = 32*2 = 64 bytes) +name = fuse_conn_info +# Suppress changes to the reserved field that are size/offset related +change_kind = size-or-offset-change +has_data_member_inserted_at = offset_in_bits(512) + +[suppress_type] +# Also suppress the general struct size change for fuse_conn_info +# since the total size remains the same (128 bytes) by a static assertion +# in the code +name = fuse_conn_info +change_kind = size-change + +[suppress_type] +# Suppress ALL changes to fuse_operations struct +# These are backward compatible due to the op_size mechanism in fuse_main() +# Applications pass sizeof(struct fuse_operations) at compile time, +# and the library uses memcpy(&fs->op, op, op_size) to safely copy only +# the fields the application knows about. New fields remain NULL. 
+name = fuse_operations +has_data_member_inserted_at = end +has_size_change = yes + +[suppress_type] +# Suppress ALL changes to fuse_lowlevel_ops struct +# These are backward compatible due to the op_size mechanism in fuse_session_new() +# Same pattern as fuse_operations - op_size controls safe copying +name = fuse_lowlevel_ops +has_data_member_inserted_at = end +has_size_change = yes diff --git a/.github/workflows/bsd.yaml b/.github/workflows/bsd.yaml new file mode 100644 index 000000000..8edb96bce --- /dev/null +++ b/.github/workflows/bsd.yaml @@ -0,0 +1,33 @@ +--- +# TODO: integrate into matrix.os in pr-ci.yml +# TODO: add NetBSD and DragonFlyBSD +name: 'Build (FreeBSD)' +on: + push: + branches: + - master + - 'fuse-[0-9]+.[0-9]+*' + pull_request: + branches: + - master + - 'fuse-[0-9]+.[0-9]+*' + +jobs: + build_bsd: + runs-on: ubuntu-latest + name: Build under FreeBSD + steps: + - name: Checkout + uses: actions/checkout@v6.0.1 + - name: Build + uses: vmactions/freebsd-vm@v1 + with: + usesh: true + prepare: | + pkg install -y meson ninja + run: | + mkdir build + cd build + meson setup .. + ninja -v +... 
\ No newline at end of file diff --git a/.github/workflows/checkpatch.yml b/.github/workflows/checkpatch.yml index 622542c54..2d17cf500 100644 --- a/.github/workflows/checkpatch.yml +++ b/.github/workflows/checkpatch.yml @@ -1,20 +1,16 @@ name: Checkpatch on: - push: - branches: - - master pull_request: + types: [opened, synchronize, reopened] branches: - master -permissions: - contents: read jobs: checkpatch: runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: fetch-depth: 0 - name: Install dependencies @@ -23,14 +19,23 @@ jobs: sudo apt-get install -y perl - name: Run checkpatch.pl run: | + if [ -z "${{ github.base_ref }}" ]; then + echo "Not a pull request, skipping checkpatch" + exit 0 + fi git fetch origin ${{ github.base_ref }} base_commit=$(git merge-base origin/${{ github.base_ref }} HEAD) echo "Base commit: $base_commit" echo "Running checkpatch.pl on all commits in the PR:" git rev-list --reverse $base_commit..HEAD | while read commit; do + # Skip dependabot commits - we do not have control over it + if git log -1 --format='%ae' $commit | grep -q "dependabot"; then + echo "Skipping dependabot commit: $commit" + continue + fi subject=$(git log -1 --format=%s $commit) echo "Checking commit: $commit - $subject" - if ! ./checkpatch.pl --no-tree --ignore MAINTAINERS,SPDX_LICENSE_TAG,COMMIT_MESSAGE,FILE_PATH_CHANGES,EMAIL_SUBJECT,AVOID_EXTERNS,GIT_COMMIT_ID,ENOSYS_SYSCALL -g $commit; then + if ! 
./checkpatch.pl --max-line-length=100 --no-tree --ignore MAINTAINERS,SPDX_LICENSE_TAG,COMMIT_MESSAGE,FILE_PATH_CHANGES,EMAIL_SUBJECT,AVOID_EXTERNS,GIT_COMMIT_ID,ENOSYS_SYSCALL,ENOSYS,FROM_SIGN_OFF_MISMATCH,QUOTED_COMMIT_ID,PREFER_ATTRIBUTE_ALWAYS_UNUSED,PREFER_DEFINED_ATTRIBUTE_MACRO -g $commit; then echo "checkpatch.pl found issues in commit $commit - $subject" exit 1 fi diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 474f2c8d2..914d81613 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -49,11 +49,11 @@ jobs: build-mode: manual steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 # v3.28.9 + uses: github/codeql-action/init@1b168cd39490f61582a9beae412bb7057a6b2c4e # v3.29.5 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} @@ -73,12 +73,12 @@ jobs: - if: matrix.build-mode == 'manual' shell: bash run: | - sudo apt install meson ninja-build python3-pytest + sudo apt install meson ninja-build python3-pytest liburing-dev libnuma-dev meson setup build --buildtype=debug meson compile -C build - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 # v3.28.9 + uses: github/codeql-action/analyze@1b168cd39490f61582a9beae412bb7057a6b2c4e # v3.29.5 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 184fbcfe1..af68e2c9d 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -21,8 +21,8 @@ jobs: steps: - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: 
actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Codespell - uses: codespell-project/actions-codespell@406322ec52dd7b488e48c1c4b82e2a8b3a1bf630 # v2.1 + uses: codespell-project/actions-codespell@8f01853be192eb0f849a5c7d721450e7a467c579 # v2.2 with: skip: checkpatch.pl diff --git a/.github/workflows/find_previous_release_tag.sh b/.github/workflows/find_previous_release_tag.sh new file mode 100755 index 000000000..0e73336a1 --- /dev/null +++ b/.github/workflows/find_previous_release_tag.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +set -e + +# Script to find the previous major release tag for libfuse +# Usage: ./find_previous_release_tag.sh + +# Get current version from meson.build +# Pattern matches: version : "3.18.0" or version: '3.18.0' +VERSION_PATTERN="version\s*:\s*['\"]" +VERSION_EXTRACT="s/.*version\s*:\s*['\"]([^'\"]+)['\"].*/\1/" + +CURRENT_VERSION=$(grep -E "$VERSION_PATTERN" meson.build | \ + sed -E "$VERSION_EXTRACT") +echo "Current version: $CURRENT_VERSION" >&2 + +# Extract major.minor version (e.g., 3.18 from 3.18.0) +# Pattern captures first two numbers separated by dot +MAJOR_MINOR_PATTERN='s/^([0-9]+\.[0-9]+).*/\1/' + +CURRENT_MAJOR_MINOR=$(echo "$CURRENT_VERSION" | \ + sed -E "$MAJOR_MINOR_PATTERN") +echo "Current major.minor: $CURRENT_MAJOR_MINOR" >&2 + +# Get all major.minor versions from tags, sort them, and find the one before +# current +# Pattern matches tags like: fuse-3.17.0, fuse-3.18.1, etc. 
+FUSE_TAG_PATTERN="^fuse-[0-9]+\.[0-9]+" +# Pattern extracts major.minor from version strings +TAG_MAJOR_MINOR_PATTERN='s/^([0-9]+\.[0-9]+).*/\1/' + +ALL_MAJOR_MINOR=$(git tag --list | \ + grep -E "$FUSE_TAG_PATTERN" | \ + sed 's/fuse-//' | \ + sed -E "$TAG_MAJOR_MINOR_PATTERN" | \ + sort -V -u) +echo "All major.minor versions found:" >&2 +echo "$ALL_MAJOR_MINOR" >&2 + +# Find the previous major.minor version +PREV_MAJOR_MINOR=$(echo "$ALL_MAJOR_MINOR" | \ + grep -B1 "^${CURRENT_MAJOR_MINOR}$" | \ + head -1) + +if [ -z "$PREV_MAJOR_MINOR" ] || [ "$PREV_MAJOR_MINOR" = "$CURRENT_MAJOR_MINOR" ]; then + echo "Error: No previous major.minor version found before $CURRENT_MAJOR_MINOR" >&2 + exit 1 +fi + +echo "Previous major.minor: $PREV_MAJOR_MINOR" >&2 + +# Get the latest tag for the previous major.minor version +# Pattern matches tags like: fuse-3.17.0, fuse-3.17.1, fuse-3.17.2, etc. +PREV_TAG_PATTERN="^fuse-${PREV_MAJOR_MINOR}\.[0-9]+" + +PREV_TAG=$(git tag --list | \ + grep -E "$PREV_TAG_PATTERN" | \ + sort -V | \ + tail -1) + +if [ -z "$PREV_TAG" ]; then + echo "Error: No previous major release tag found for version $PREV_MAJOR_MINOR" >&2 + exit 1 +fi + +echo "Previous release tag: $PREV_TAG" >&2 + +# Output the tag to stdout (this is what the workflow will capture) +echo "$PREV_TAG" diff --git a/.github/workflows/iwyi-check.yml b/.github/workflows/iwyi-check.yml index 3751f35de..46c5a8ad4 100644 --- a/.github/workflows/iwyi-check.yml +++ b/.github/workflows/iwyi-check.yml @@ -18,7 +18,7 @@ jobs: name: Include What You Use Check runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: fetch-depth: 0 diff --git a/.github/workflows/pr-ci.yml b/.github/workflows/pr-ci.yml index 4bb7f66de..5a53cce58 100644 --- a/.github/workflows/pr-ci.yml +++ b/.github/workflows/pr-ci.yml @@ -31,9 +31,10 @@ jobs: sudo apt-get update sudo apt-get install 
-y clang doxygen gcc gcc-10 gcc-9 valgrind \ gcc-multilib g++-multilib libc6-dev-i386 \ - libpcap0.8-dev:i386 libudev-dev:i386 pkg-config:i386 - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - uses: actions/setup-python@v5 + libpcap0.8-dev:i386 libudev-dev:i386 pkg-config:i386 \ + liburing-dev libnuma-dev + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + - uses: actions/setup-python@v6 with: python-version: '3.12' - run: pip install -r requirements.txt diff --git a/.gitignore b/.gitignore index 9576c6cbf..a0c19bcff 100644 --- a/.gitignore +++ b/.gitignore @@ -12,7 +12,9 @@ *.gz \#*# *.orig +*.patch *~ +.DS_Store Makefile.in *.m4 stamp-h* diff --git a/AUTHORS b/AUTHORS index 9f6ed908d..2c63b1feb 100644 --- a/AUTHORS +++ b/AUTHORS @@ -260,24 +260,28 @@ Tyler Hall yangyun Abhishek -# New authors since fuse-3.17.0 +# New authors since fuse-3.17.1 Luis Henriques Zegang - -# New authors since fuse-3.17.1-rc0 -Maksim Harbachou -Vassili Tchersky - -# New authors since fuse-3.17.1-rc1 -jnr0006 -Vassili Tchersky - -# New authors since fuse-3.17.1 swj <1186093704@qq.com> +Gleb Popov <6yearold@gmail.com> +WekaJosh <80121792+WekaJosh@users.noreply.github.com> +Alexander Monakov Ben Dooks - -# New authors since fuse-3.17.2 +Ben Linsay +Dave Vasilevsky +Darrick J. 
Wong +Georgi Valkov +Alik Aslanyan +jnr0006 +Jingbo Xu Long Li +Maksim Harbachou +Meng Lu Wang +Vassili Tchersky +Vassili Tchersky +izxl007 +Zeno Sebastian Endemann -# New authors since fuse-3.17.3 -Georgi Valkov +# New authors since fuse-3.18.1 +Abhinav Agarwal diff --git a/ChangeLog.rst b/ChangeLog.rst index 730cb0ffd..9a364b518 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,48 @@ +libfuse 3.18.2 (2026-03-18) +=========================== +* Fix two io-uring issues that might be security critical + * fuse-io-uring: Fix UAF and NULL deref in startup error path + * fuse-io-uring: Fix NULL deref and memory leak in fuse_uring_init_queue + +libfuse 3.18.1 (2025-12-20) +=========================== +* Fix a critical ABI issue compared to libfuse-3.17.3+ +* Note: This breaks ABI compatibility to libfuse-3.18.0 + (given that 3.18.0 is out for 2 days only, probably the lesser evil) + +libfuse 3.18.0 (2025-12-18) +=========================== + +New Features +------------ + +* fuse-over-io-uring communication +* statx support +* Request timeouts: Prevent hung operations +* FUSE_NOTIFY_INC_EPOCH: New notification mechanism for epoch counters + +Important Fixes +---------------- + +* Fixed double unmount on FUSE_DESTROY +* Fixed junk readdirplus results when filesystem doesn't fill stat info +* Fixed memory deallocation in fuse_session_loop_remember +* Fixed COPY_FILE_RANGE interface + +Platform Support +---------------- + +* Improved FreeBSD support (mount error reporting, test runner, build fixes) +* Fixed 32-bit architecture builds +* Fixed build with musl libc and older kernels (< 5.9) + +Other Improvements +------------------ + +* Added PanFS to fusermount whitelist +* Thread naming support for easier debugging + + libfuse 3.17.4 (2025-08-19) =========================== - Try to detect mount-utils by checking for /run/mount/utab diff --git a/doc/README.fuse-io-uring b/doc/README.fuse-io-uring new file mode 100644 index 000000000..51348903d --- /dev/null +++ 
b/doc/README.fuse-io-uring @@ -0,0 +1,38 @@ +fuse-over-io-uring uses io-uring for transport of kernel/userspace +messages. See also https://docs.kernel.org/filesystems/fuse-io-uring.html + +In order to use it, the kernel module needs to have it enabled: +echo 1 > /sys/module/fuse/parameters/enable_uring + +Additionally, FUSE_CAP_OVER_IO_URING needs to be set and +se->uring.enable has to be true. The latter can be +achieved with the libfuse option '-o io_uring'. + +Default queue-depth is 8 and can be changed with the parameter +'-o io_uring_q_depth'. + +As of now there is always one queue per core. Support for a reduced +number of queues is in development. + +Benefits: +- Improved performance by using io_uring for kernel/userspace communication +- Reduced system call overhead compared to traditional FUSE +- Asynchronous I/O operations + +Usage: +To enable io_uring support when mounting a FUSE filesystem: +1. Enable kernel support: echo 1 > /sys/module/fuse/parameters/enable_uring +2. Mount with io_uring option: -o io_uring +3. Optionally adjust queue depth: -o io_uring_q_depth=<depth> + +Example: +./my_fuse_fs /source /mountpoint -o io_uring -o io_uring_q_depth=16 + +Requirements: +- Linux kernel with io_uring and FUSE io_uring support enabled +- libfuse compiled with io_uring support + +Build Dependencies: +- liburing (for io_uring support) +- libnuma (required alongside liburing) +- meson build system with option: -Denable-io-uring=true diff --git a/doc/README.fuse_reply_errors b/doc/README.fuse_reply_errors new file mode 100644 index 000000000..d60fd7a29 --- /dev/null +++ b/doc/README.fuse_reply_errors @@ -0,0 +1,5 @@ +Under normal operation, a call to any fuse_reply_* function should not result +in an error. + +Should the kernel abort the fuse connection, a fuse_reply_* call can return +-ENOENT. 
diff --git a/doc/README.notifications b/doc/README.notifications new file mode 100644 index 000000000..2ca6204b1 --- /dev/null +++ b/doc/README.notifications @@ -0,0 +1,37 @@ +During the life-cycle of a user-space filesystem the usual flow is: + + 1. User-space application does a filesystem-related syscall + 2. Kernel VFS calls into the FUSE kernel driver + 3. FUSE kernel redirects request to the user-space filesystem + 4. User-space server replies to request + 5. FUSE returns reply to VFS + 6. User-space application gets reply from the kernel + +However, there are occasions where the filesystem needs to send notifications to +the kernel that are not in reply to any particular request. If, for example, +a READ request of 4096 bytes results in the filesystem having more data +available for the specific inode, it may be useful to provide this extra data to +the kernel cache so that future read operations will be faster. + +FUSE provides mechanisms for a user-space server to send the kernel certain +types of asynchronous notifications. Currently, these are the available +notifications: + +|-------------+----------------------------------| +| Operation | libfuse function | +|-------------+----------------------------------| +| POLL | fuse_lowlevel_notify_poll | +| INVAL_INODE | fuse_lowlevel_notify_inval_inode | +| ENTRY | fuse_lowlevel_notify_inval_entry | +| STORE | fuse_lowlevel_notify_store | +| RETRIEVE | fuse_lowlevel_notify_retrieve | +| DELETE | fuse_lowlevel_notify_delete | +| RESEND | - | +|-------------+----------------------------------| + +One important restriction is that these asynchronous operations SHALL NOT be +performed while executing other FUSE requests. Doing so will likely result in +deadlocking the user-space filesystem server. In the example above, if the +server is replying to a READ request and has extra data to add to the kernel +cache, it needs to reply to the READ request first, and, e.g., signal a +different thread to do the STORE. 
diff --git a/doc/libfuse-operations.txt b/doc/libfuse-operations.txt index a56f89be5..457ff2cdc 100644 --- a/doc/libfuse-operations.txt +++ b/doc/libfuse-operations.txt @@ -1,5 +1,5 @@ List of libfuse operations with their in/out arguments, created with -help of chatgpt. As of kernel 6.9 (protocol 7.40). The list +help of chatgpt. As of kernel 6.18 (protocol 7.45). The list was only partly human verified - use with care. 1. FUSE_LOOKUP (1) @@ -125,7 +125,7 @@ was only partly human verified - use with care. - out_args[2]: Not used 17. FUSE_STATFS (17) - - in_args[0]: Size of fuse_statfs_in (16 bytes) + - in_args[0]: Not used - in_args[1]: Not used - in_args[2]: Not used - out_args[0]: Size of fuse_statfs_out (typically 96 bytes) @@ -368,7 +368,7 @@ was only partly human verified - use with care. - out_args[2]: Not used 47. FUSE_COPY_FILE_RANGE (47) - - in_args[0]: Size of fuse_copy_file_range_in (48 bytes) + - in_args[0]: Size of fuse_copy_file_range_in (56 bytes) - in_args[1]: Not used - in_args[2]: Not used - out_args[0]: Size of fuse_write_out (24 bytes) @@ -416,3 +416,11 @@ was only partly human verified - use with care. - out_args[0]: Size of fuse_statx_out (typically 256 bytes) - out_args[1]: Not used - out_args[2]: Not used + +53. FUSE_COPY_FILE_RANGE_64 (53) + - in_args[0]: Size of fuse_copy_file_range_in (56 bytes) + - in_args[1]: Not used + - in_args[2]: Not used + - out_args[0]: Size of fuse_copy_file_range_out (8 bytes) + - out_args[1]: Not used + - out_args[2]: Not used diff --git a/example/README.compile b/example/README.compile new file mode 100644 index 000000000..a7b681982 --- /dev/null +++ b/example/README.compile @@ -0,0 +1,4 @@ +Note: + * If the pkg-config command fails due to the absence of the fuse3.pc file, + you should configure the path to the fuse3.pc file in the PKG_CONFIG_PATH + variable. 
\ No newline at end of file diff --git a/example/cuse.c b/example/cuse.c index 330124e87..7f7369a5d 100644 --- a/example/cuse.c +++ b/example/cuse.c @@ -4,7 +4,7 @@ Copyright (C) 2008-2009 Tejun Heo This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ diff --git a/example/cuse_client.c b/example/cuse_client.c index 903ffc63f..de8244473 100644 --- a/example/cuse_client.c +++ b/example/cuse_client.c @@ -4,7 +4,7 @@ Copyright (C) 2008 Tejun Heo This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file diff --git a/example/hello.c b/example/hello.c index b18563d25..1f997e0b5 100644 --- a/example/hello.c +++ b/example/hello.c @@ -3,7 +3,7 @@ Copyright (C) 2001-2007 Miklos Szeredi This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file diff --git a/example/hello_ll.c b/example/hello_ll.c index 16fb5e517..3557b4fb6 100644 --- a/example/hello_ll.c +++ b/example/hello_ll.c @@ -3,7 +3,7 @@ Copyright (C) 2001-2007 Miklos Szeredi This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file @@ -14,6 +14,10 @@ * * gcc -Wall hello_ll.c `pkg-config fused --cflags --libs` -o hello_ll * + * Note: If the pkg-config command fails due to the absence of the fuse3.pc + * file, you should configure the path to the fuse3.pc file in the + * PKG_CONFIG_PATH variable. + * * ## Source code ## * \include hello_ll.c */ diff --git a/example/hello_ll_uds.c b/example/hello_ll_uds.c index 80471ab09..701334e54 100644 --- a/example/hello_ll_uds.c +++ b/example/hello_ll_uds.c @@ -4,7 +4,7 @@ Copyright (C) 2022 Tofik Sonono This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. 
*/ /** @file diff --git a/example/invalidate_path.c b/example/invalidate_path.c index 9d22d2e4b..a4d06885d 100644 --- a/example/invalidate_path.c +++ b/example/invalidate_path.c @@ -4,7 +4,7 @@ (C) 2017 EditShare LLC This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file diff --git a/example/ioctl.c b/example/ioctl.c index 3b915f959..4af26f5b9 100644 --- a/example/ioctl.c +++ b/example/ioctl.c @@ -4,7 +4,7 @@ Copyright (C) 2008 Tejun Heo This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file diff --git a/example/ioctl.h b/example/ioctl.h index a4f054cc3..ca7e97541 100644 --- a/example/ioctl.h +++ b/example/ioctl.h @@ -4,7 +4,7 @@ Copyright (C) 2008 Tejun Heo This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file diff --git a/example/ioctl_client.c b/example/ioctl_client.c index 019d03031..99e27505e 100644 --- a/example/ioctl_client.c +++ b/example/ioctl_client.c @@ -3,10 +3,8 @@ Copyright (C) 2008 SUSE Linux Products GmbH Copyright (C) 2008 Tejun Heo - This program tests the ioctl.c example file systsem. - This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file diff --git a/example/memfs_ll.cc b/example/memfs_ll.cc index e5877565b..7055a434a 100644 --- a/example/memfs_ll.cc +++ b/example/memfs_ll.cc @@ -3,11 +3,10 @@ Copyright (C) 2024 DataDirect Networks. This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. 
*/ -#include -#define FUSE_USE_VERSION 317 +#define FUSE_USE_VERSION FUSE_MAKE_VERSION(3, 18) #include #include @@ -28,6 +27,9 @@ #include #include #include +#ifdef HAVE_LINUX_LIMITS_H +#include +#endif #define MEMFS_ATTR_TIMEOUT 0.0 #define MEMFS_ENTRY_TIMEOUT 0.0 @@ -902,10 +904,14 @@ static void memfs_rename(fuse_req_t req, fuse_ino_t parent, const char *name, Dentry *child_dentry_copy = nullptr; Dentry *existing_dentry = nullptr; +#if defined(RENAME_EXCHANGE) && defined(RENAME_NOREPLACE) if (flags & (RENAME_EXCHANGE | RENAME_NOREPLACE)) { fuse_reply_err(req, EINVAL); return; } +#else + (void)flags; +#endif Inodes.lock(); @@ -1041,53 +1047,30 @@ static void memfs_statfs(fuse_req_t req, [[maybe_unused]] fuse_ino_t ino) fuse_reply_statfs(req, &stbuf); } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" static const struct fuse_lowlevel_ops memfs_oper = { - .init = nullptr, - .destroy = nullptr, .lookup = memfs_lookup, .forget = memfs_forget, .getattr = memfs_getattr, .setattr = memfs_setattr, - .readlink = nullptr, - .mknod = nullptr, .mkdir = memfs_mkdir, .unlink = memfs_unlink, .rmdir = memfs_rmdir, - .symlink = nullptr, .rename = memfs_rename, .link = memfs_link, .open = memfs_open, .read = memfs_read, .write = memfs_write, - .flush = nullptr, .release = memfs_release, - .fsync = nullptr, .opendir = memfs_opendir, .readdir = memfs_readdir, .releasedir = memfs_releasedir, - .fsyncdir = nullptr, .statfs = memfs_statfs, - .setxattr = nullptr, - .getxattr = nullptr, - .listxattr = nullptr, - .removexattr = nullptr, - .access = nullptr, .create = memfs_create, - .getlk = nullptr, - .setlk = nullptr, - .bmap = nullptr, - .ioctl = nullptr, - .poll = nullptr, - .write_buf = nullptr, - .retrieve_reply = nullptr, .forget_multi = memfs_forget_multi, - .flock = nullptr, - .fallocate = nullptr, - .readdirplus = nullptr, - .copy_file_range = nullptr, - .lseek = nullptr, - .tmpfile = nullptr, }; +#pragma GCC diagnostic pop int 
main(int argc, char *argv[]) { diff --git a/example/meson.build b/example/meson.build index b2e896cc4..21e8317a1 100644 --- a/example/meson.build +++ b/example/meson.build @@ -31,13 +31,13 @@ foreach ex : threaded_examples install: false) endforeach -if not platform.endswith('bsd') and platform != 'dragonfly' and add_languages('cpp', required : false) +if platform != 'dragonfly' and add_languages('cpp', required : false) executable('passthrough_hp', 'passthrough_hp.cc', dependencies: [ thread_dep, libfuse_dep ], install: false) executable('memfs_ll', 'memfs_ll.cc', dependencies: [ thread_dep, libfuse_dep ], - cpp_args : '-std=c++17', + cpp_args : '-std=c++20', install: false) endif diff --git a/example/notify_inval_entry.c b/example/notify_inval_entry.c index 0eda6ca4c..5456d22ba 100644 --- a/example/notify_inval_entry.c +++ b/example/notify_inval_entry.c @@ -3,7 +3,7 @@ Copyright (C) 2016 Nikolaus Rath This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file @@ -67,6 +67,12 @@ * To use the function fuse_lowlevel_notify_expire_entry() instead of * fuse_lowlevel_notify_inval_entry(), use the command line option --only-expire * + * Another possible command-line option is --inc-epoch, which will use the FUSE + * low-level function fuse_lowlevel_notify_increment_epoch() instead. This + * function will force the invalidation of all dentries next time they are + * revalidated. Note that --inc-epoch and --only-expire options are mutually + * exclusive. 
+ * * ## Compilation ## * * gcc -Wall notify_inval_entry.c `pkg-config fused --cflags --libs` -o notify_inval_entry @@ -103,12 +109,14 @@ struct options { float timeout; int update_interval; int only_expire; + int inc_epoch; }; static struct options options = { .timeout = 5, .no_notify = 0, .update_interval = 1, .only_expire = 0, + .inc_epoch = 0, }; #define OPTION(t, p) \ @@ -118,6 +126,7 @@ static const struct fuse_opt option_spec[] = { OPTION("--update-interval=%d", update_interval), OPTION("--timeout=%f", timeout), OPTION("--only-expire", only_expire), + OPTION("--inc-epoch", inc_epoch), FUSE_OPT_END }; @@ -263,7 +272,7 @@ static void update_fs(void) { static void* update_fs_loop(void *data) { struct fuse_session *se = (struct fuse_session*) data; char *old_name; - + int ret = 0; while(!fuse_session_exited(se)) { old_name = strdup(file_name); @@ -271,24 +280,27 @@ static void* update_fs_loop(void *data) { if (!options.no_notify && lookup_cnt) { if(options.only_expire) { // expire entry - int ret = fuse_lowlevel_notify_expire_entry - (se, FUSE_ROOT_ID, old_name, strlen(old_name)); + ret = fuse_lowlevel_notify_expire_entry + (se, FUSE_ROOT_ID, old_name, strlen(old_name)); // no kernel support if (ret == -ENOSYS) { printf("fuse_lowlevel_notify_expire_entry not supported by kernel\n"); - printf("Exiting...\n"); - - fuse_session_exit(se); - // Make sure to exit now, rather than on next request from userspace - pthread_kill(main_thread, SIGPIPE); - break; } + // 1) ret == 0: successful expire of an existing entry // 2) ret == -ENOENT: kernel has already expired the entry / // entry does not exist anymore in the kernel assert(ret == 0 || ret == -ENOENT); + } else if (options.inc_epoch) { // increment epoch + ret = fuse_lowlevel_notify_increment_epoch(se); + + if (ret == -ENOSYS) { + printf("fuse_lowlevel_notify_increment_epoch not supported by kernel\n"); + break; + } + assert(ret == 0); } else { // invalidate entry assert(fuse_lowlevel_notify_inval_entry (se, 
FUSE_ROOT_ID, old_name, strlen(old_name)) == 0); @@ -297,6 +309,15 @@ static void* update_fs_loop(void *data) { free(old_name); sleep(options.update_interval); } + + if (ret == -ENOSYS) { + printf("Exiting...\n"); + + fuse_session_exit(se); + // Make sure to exit now, rather than on next request from userspace + pthread_kill(main_thread, SIGPIPE); + } + return NULL; } @@ -307,7 +328,8 @@ static void show_help(const char *progname) " --timeout= Timeout for kernel caches\n" " --update-interval= Update-rate of file system contents\n" " --no-notify Disable kernel notifications\n" - " --only-expire Expire entries instead of invalidating them\n" + " --only-expire Expire entries instead of invalidating them\n" + " --inc-epoch Increment epoch, invalidating all dentries\n" "\n"); } @@ -336,6 +358,11 @@ int main(int argc, char *argv[]) { ret = 0; goto err_out1; } + if (options.only_expire && options.inc_epoch) { + printf("'only-expire' and 'inc-epoch' options are exclusive\n"); + ret = 0; + goto err_out1; + } /* Initial contents */ update_fs(); diff --git a/example/notify_inval_inode.c b/example/notify_inval_inode.c index b3721267b..1af9530d4 100644 --- a/example/notify_inval_inode.c +++ b/example/notify_inval_inode.c @@ -3,7 +3,7 @@ Copyright (C) 2016 Nikolaus Rath This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file diff --git a/example/notify_store_retrieve.c b/example/notify_store_retrieve.c index cb1d27402..44c970af7 100644 --- a/example/notify_store_retrieve.c +++ b/example/notify_store_retrieve.c @@ -3,7 +3,7 @@ Copyright (C) 2016 Nikolaus Rath This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. 
*/ /** @file diff --git a/example/null.c b/example/null.c index 65670eec7..b1b2bf364 100644 --- a/example/null.c +++ b/example/null.c @@ -3,7 +3,7 @@ Copyright (C) 2001-2007 Miklos Szeredi This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file diff --git a/example/passthrough.c b/example/passthrough.c index fcc9263d8..81a265aa4 100644 --- a/example/passthrough.c +++ b/example/passthrough.c @@ -4,7 +4,7 @@ Copyright (C) 2011 Sebastian Pipping This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file @@ -16,14 +16,14 @@ * * Compile with * - * gcc -Wall passthrough.c `pkg-config fused --cflags --libs` -o passthrough + * gcc -Wall passthrough.c `pkg-config fuse3 --cflags --libs` -o passthrough * * ## Source code ## * \include passthrough.c */ -#define FUSE_USE_VERSION 31 +#define FUSE_USE_VERSION FUSE_MAKE_VERSION(3, 18) #define _GNU_SOURCE @@ -40,10 +40,6 @@ #include #include #include -#ifdef __FreeBSD__ -#include -#include -#endif #include #ifdef HAVE_SETXATTR #include @@ -52,12 +48,13 @@ #include "passthrough_helpers.h" static int fill_dir_plus = 0; +static int readdir_zero_ino; static void *xmp_init(struct fuse_conn_info *conn, struct fuse_config *cfg) { (void) conn; - cfg->use_ino = 1; + cfg->use_ino = !readdir_zero_ino; /* parallel_direct_writes feature depends on direct_io features. 
To make parallel_direct_writes valid, need either set cfg->direct_io @@ -144,6 +141,8 @@ static int xmp_readdir(const char *path, void *buf, fuse_fill_dir_t filler, st.st_ino = de->d_ino; st.st_mode = de->d_type << 12; } + if (readdir_zero_ino) + st.st_ino = 0; if (filler(buf, de->d_name, &st, 0, fill_dir_plus)) break; } @@ -399,7 +398,6 @@ static int xmp_fsync(const char *path, int isdatasync, return 0; } -#ifdef HAVE_POSIX_FALLOCATE static int xmp_fallocate(const char *path, int mode, off_t offset, off_t length, struct fuse_file_info *fi) { @@ -408,9 +406,6 @@ static int xmp_fallocate(const char *path, int mode, (void) fi; - if (mode) - return -EOPNOTSUPP; - if(fi == NULL) fd = open(path, O_WRONLY); else @@ -419,13 +414,12 @@ static int xmp_fallocate(const char *path, int mode, if (fd == -1) return -errno; - res = -posix_fallocate(fd, offset, length); + res = do_fallocate(fd, mode, offset, length); if(fi == NULL) close(fd); return res; } -#endif #ifdef HAVE_SETXATTR /* xattr operations are optional and can safely be left unimplemented */ @@ -528,6 +522,24 @@ static off_t xmp_lseek(const char *path, off_t off, int whence, struct fuse_file return res; } +#ifdef HAVE_STATX +static int xmp_statx(const char *path, int flags, int mask, struct statx *stxbuf, + struct fuse_file_info *fi) +{ + int fd = -1; + int res; + + if (fi) + fd = fi->fh; + + res = statx(fd, path, flags | AT_SYMLINK_NOFOLLOW, mask, stxbuf); + if (res == -1) + return -errno; + + return 0; +} +#endif + static const struct fuse_operations xmp_oper = { .init = xmp_init, .getattr = xmp_getattr, @@ -554,9 +566,7 @@ static const struct fuse_operations xmp_oper = { .statfs = xmp_statfs, .release = xmp_release, .fsync = xmp_fsync, -#ifdef HAVE_POSIX_FALLOCATE .fallocate = xmp_fallocate, -#endif #ifdef HAVE_SETXATTR .setxattr = xmp_setxattr, .getxattr = xmp_getxattr, @@ -567,6 +577,9 @@ static const struct fuse_operations xmp_oper = { .copy_file_range = xmp_copy_file_range, #endif .lseek = xmp_lseek, +#ifdef 
HAVE_STATX + .statx = xmp_statx, +#endif }; int main(int argc, char *argv[]) @@ -580,6 +593,9 @@ int main(int argc, char *argv[]) for (i=0, new_argc=0; (i This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file @@ -23,7 +23,7 @@ * \include passthrough_fh.c */ -#define FUSE_USE_VERSION 31 +#define FUSE_USE_VERSION FUSE_MAKE_VERSION(3, 18) #define _GNU_SOURCE @@ -47,6 +47,8 @@ #endif #include /* flock(2) */ +#include "passthrough_helpers.h" + static void *xmp_init(struct fuse_conn_info *conn, struct fuse_config *cfg) { @@ -514,18 +516,13 @@ static int xmp_fsync(const char *path, int isdatasync, return 0; } -#ifdef HAVE_POSIX_FALLOCATE static int xmp_fallocate(const char *path, int mode, off_t offset, off_t length, struct fuse_file_info *fi) { (void) path; - if (mode) - return -EOPNOTSUPP; - - return -posix_fallocate(fi->fh, offset, length); + return do_fallocate(fi->fh, mode, offset, length); } -#endif #ifdef HAVE_SETXATTR /* xattr operations are optional and can safely be left unimplemented */ @@ -619,6 +616,24 @@ static off_t xmp_lseek(const char *path, off_t off, int whence, struct fuse_file return res; } +#ifdef HAVE_STATX +static int xmp_statx(const char *path, int flags, int mask, struct statx *stxbuf, + struct fuse_file_info *fi) +{ + int fd = -1; + int res; + + if (fi) + fd = fi->fh; + + res = statx(fd, path, flags | AT_SYMLINK_NOFOLLOW, mask, stxbuf); + if (res == -1) + return -errno; + + return 0; +} +#endif + static const struct fuse_operations xmp_oper = { .init = xmp_init, .getattr = xmp_getattr, @@ -650,9 +665,7 @@ static const struct fuse_operations xmp_oper = { .flush = xmp_flush, .release = xmp_release, .fsync = xmp_fsync, -#ifdef HAVE_POSIX_FALLOCATE .fallocate = xmp_fallocate, -#endif #ifdef HAVE_SETXATTR .setxattr = xmp_setxattr, .getxattr = xmp_getxattr, @@ -667,6 +680,9 @@ static const struct fuse_operations xmp_oper = { .copy_file_range = xmp_copy_file_range, #endif .lseek 
= xmp_lseek, +#ifdef HAVE_STATX + .statx = xmp_statx, +#endif }; int main(int argc, char *argv[]) diff --git a/example/passthrough_helpers.h b/example/passthrough_helpers.h index 6b77c3360..326a5c7d6 100644 --- a/example/passthrough_helpers.h +++ b/example/passthrough_helpers.h @@ -23,11 +23,55 @@ * SUCH DAMAGE */ +#ifndef FUSE_EXAMPLE_PASSTHROUGH_HELPERS_H_ +#define FUSE_EXAMPLE_PASSTHROUGH_HELPERS_H_ + +#include +#include +#include +#include +#include + +#ifdef __FreeBSD__ +#include +#include +#endif + +static inline int do_fallocate(int fd, int mode, off_t offset, off_t length) +{ +#ifdef HAVE_FALLOCATE + if (fallocate(fd, mode, offset, length) == -1) + return -errno; + return 0; +#else // HAVE_FALLOCATE + +#ifdef HAVE_POSIX_FALLOCATE + if (mode == 0) + return -posix_fallocate(fd, offset, length); +#endif + +#ifdef HAVE_FSPACECTL + // 0x3 == FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE + if (mode == 0x3) { + struct spacectl_range sr; + + sr.r_offset = offset; + sr.r_len = length; + if (fspacectl(fd, SPACECTL_DEALLOC, &sr, 0, NULL) == -1) + return -errno; + return 0; + } +#endif + + return -EOPNOTSUPP; +#endif // HAVE_FALLOCATE +} + /* * Creates files on the underlying file system in response to a FUSE_MKNOD * operation */ -static int mknod_wrapper(int dirfd, const char *path, const char *link, +static inline int mknod_wrapper(int dirfd, const char *path, const char *link, int mode, dev_t rdev) { int res; @@ -74,3 +118,5 @@ static int mknod_wrapper(int dirfd, const char *path, const char *link, return res; } + +#endif // FUSE_EXAMPLE_PASSTHROUGH_HELPERS_H_ diff --git a/example/passthrough_hp.cc b/example/passthrough_hp.cc index be089dc85..2dc0f9503 100644 --- a/example/passthrough_hp.cc +++ b/example/passthrough_hp.cc @@ -5,7 +5,7 @@ Copyright (C) 2018 Valve, Inc This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. 
*/ /** @file @@ -59,11 +59,17 @@ #include #include #include +#ifdef HAVE_SYS_XATTR_H #include +#endif #include #include #include #include +#ifdef __FreeBSD__ +#include +#include +#endif // C++ includes #include @@ -72,6 +78,9 @@ #include "cxxopts.hpp" #include #include +#include + +#include "passthrough_helpers.h" using namespace std; @@ -82,15 +91,14 @@ using namespace std; sfs_dirp` elements as inodes and file handles. This means that we must be able to store pointer a pointer in both a fuse_ino_t variable and a uint64_t variable (used for file handles). */ -static_assert(sizeof(fuse_ino_t) >= sizeof(void*), - "void* must fit into fuse_ino_t"); +static_assert(sizeof(fuse_ino_t) >= sizeof(void *), + "void* must fit into fuse_ino_t"); static_assert(sizeof(fuse_ino_t) >= sizeof(uint64_t), - "fuse_ino_t must be at least 64 bits"); - + "fuse_ino_t must be at least 64 bits"); /* Forward declarations */ struct Inode; -static Inode& get_inode(fuse_ino_t ino); +static Inode &get_inode(fuse_ino_t ino); static void forget_one(fuse_ino_t ino, uint64_t n); // Uniquely identifies a file in the source directory tree. This could @@ -103,1474 +111,1556 @@ static void forget_one(fuse_ino_t ino, uint64_t n); typedef std::pair SrcId; // Define a hash function for SrcId -namespace std { - template<> - struct hash { - size_t operator()(const SrcId& id) const { - return hash{}(id.first) ^ hash{}(id.second); - } - }; +namespace std +{ +template <> struct hash { + size_t operator()(const SrcId &id) const + { + return hash{}(id.first) ^ hash{}(id.second); + } +}; } // Maps files in the source directory tree to inodes typedef std::unordered_map InodeMap; struct Inode { - int fd {-1}; - dev_t src_dev {0}; - ino_t src_ino {0}; - int generation {0}; - int backing_id {0}; - uint64_t nopen {0}; - uint64_t nlookup {0}; - std::mutex m; - - // Delete copy constructor and assignments. We could implement - // move if we need it. 
- Inode() = default; - Inode(const Inode&) = delete; - Inode(Inode&& inode) = delete; - Inode& operator=(Inode&& inode) = delete; - Inode& operator=(const Inode&) = delete; - - ~Inode() { - if(fd > 0) - close(fd); - } + int fd{ -1 }; + dev_t src_dev{ 0 }; + ino_t src_ino{ 0 }; + int generation{ 0 }; + int backing_id{ 0 }; + uint64_t nopen{ 0 }; + std::atomic nlookup{ 0 }; + std::mutex m; + + // Delete copy constructor and assignments. We could implement + // move if we need it. + Inode() = default; + Inode(const Inode &) = delete; + Inode(Inode &&inode) = delete; + Inode &operator=(Inode &&inode) = delete; + Inode &operator=(const Inode &) = delete; + + ~Inode() + { + if (fd > 0) + close(fd); + } }; struct Fs { - // Must be acquired *after* any Inode.m locks. - std::mutex mutex; - InodeMap inodes; // protected by mutex - Inode root; - double timeout; - bool debug; - bool debug_fuse; - bool foreground; - std::string source; - size_t blocksize; - dev_t src_dev; - bool nosplice; - bool nocache; - size_t num_threads; - bool clone_fd; - std::string fuse_mount_options; - bool direct_io; - bool passthrough; + // Must be acquired *after* any Inode.m locks. + std::mutex mutex; + InodeMap inodes; // protected by mutex + Inode root; + double timeout; + bool debug; + bool debug_fuse; + bool foreground; + std::string source; + size_t blocksize; + dev_t src_dev; + bool nosplice; + bool nocache; + size_t num_threads; + bool clone_fd; + + std::string fuse_mount_options; + bool direct_io; + bool passthrough; }; static Fs fs{}; +#define FUSE_BUF_COPY_FLAGS \ + (fs.nosplice ? FUSE_BUF_NO_SPLICE : \ + static_cast(FUSE_BUF_SPLICE_MOVE)) -#define FUSE_BUF_COPY_FLAGS \ - (fs.nosplice ? 
\ - FUSE_BUF_NO_SPLICE : \ - static_cast(FUSE_BUF_SPLICE_MOVE)) - - -static Inode& get_inode(fuse_ino_t ino) { - if (ino == FUSE_ROOT_ID) - return fs.root; - - Inode* inode = reinterpret_cast(ino); - if(inode->fd == -1) { - cerr << "INTERNAL ERROR: Unknown inode " << ino << endl; - abort(); - } - return *inode; +static Inode &get_inode(fuse_ino_t ino) +{ + if (ino == FUSE_ROOT_ID) + return fs.root; + + Inode *inode = reinterpret_cast(ino); + if (inode->fd == -1) { + cerr << "INTERNAL ERROR: Unknown inode " << ino << endl; + abort(); + } + return *inode; } - -static int get_fs_fd(fuse_ino_t ino) { - int fd = get_inode(ino).fd; - return fd; +static int get_fs_fd(fuse_ino_t ino) +{ + int fd = get_inode(ino).fd; + return fd; } - -static void sfs_init(void *userdata, fuse_conn_info *conn) { - (void)userdata; - - if (!fuse_set_feature_flag(conn, FUSE_CAP_PASSTHROUGH)) - fs.passthrough = false; - - /* Passthrough and writeback cache are conflicting modes */ - if (fs.timeout && !fs.passthrough) - fuse_set_feature_flag(conn, FUSE_CAP_WRITEBACK_CACHE); - - fuse_set_feature_flag(conn, FUSE_CAP_FLOCK_LOCKS); - - if (fs.nosplice) { - // FUSE_CAP_SPLICE_READ is enabled in libfused by default, - // see do_init() in in fuse_lowlevel.c - // Just unset all, in case FUSE_CAP_SPLICE_WRITE or - // FUSE_CAP_SPLICE_MOVE would also get enabled by default. - fuse_unset_feature_flag(conn, FUSE_CAP_SPLICE_READ); - fuse_unset_feature_flag(conn, FUSE_CAP_SPLICE_WRITE); - fuse_unset_feature_flag(conn, FUSE_CAP_SPLICE_MOVE); - } else { - fuse_set_feature_flag(conn, FUSE_CAP_SPLICE_WRITE); - fuse_set_feature_flag(conn, FUSE_CAP_SPLICE_READ); - fuse_set_feature_flag(conn, FUSE_CAP_SPLICE_MOVE); - } - - /* This is a local file system - no network coherency needed */ - fuse_set_feature_flag(conn, FUSE_CAP_DIRECT_IO_ALLOW_MMAP); - - /* Disable NFS export support, which also disabled name_to_handle_at. 
- * Goal is to make xfstests that test name_to_handle_at to fail with - * the right error code (EOPNOTSUPP) than to open_by_handle_at to fail with - * ESTALE and let those test fail. - * Perfect NFS export support is not possible with this FUSE filesystem needs - * more kernel work, in order to passthrough nfs handle encode/decode to - * fuse-server/daemon. - */ - fuse_set_feature_flag(conn, FUSE_CAP_NO_EXPORT_SUPPORT); - - /* Disable the receiving and processing of FUSE_INTERRUPT requests */ - conn->no_interrupt = 1; - - /* Try a large IO by default */ - conn->max_write = 4 * 1024 * 1024; +static void sfs_init(void *userdata, fuse_conn_info *conn) +{ + (void)userdata; + + if (!fuse_set_feature_flag(conn, FUSE_CAP_PASSTHROUGH)) + fs.passthrough = false; + + /* Passthrough and writeback cache are conflicting modes */ + if (fs.timeout && !fs.passthrough) + fuse_set_feature_flag(conn, FUSE_CAP_WRITEBACK_CACHE); + + fuse_set_feature_flag(conn, FUSE_CAP_FLOCK_LOCKS); + + if (fs.nosplice) { + // FUSE_CAP_SPLICE_READ is enabled in libfuse3 by default, + // see do_init() in fuse_lowlevel.c + // Just unset all, in case FUSE_CAP_SPLICE_WRITE or + // FUSE_CAP_SPLICE_MOVE would also get enabled by default. + fuse_unset_feature_flag(conn, FUSE_CAP_SPLICE_READ); + fuse_unset_feature_flag(conn, FUSE_CAP_SPLICE_WRITE); + fuse_unset_feature_flag(conn, FUSE_CAP_SPLICE_MOVE); + } else { + fuse_set_feature_flag(conn, FUSE_CAP_SPLICE_WRITE); + fuse_set_feature_flag(conn, FUSE_CAP_SPLICE_READ); + fuse_set_feature_flag(conn, FUSE_CAP_SPLICE_MOVE); + } + + /* This is a local file system - no network coherency needed */ + fuse_set_feature_flag(conn, FUSE_CAP_DIRECT_IO_ALLOW_MMAP); + + /* Disable NFS export support, which also disabled name_to_handle_at. + * Goal is to make xfstests that test name_to_handle_at to fail with + * the right error code (EOPNOTSUPP) than to open_by_handle_at to fail with + * ESTALE and let those test fail. 
+ * Perfect NFS export support is not possible with this FUSE filesystem needs + * more kernel work, in order to passthrough nfs handle encode/decode to + * fuse-server/daemon. + */ + fuse_set_feature_flag(conn, FUSE_CAP_NO_EXPORT_SUPPORT); + + /* Disable the receiving and processing of FUSE_INTERRUPT requests */ + conn->no_interrupt = 1; + + /* Try a large IO by default */ + conn->max_write = 4 * 1024 * 1024; } - static void sfs_getattr(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { - struct stat attr; - int fd = fi ? fi->fh : get_inode(ino).fd; - - auto res = fstatat(fd, "", &attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); - if (res == -1) { - fuse_reply_err(req, errno); - return; - } - fuse_reply_attr(req, &attr, fs.timeout); + struct stat attr; + int fd = fi ? fi->fh : get_inode(ino).fd; + + auto res = fstatat(fd, "", &attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + if (res == -1) { + fuse_reply_err(req, errno); + return; + } + fuse_reply_attr(req, &attr, fs.timeout); } - +static int with_fd_path(int fd, const std::function &f) +{ +#ifdef __FreeBSD__ + struct kinfo_file kf; + kf.kf_structsize = sizeof(kf); + int ret = fcntl(fd, F_KINFO, &kf); + if (ret == -1) + return ret; + return f(kf.kf_path); +#else // Linux + char procname[64]; + sprintf(procname, "/proc/self/fd/%i", fd); + return f(procname); +#endif +} static void do_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - int valid, struct fuse_file_info* fi) { - Inode& inode = get_inode(ino); - int ifd = inode.fd; - int res; - - if (valid & FUSE_SET_ATTR_MODE) { - if (fi) { - res = fchmod(fi->fh, attr->st_mode); - } else { - char procname[64]; - sprintf(procname, "/proc/self/fd/%i", ifd); - res = chmod(procname, attr->st_mode); - } - if (res == -1) - goto out_err; - } - if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { - uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : static_cast(-1); - gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
attr->st_gid : static_cast(-1); - - res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); - if (res == -1) - goto out_err; - } - if (valid & FUSE_SET_ATTR_SIZE) { - if (fi) { - res = ftruncate(fi->fh, attr->st_size); - } else { - char procname[64]; - sprintf(procname, "/proc/self/fd/%i", ifd); - res = truncate(procname, attr->st_size); - } - if (res == -1) - goto out_err; - } - if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { - struct timespec tv[2]; - - tv[0].tv_sec = 0; - tv[1].tv_sec = 0; - tv[0].tv_nsec = UTIME_OMIT; - tv[1].tv_nsec = UTIME_OMIT; - - if (valid & FUSE_SET_ATTR_ATIME_NOW) - tv[0].tv_nsec = UTIME_NOW; - else if (valid & FUSE_SET_ATTR_ATIME) - tv[0] = attr->st_atim; - - if (valid & FUSE_SET_ATTR_MTIME_NOW) - tv[1].tv_nsec = UTIME_NOW; - else if (valid & FUSE_SET_ATTR_MTIME) - tv[1] = attr->st_mtim; - - if (fi) - res = futimens(fi->fh, tv); - else { + int valid, struct fuse_file_info *fi) +{ + Inode &inode = get_inode(ino); + int ifd = inode.fd; + int res; + + if (valid & FUSE_SET_ATTR_MODE) { + if (fi) { + res = fchmod(fi->fh, attr->st_mode); + } else { + res = with_fd_path(ifd, [attr](const char *procname) { + return chmod(procname, attr->st_mode); + }); + } + if (res == -1) + goto out_err; + } + if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { + uid_t uid = (valid & FUSE_SET_ATTR_UID) ? + attr->st_uid : + static_cast(-1); + gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
+ attr->st_gid : + static_cast(-1); + + res = fchownat(ifd, "", uid, gid, + AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + if (res == -1) + goto out_err; + } + if (valid & FUSE_SET_ATTR_SIZE) { + if (fi) { + res = ftruncate(fi->fh, attr->st_size); + } else { + res = with_fd_path(ifd, [attr](const char *procname) { + return truncate(procname, attr->st_size); + }); + } + if (res == -1) + goto out_err; + } + if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { + struct timespec tv[2]; + + tv[0].tv_sec = 0; + tv[1].tv_sec = 0; + tv[0].tv_nsec = UTIME_OMIT; + tv[1].tv_nsec = UTIME_OMIT; + + if (valid & FUSE_SET_ATTR_ATIME_NOW) + tv[0].tv_nsec = UTIME_NOW; + else if (valid & FUSE_SET_ATTR_ATIME) + tv[0] = attr->st_atim; + + if (valid & FUSE_SET_ATTR_MTIME_NOW) + tv[1].tv_nsec = UTIME_NOW; + else if (valid & FUSE_SET_ATTR_MTIME) + tv[1] = attr->st_mtim; + + if (fi) + res = futimens(fi->fh, tv); + else { #ifdef HAVE_UTIMENSAT - char procname[64]; - sprintf(procname, "/proc/self/fd/%i", ifd); - res = utimensat(AT_FDCWD, procname, tv, 0); + res = with_fd_path(ifd, [&tv](const char *procname) { + return utimensat(AT_FDCWD, procname, tv, 0); + }); #else - res = -1; - errno = EOPNOTSUPP; + res = -1; + errno = EOPNOTSUPP; #endif - } - if (res == -1) - goto out_err; - } - return sfs_getattr(req, ino, fi); + } + if (res == -1) + goto out_err; + } + return sfs_getattr(req, ino, fi); out_err: - fuse_reply_err(req, errno); + fuse_reply_err(req, errno); } - static void sfs_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - int valid, fuse_file_info *fi) { - (void) ino; - do_setattr(req, ino, attr, valid, fi); + int valid, fuse_file_info *fi) +{ + (void)ino; + do_setattr(req, ino, attr, valid, fi); } - -static int do_lookup(fuse_ino_t parent, const char *name, - fuse_entry_param *e) { - if (fs.debug) - cerr << "DEBUG: lookup(): name=" << name - << ", parent=" << parent << endl; - memset(e, 0, sizeof(*e)); - e->attr_timeout = fs.timeout; - e->entry_timeout = fs.timeout; - - 
auto newfd = openat(get_fs_fd(parent), name, O_PATH | O_NOFOLLOW); - if (newfd == -1) - return errno; - - auto res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); - if (res == -1) { - auto saveerr = errno; - close(newfd); - if (fs.debug) - cerr << "DEBUG: lookup(): fstatat failed" << endl; - return saveerr; - } - - if (e->attr.st_dev != fs.src_dev) { - cerr << "WARNING: Mountpoints in the source directory tree will be hidden." << endl; - return ENOTSUP; - } else if (e->attr.st_ino == FUSE_ROOT_ID) { - cerr << "ERROR: Source directory tree must not include inode " - << FUSE_ROOT_ID << endl; - return EIO; - } - - SrcId id {e->attr.st_ino, e->attr.st_dev}; - unique_lock fs_lock {fs.mutex}; - Inode* inode_p; - try { - inode_p = &fs.inodes[id]; - } catch (std::bad_alloc&) { - return ENOMEM; - } - e->ino = reinterpret_cast(inode_p); - Inode& inode {*inode_p}; - e->generation = inode.generation; - - if (inode.fd == -ENOENT) { // found unlinked inode - if (fs.debug) - cerr << "DEBUG: lookup(): inode " << e->attr.st_ino - << " recycled; generation=" << inode.generation << endl; - /* fallthrough to new inode but keep existing inode.nlookup */ - } - - if (inode.fd > 0) { // found existing inode - fs_lock.unlock(); - if (fs.debug) - cerr << "DEBUG: lookup(): inode " << e->attr.st_ino - << " (userspace) already known; fd = " << inode.fd << endl; - lock_guard g {inode.m}; - - inode.nlookup++; - if (fs.debug) - cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " - << "inode " << inode.src_ino - << " count " << inode.nlookup << endl; - - - close(newfd); - } else { // no existing inode - /* This is just here to make Helgrind happy. 
It violates the - lock ordering requirement (inode.m must be acquired before - fs.mutex), but this is of no consequence because at this - point no other thread has access to the inode mutex */ - lock_guard g {inode.m}; - inode.src_ino = e->attr.st_ino; - inode.src_dev = e->attr.st_dev; - - inode.nlookup++; - if (fs.debug) - cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " - << "inode " << inode.src_ino - << " count " << inode.nlookup << endl; - - inode.fd = newfd; - fs_lock.unlock(); - - if (fs.debug) - cerr << "DEBUG: lookup(): created userspace inode " << e->attr.st_ino - << "; fd = " << inode.fd << endl; - } - - return 0; +static int do_lookup(fuse_ino_t parent, const char *name, fuse_entry_param *e) +{ + if (fs.debug) + cerr << "DEBUG: lookup(): name=" << name + << ", parent=" << parent << endl; + memset(e, 0, sizeof(*e)); + e->attr_timeout = fs.timeout; + e->entry_timeout = fs.timeout; + + auto newfd = openat(get_fs_fd(parent), name, O_PATH | O_NOFOLLOW); + if (newfd == -1) + return errno; + + auto res = fstatat(newfd, "", &e->attr, + AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + if (res == -1) { + auto saveerr = errno; + close(newfd); + if (fs.debug) + cerr << "DEBUG: lookup(): fstatat failed" << endl; + return saveerr; + } + + if (e->attr.st_dev != fs.src_dev) { + cerr << "WARNING: Mountpoints in the source directory tree will be hidden." 
+ << endl; + return ENOTSUP; + } else if (e->attr.st_ino == FUSE_ROOT_ID) { + cerr << "ERROR: Source directory tree must not include inode " + << FUSE_ROOT_ID << endl; + return EIO; + } + + SrcId id{ e->attr.st_ino, e->attr.st_dev }; + unique_lock fs_lock{ fs.mutex }; + Inode *inode_p; + try { + inode_p = &fs.inodes[id]; + } catch (std::bad_alloc &) { + return ENOMEM; + } + e->ino = reinterpret_cast(inode_p); + Inode &inode{ *inode_p }; + e->generation = inode.generation; + + if (inode.fd == -ENOENT) { // found unlinked inode + if (fs.debug) + cerr << "DEBUG: lookup(): inode " << e->attr.st_ino + << " recycled; generation=" << inode.generation + << endl; + /* fallthrough to new inode but keep existing inode.nlookup */ + } + + if (inode.fd > 0) { // found existing inode + if (fs.debug) + cerr << "DEBUG: lookup(): inode " << e->attr.st_ino + << " (userspace) already known; fd = " << inode.fd + << endl; + + inode.nlookup++; + if (fs.debug) + cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " + << "inode " << inode.src_ino << " count " + << inode.nlookup << endl; + + fs_lock.unlock(); + close(newfd); + } else { // no existing inode + /* This is just here to make Helgrind happy. 
It violates the + * lock ordering requirement (inode.m must be acquired before + * fs.mutex), but this is of no consequence because at this + * point no other thread has access to the inode mutex + */ + lock_guard g{ inode.m }; + inode.src_ino = e->attr.st_ino; + inode.src_dev = e->attr.st_dev; + + inode.nlookup++; + if (fs.debug) + cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " + << "inode " << inode.src_ino << " count " + << inode.nlookup << endl; + + inode.fd = newfd; + fs_lock.unlock(); + + if (fs.debug) + cerr << "DEBUG: lookup(): created userspace inode " + << e->attr.st_ino << "; fd = " << inode.fd << endl; + } + + return 0; } - -static void sfs_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) { - fuse_entry_param e {}; - auto err = do_lookup(parent, name, &e); - if (err == ENOENT) { - e.attr_timeout = fs.timeout; - e.entry_timeout = fs.timeout; - e.ino = e.attr.st_ino = 0; - fuse_reply_entry(req, &e); - } else if (err) { - if (err == ENFILE || err == EMFILE) - cerr << "ERROR: Reached maximum number of file descriptors." << endl; - fuse_reply_err(req, err); - } else { - fuse_reply_entry(req, &e); - } +static void sfs_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) +{ + fuse_entry_param e{}; + auto err = do_lookup(parent, name, &e); + if (err == ENOENT) { + e.attr_timeout = fs.timeout; + e.entry_timeout = fs.timeout; + e.ino = e.attr.st_ino = 0; + fuse_reply_entry(req, &e); + } else if (err) { + if (err == ENFILE || err == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." 
+ << endl; + fuse_reply_err(req, err); + } else { + fuse_reply_entry(req, &e); + } } - -static void mknod_symlink(fuse_req_t req, fuse_ino_t parent, - const char *name, mode_t mode, dev_t rdev, - const char *link) { - int res; - Inode& inode_p = get_inode(parent); - auto saverr = ENOMEM; - - if (S_ISDIR(mode)) - res = mkdirat(inode_p.fd, name, mode); - else if (S_ISLNK(mode)) - res = symlinkat(link, inode_p.fd, name); - else - res = mknodat(inode_p.fd, name, mode, rdev); - saverr = errno; - if (res == -1) - goto out; - - fuse_entry_param e; - saverr = do_lookup(parent, name, &e); - if (saverr) - goto out; - - fuse_reply_entry(req, &e); - return; +static void mknod_symlink(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, dev_t rdev, const char *link) +{ + int res; + Inode &inode_p = get_inode(parent); + auto saverr = ENOMEM; + + if (S_ISDIR(mode)) + res = mkdirat(inode_p.fd, name, mode); + else if (S_ISLNK(mode)) + res = symlinkat(link, inode_p.fd, name); + else + res = mknodat(inode_p.fd, name, mode, rdev); + saverr = errno; + if (res == -1) + goto out; + + fuse_entry_param e; + saverr = do_lookup(parent, name, &e); + if (saverr) + goto out; + + fuse_reply_entry(req, &e); + return; out: - if (saverr == ENFILE || saverr == EMFILE) - cerr << "ERROR: Reached maximum number of file descriptors." << endl; - fuse_reply_err(req, saverr); + if (saverr == ENFILE || saverr == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." 
+ << endl; + fuse_reply_err(req, saverr); } - static void sfs_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, - mode_t mode, dev_t rdev) { - mknod_symlink(req, parent, name, mode, rdev, nullptr); + mode_t mode, dev_t rdev) +{ + mknod_symlink(req, parent, name, mode, rdev, nullptr); } - static void sfs_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, - mode_t mode) { - mknod_symlink(req, parent, name, S_IFDIR | mode, 0, nullptr); + mode_t mode) +{ + mknod_symlink(req, parent, name, S_IFDIR | mode, 0, nullptr); } - static void sfs_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, - const char *name) { - mknod_symlink(req, parent, name, S_IFLNK, 0, link); + const char *name) +{ + mknod_symlink(req, parent, name, S_IFLNK, 0, link); } - static void sfs_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - const char *name) { - Inode& inode = get_inode(ino); - Inode& inode_p = get_inode(parent); - fuse_entry_param e {}; - - e.attr_timeout = fs.timeout; - e.entry_timeout = fs.timeout; - - char procname[64]; - sprintf(procname, "/proc/self/fd/%i", inode.fd); - auto res = linkat(AT_FDCWD, procname, inode_p.fd, name, AT_SYMLINK_FOLLOW); - if (res == -1) { - fuse_reply_err(req, errno); - return; - } - - res = fstatat(inode.fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); - if (res == -1) { - fuse_reply_err(req, errno); - return; - } - e.ino = reinterpret_cast(&inode); - { - lock_guard g {inode.m}; - inode.nlookup++; - if (fs.debug) - cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " - << "inode " << inode.src_ino - << " count " << inode.nlookup << endl; - } - - fuse_reply_entry(req, &e); - return; + const char *name) +{ + Inode &inode = get_inode(ino); + Inode &inode_p = get_inode(parent); + fuse_entry_param e{}; + + e.attr_timeout = fs.timeout; + e.entry_timeout = fs.timeout; + + char procname[64]; + sprintf(procname, "/proc/self/fd/%i", inode.fd); + auto res = + linkat(AT_FDCWD, procname, inode_p.fd, name, AT_SYMLINK_FOLLOW); + 
if (res == -1) { + fuse_reply_err(req, errno); + return; + } + + res = fstatat(inode.fd, "", &e.attr, + AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + if (res == -1) { + fuse_reply_err(req, errno); + return; + } + e.ino = reinterpret_cast(&inode); + { + inode.nlookup++; + if (fs.debug) + cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " + << "inode " << inode.src_ino << " count " + << inode.nlookup << endl; + } + + fuse_reply_entry(req, &e); + return; } - -static void sfs_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) { - Inode& inode_p = get_inode(parent); - lock_guard g {inode_p.m}; - auto res = unlinkat(inode_p.fd, name, AT_REMOVEDIR); - fuse_reply_err(req, res == -1 ? errno : 0); +static void sfs_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) +{ + Inode &inode_p = get_inode(parent); + lock_guard g{ inode_p.m }; + auto res = unlinkat(inode_p.fd, name, AT_REMOVEDIR); + fuse_reply_err(req, res == -1 ? errno : 0); } - static void sfs_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - fuse_ino_t newparent, const char *newname, - unsigned int flags) { - Inode& inode_p = get_inode(parent); - Inode& inode_np = get_inode(newparent); - if (flags) { - fuse_reply_err(req, EINVAL); - return; - } - - auto res = renameat(inode_p.fd, name, inode_np.fd, newname); - fuse_reply_err(req, res == -1 ? errno : 0); + fuse_ino_t newparent, const char *newname, + unsigned int flags) +{ + Inode &inode_p = get_inode(parent); + Inode &inode_np = get_inode(newparent); + if (flags) { + fuse_reply_err(req, EINVAL); + return; + } + + auto res = renameat(inode_p.fd, name, inode_np.fd, newname); + fuse_reply_err(req, res == -1 ? errno : 0); } - -static void sfs_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) { - Inode& inode_p = get_inode(parent); - // Release inode.fd before last unlink like nfsd EXPORT_OP_CLOSE_BEFORE_UNLINK - // to test reused inode numbers. - // Skip this when inode has an open file and when writeback cache is enabled. 
- if (!fs.timeout) { - fuse_entry_param e; - auto err = do_lookup(parent, name, &e); - if (err) { - fuse_reply_err(req, err); - return; - } - if (e.attr.st_nlink == 1) { - Inode& inode = get_inode(e.ino); - lock_guard g {inode.m}; - if (inode.fd > 0 && !inode.nopen) { - if (fs.debug) - cerr << "DEBUG: unlink: release inode " << e.attr.st_ino - << "; fd=" << inode.fd << endl; - lock_guard g_fs {fs.mutex}; - close(inode.fd); - inode.fd = -ENOENT; - inode.generation++; - } - } - - // decrease the ref which lookup above had increased - forget_one(e.ino, 1); - } - auto res = unlinkat(inode_p.fd, name, 0); - fuse_reply_err(req, res == -1 ? errno : 0); +static void sfs_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) +{ + Inode &inode_p = get_inode(parent); + // Release inode.fd before last unlink like nfsd EXPORT_OP_CLOSE_BEFORE_UNLINK + // to test reused inode numbers. + // Skip this when inode has an open file and when writeback cache is enabled. + if (!fs.timeout) { + fuse_entry_param e; + auto err = do_lookup(parent, name, &e); + if (err) { + fuse_reply_err(req, err); + return; + } + if (e.attr.st_nlink == 1) { + Inode &inode = get_inode(e.ino); + lock_guard g{ inode.m }; + if (inode.fd > 0 && !inode.nopen) { + if (fs.debug) + cerr << "DEBUG: unlink: release inode " + << e.attr.st_ino + << "; fd=" << inode.fd << endl; + lock_guard g_fs{ fs.mutex }; + close(inode.fd); + inode.fd = -ENOENT; + inode.generation++; + } + } + + // decrease the ref which lookup above had increased + forget_one(e.ino, 1); + } + auto res = unlinkat(inode_p.fd, name, 0); + fuse_reply_err(req, res == -1 ? 
errno : 0); } - -static void forget_one(fuse_ino_t ino, uint64_t n) { - Inode& inode = get_inode(ino); - unique_lock l {inode.m}; - - if(n > inode.nlookup) { - cerr << "INTERNAL ERROR: Negative lookup count for inode " - << inode.src_ino << endl; - abort(); - } - inode.nlookup -= n; - - if (fs.debug) - cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " - << "inode " << inode.src_ino - << " count " << inode.nlookup << endl; - - if (!inode.nlookup) { - if (fs.debug) - cerr << "DEBUG: forget: cleaning up inode " << inode.src_ino << endl; - { - lock_guard g_fs {fs.mutex}; - l.unlock(); - fs.inodes.erase({inode.src_ino, inode.src_dev}); - } - } else if (fs.debug) - cerr << "DEBUG: forget: inode " << inode.src_ino - << " lookup count now " << inode.nlookup << endl; +static void forget_one(fuse_ino_t ino, uint64_t n) +{ + Inode &inode = get_inode(ino); + unique_lock l{ inode.m }; + + if (n > inode.nlookup) { + cerr << "INTERNAL ERROR: Negative lookup count for inode " + << inode.src_ino << endl; + abort(); + } + inode.nlookup -= n; + + if (fs.debug) + cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " + << "inode " << inode.src_ino << " count " << inode.nlookup + << endl; + + if (!inode.nlookup) { + lock_guard g_fs{ fs.mutex }; + l.unlock(); + if (!inode.nlookup) { + if (fs.debug) + cerr << "DEBUG: forget: cleaning up inode " + << inode.src_ino << endl; + fs.inodes.erase({ inode.src_ino, inode.src_dev }); + } + } else if (fs.debug) + cerr << "DEBUG: forget: inode " << inode.src_ino + << " lookup count now " << inode.nlookup << endl; } -static void sfs_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) { - forget_one(ino, nlookup); - fuse_reply_none(req); +static void sfs_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) +{ + forget_one(ino, nlookup); + fuse_reply_none(req); } - static void sfs_forget_multi(fuse_req_t req, size_t count, - fuse_forget_data *forgets) { - for (int i = 0; i < count; i++) - forget_one(forgets[i].ino, forgets[i].nlookup); - 
fuse_reply_none(req); + fuse_forget_data *forgets) +{ + for (int i = 0; i < count; i++) + forget_one(forgets[i].ino, forgets[i].nlookup); + fuse_reply_none(req); } - -static void sfs_readlink(fuse_req_t req, fuse_ino_t ino) { - Inode& inode = get_inode(ino); - char buf[PATH_MAX + 1]; - auto res = readlinkat(inode.fd, "", buf, sizeof(buf)); - if (res == -1) - fuse_reply_err(req, errno); - else if (res == sizeof(buf)) - fuse_reply_err(req, ENAMETOOLONG); - else { - buf[res] = '\0'; - fuse_reply_readlink(req, buf); - } +static void sfs_readlink(fuse_req_t req, fuse_ino_t ino) +{ + Inode &inode = get_inode(ino); + char buf[PATH_MAX + 1]; + auto res = readlinkat(inode.fd, "", buf, sizeof(buf)); + if (res == -1) + fuse_reply_err(req, errno); + else if (res == sizeof(buf)) + fuse_reply_err(req, ENAMETOOLONG); + else { + buf[res] = '\0'; + fuse_reply_readlink(req, buf); + } } - struct DirHandle { - DIR *dp {nullptr}; - off_t offset; - - DirHandle() = default; - DirHandle(const DirHandle&) = delete; - DirHandle& operator=(const DirHandle&) = delete; - - ~DirHandle() { - if(dp) - closedir(dp); - } + DIR *dp{ nullptr }; + off_t offset; + + DirHandle() = default; + DirHandle(const DirHandle &) = delete; + DirHandle &operator=(const DirHandle &) = delete; + + ~DirHandle() + { + if (dp) + closedir(dp); + } }; - -static DirHandle *get_dir_handle(fuse_file_info *fi) { - return reinterpret_cast(fi->fh); +static DirHandle *get_dir_handle(fuse_file_info *fi) +{ + return reinterpret_cast(fi->fh); } - -static void sfs_opendir(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { - Inode& inode = get_inode(ino); - auto d = new (nothrow) DirHandle; - if (d == nullptr) { - fuse_reply_err(req, ENOMEM); - return; - } - - // Make Helgrind happy - it can't know that there's an implicit - // synchronization due to the fact that other threads cannot - // access d until we've called fuse_reply_*. 
- lock_guard g {inode.m}; - - auto fd = openat(inode.fd, ".", O_RDONLY); - if (fd == -1) - goto out_errno; - - // On success, dir stream takes ownership of fd, so we - // do not have to close it. - d->dp = fdopendir(fd); - if(d->dp == nullptr) - goto out_errno; - - d->offset = 0; - - fi->fh = reinterpret_cast(d); - if(fs.timeout) { - fi->keep_cache = 1; - fi->cache_readdir = 1; - } - fuse_reply_open(req, fi); - return; +static void sfs_opendir(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) +{ + Inode &inode = get_inode(ino); + auto d = new (nothrow) DirHandle; + if (d == nullptr) { + fuse_reply_err(req, ENOMEM); + return; + } + + // Make Helgrind happy - it can't know that there's an implicit + // synchronization due to the fact that other threads cannot + // access d until we've called fuse_reply_*. + lock_guard g{ inode.m }; + + auto fd = openat(inode.fd, ".", O_RDONLY); + if (fd == -1) + goto out_errno; + + // On success, dir stream takes ownership of fd, so we + // do not have to close it. + d->dp = fdopendir(fd); + if (d->dp == nullptr) + goto out_errno; + + d->offset = 0; + + fi->fh = reinterpret_cast(d); + if (fs.timeout) { + fi->keep_cache = 1; + fi->cache_readdir = 1; + } + fuse_reply_open(req, fi); + return; out_errno: - auto error = errno; - delete d; - if (error == ENFILE || error == EMFILE) - cerr << "ERROR: Reached maximum number of file descriptors." << endl; - fuse_reply_err(req, error); + auto error = errno; + delete d; + if (error == ENFILE || error == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." + << endl; + fuse_reply_err(req, error); } - -static bool is_dot_or_dotdot(const char *name) { - return name[0] == '.' && - (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); +static bool is_dot_or_dotdot(const char *name) +{ + return name[0] == '.' && + (name[1] == '\0' || (name[1] == '.' 
&& name[2] == '\0')); } - static void do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, fuse_file_info *fi, const int plus) { - auto d = get_dir_handle(fi); - Inode& inode = get_inode(ino); - lock_guard g {inode.m}; - char *p; - auto rem = size; - int err = 0, count = 0; - - if (fs.debug) - cerr << "DEBUG: readdir(): started with offset " - << offset << endl; - - auto buf = new (nothrow) char[size]; - if (!buf) { - fuse_reply_err(req, ENOMEM); - return; - } - p = buf; - - if (offset != d->offset) { - if (fs.debug) - cerr << "DEBUG: readdir(): seeking to " << offset << endl; - seekdir(d->dp, offset); - d->offset = offset; - } - - while (1) { - bool did_lookup = false; - struct dirent *entry; - errno = 0; - entry = readdir(d->dp); - if (!entry) { - if(errno) { - err = errno; - if (fs.debug) - warn("DEBUG: readdir(): readdir failed with"); - goto error; - } - break; // End of stream - } - d->offset = entry->d_off; - - fuse_entry_param e{}; - size_t entsize; - if (plus) { - if (is_dot_or_dotdot(entry->d_name)) { - /* fuse kernel ignores attributes for these and also does - * not increase lookup count (see fuse_direntplus_link) */ - e.attr.st_ino = entry->d_ino; - e.attr.st_mode = entry->d_type << 12; - } else { - err = do_lookup(ino, entry->d_name, &e); - if (err) - goto error; - did_lookup = true; - } - entsize = fuse_add_direntry_plus(req, p, rem, entry->d_name, &e, entry->d_off); - } else { - e.attr.st_ino = entry->d_ino; - e.attr.st_mode = entry->d_type << 12; - entsize = fuse_add_direntry(req, p, rem, entry->d_name, &e.attr, entry->d_off); - } - - if (entsize > rem) { - if (fs.debug) - cerr << "DEBUG: readdir(): buffer full, returning data. 
" << endl; - if (did_lookup) - forget_one(e.ino, 1); - break; - } - - p += entsize; - rem -= entsize; - count++; - if (fs.debug) { - cerr << "DEBUG: readdir(): added to buffer: " << entry->d_name - << ", ino " << e.attr.st_ino << ", offset " << entry->d_off << endl; - } - } - err = 0; + off_t offset, fuse_file_info *fi, const int plus) +{ + auto d = get_dir_handle(fi); + Inode &inode = get_inode(ino); + lock_guard g{ inode.m }; + char *p; + auto rem = size; + int err = 0, count = 0; + + if (fs.debug) + cerr << "DEBUG: readdir(): started with offset " << offset + << endl; + + auto buf = new (nothrow) char[size]; + if (!buf) { + fuse_reply_err(req, ENOMEM); + return; + } + p = buf; + + if (offset != d->offset) { + if (fs.debug) + cerr << "DEBUG: readdir(): seeking to " << offset + << endl; + seekdir(d->dp, offset); + d->offset = offset; + } + + while (1) { + bool did_lookup = false; + struct dirent *entry; + errno = 0; + entry = readdir(d->dp); + if (!entry) { + if (errno) { + err = errno; + if (fs.debug) + warn("DEBUG: readdir(): readdir failed with"); + goto error; + } + break; // End of stream + } + d->offset = entry->d_off; + + fuse_entry_param e{}; + size_t entsize; + if (plus) { + if (is_dot_or_dotdot(entry->d_name)) { + /* fuse kernel ignores attributes for these and also does + * not increase lookup count (see fuse_direntplus_link) + */ + e.attr.st_ino = entry->d_ino; + e.attr.st_mode = entry->d_type << 12; + } else { + err = do_lookup(ino, entry->d_name, &e); + if (err) + goto error; + did_lookup = true; + } + entsize = fuse_add_direntry_plus( + req, p, rem, entry->d_name, &e, entry->d_off); + } else { + e.attr.st_ino = entry->d_ino; + e.attr.st_mode = entry->d_type << 12; + entsize = fuse_add_direntry(req, p, rem, entry->d_name, + &e.attr, entry->d_off); + } + + if (entsize > rem) { + if (fs.debug) + cerr << "DEBUG: readdir(): buffer full, returning data. 
" + << endl; + if (did_lookup) + forget_one(e.ino, 1); + break; + } + + p += entsize; + rem -= entsize; + count++; + if (fs.debug) { + cerr << "DEBUG: readdir(): added to buffer: " + << entry->d_name << ", ino " << e.attr.st_ino + << ", offset " << entry->d_off << endl; + } + } + err = 0; error: - // If there's an error, we can only signal it if we haven't stored - // any entries yet - otherwise we'd end up with wrong lookup - // counts for the entries that are already in the buffer. So we - // return what we've collected until that point. - if (err && rem == size) { - if (err == ENFILE || err == EMFILE) - cerr << "ERROR: Reached maximum number of file descriptors." << endl; - fuse_reply_err(req, err); - } else { - if (fs.debug) - cerr << "DEBUG: readdir(): returning " << count - << " entries, curr offset " << d->offset << endl; - fuse_reply_buf(req, buf, size - rem); - } - delete[] buf; - return; + // If there's an error, we can only signal it if we haven't stored + // any entries yet - otherwise we'd end up with wrong lookup + // counts for the entries that are already in the buffer. So we + // return what we've collected until that point. + if (err && rem == size) { + if (err == ENFILE || err == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." 
+ << endl; + fuse_reply_err(req, err); + } else { + if (fs.debug) + cerr << "DEBUG: readdir(): returning " << count + << " entries, curr offset " << d->offset << endl; + fuse_reply_buf(req, buf, size - rem); + } + delete[] buf; + return; } - static void sfs_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, fuse_file_info *fi) { - // operation logging is done in readdir to reduce code duplication - do_readdir(req, ino, size, offset, fi, 0); + off_t offset, fuse_file_info *fi) +{ + // operation logging is done in readdir to reduce code duplication + do_readdir(req, ino, size, offset, fi, 0); } - static void sfs_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, fuse_file_info *fi) { - // operation logging is done in readdir to reduce code duplication - do_readdir(req, ino, size, offset, fi, 1); + off_t offset, fuse_file_info *fi) +{ + // operation logging is done in readdir to reduce code duplication + do_readdir(req, ino, size, offset, fi, 1); } - -static void sfs_releasedir(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { - (void) ino; - auto d = get_dir_handle(fi); - delete d; - fuse_reply_err(req, 0); +static void sfs_releasedir(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) +{ + (void)ino; + auto d = get_dir_handle(fi); + delete d; + fuse_reply_err(req, 0); } - static void do_passthrough_open(fuse_req_t req, fuse_ino_t ino, int fd, - fuse_file_info *fi) { - Inode& inode = get_inode(ino); - /* Setup a shared backing file on first open of an inode */ - if (inode.backing_id) { - if (fs.debug) - cerr << "DEBUG: reusing shared backing file " - << inode.backing_id << " for inode " << ino << endl; - fi->backing_id = inode.backing_id; - } else if (!(inode.backing_id = fuse_passthrough_open(req, fd))) { - cerr << "DEBUG: fuse_passthrough_open failed for inode " << ino - << ", disabling rw passthrough." 
<< endl; - fs.passthrough = false; - } else { - if (fs.debug) - cerr << "DEBUG: setup shared backing file " - << inode.backing_id << " for inode " << ino << endl; - fi->backing_id = inode.backing_id; - } - /* open in passthrough mode must drop old page cache */ - if (fi->backing_id) - fi->keep_cache = false; + fuse_file_info *fi) +{ + Inode &inode = get_inode(ino); + /* Setup a shared backing file on first open of an inode */ + if (inode.backing_id) { + if (fs.debug) + cerr << "DEBUG: reusing shared backing file " + << inode.backing_id << " for inode " << ino + << endl; + fi->backing_id = inode.backing_id; + } else if (!(inode.backing_id = fuse_passthrough_open(req, fd))) { + cerr << "DEBUG: fuse_passthrough_open failed for inode " << ino + << ", disabling rw passthrough." << endl; + fs.passthrough = false; + } else { + if (fs.debug) + cerr << "DEBUG: setup shared backing file " + << inode.backing_id << " for inode " << ino + << endl; + fi->backing_id = inode.backing_id; + } + /* open in passthrough mode must drop old page cache */ + if (fi->backing_id) + fi->keep_cache = false; } static void sfs_create_open_flags(fuse_file_info *fi) { - if (fs.direct_io) - fi->direct_io = 1; - - /* - * fi->direct_io (FOPEN_DIRECT_IO) is set to benefit from - * parallel_direct_writes, which kernel cannot do for plain O_DIRECT. - * However, passthrough is preferred, but which is not possible when - * FOPEN_DIRECT_IO is set. - */ - if (!fs.passthrough) { - if (fi->flags & O_DIRECT) - fi->direct_io = 1; - } - - /* parallel_direct_writes feature depends on direct_io features. - To make parallel_direct_writes valid, need set fi->direct_io - in current function. */ - fi->parallel_direct_writes = 1; - - fi->keep_cache = (fs.timeout != 0); - fi->noflush = (fs.timeout == 0 && (fi->flags & O_ACCMODE) == O_RDONLY); + if (fs.direct_io) + fi->direct_io = 1; + + /* + * fi->direct_io (FOPEN_DIRECT_IO) is set to benefit from + * parallel_direct_writes, which kernel cannot do for plain O_DIRECT. 
+ * However, passthrough is preferred, but which is not possible when + * FOPEN_DIRECT_IO is set. + */ + if (!fs.passthrough) { + if (fi->flags & O_DIRECT) + fi->direct_io = 1; + } + + /* parallel_direct_writes feature depends on direct_io features. + * To make parallel_direct_writes valid, need set fi->direct_io + * in current function. + */ + fi->parallel_direct_writes = 1; + + fi->keep_cache = (fs.timeout != 0); + fi->noflush = (fs.timeout == 0 && (fi->flags & O_ACCMODE) == O_RDONLY); } static void sfs_create(fuse_req_t req, fuse_ino_t parent, const char *name, - mode_t mode, fuse_file_info *fi) { - Inode& inode_p = get_inode(parent); - - auto fd = openat(inode_p.fd, name, - (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); - if (fd == -1) { - auto err = errno; - if (err == ENFILE || err == EMFILE) - cerr << "ERROR: Reached maximum number of file descriptors." << endl; - fuse_reply_err(req, err); - return; - } - - fi->fh = fd; - fuse_entry_param e; - auto err = do_lookup(parent, name, &e); - if (err) { - if (err == ENFILE || err == EMFILE) - cerr << "ERROR: Reached maximum number of file descriptors." << endl; - fuse_reply_err(req, err); - return; - } - - Inode& inode = get_inode(e.ino); - lock_guard g {inode.m}; - inode.nopen++; - - sfs_create_open_flags(fi); - - if (fs.passthrough) - do_passthrough_open(req, e.ino, fd, fi); - fuse_reply_create(req, &e, fi); + mode_t mode, fuse_file_info *fi) +{ + Inode &inode_p = get_inode(parent); + + auto fd = openat(inode_p.fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); + if (fd == -1) { + auto err = errno; + if (err == ENFILE || err == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." + << endl; + fuse_reply_err(req, err); + return; + } + + fi->fh = fd; + fuse_entry_param e; + auto err = do_lookup(parent, name, &e); + if (err) { + if (err == ENFILE || err == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." 
+ << endl; + fuse_reply_err(req, err); + return; + } + + Inode &inode = get_inode(e.ino); + lock_guard g{ inode.m }; + inode.nopen++; + + sfs_create_open_flags(fi); + + if (fs.passthrough) + do_passthrough_open(req, e.ino, fd, fi); + fuse_reply_create(req, &e, fi); } +#ifdef O_TMPFILE static Inode *create_new_inode(int fd, fuse_entry_param *e) { - memset(e, 0, sizeof(*e)); - e->attr_timeout = fs.timeout; - e->entry_timeout = fs.timeout; - - auto res = fstatat(fd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); - if (res == -1) { - if (fs.debug) - cerr << "DEBUG: lookup(): fstatat failed" << endl; - return NULL; - } - - SrcId id {e->attr.st_ino, e->attr.st_dev}; - unique_lock fs_lock {fs.mutex}; - Inode* p_inode; - try { - p_inode = &fs.inodes[id]; - } catch (std::bad_alloc&) { - return NULL; - } - - e->ino = reinterpret_cast(p_inode); - e->generation = p_inode->generation; - - lock_guard g {p_inode->m}; - p_inode->src_ino = e->attr.st_ino; - p_inode->src_dev = e->attr.st_dev; - - p_inode->nlookup++; - if (fs.debug) - cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " - << "inode " << p_inode->src_ino - << " count " << p_inode->nlookup << endl; - - p_inode->fd = fd; - fs_lock.unlock(); - - if (fs.debug) - cerr << "DEBUG: lookup(): created userspace inode " << e->attr.st_ino - << "; fd = " << p_inode->fd << endl; - return p_inode; -} - -static void sfs_tmpfile(fuse_req_t req, fuse_ino_t parent, - mode_t mode, fuse_file_info *fi) { - Inode& parent_inode = get_inode(parent); - - auto fd = openat(parent_inode.fd, ".", - (fi->flags | O_TMPFILE) & ~O_NOFOLLOW, mode); - if (fd == -1) { - auto err = errno; - if (err == ENFILE || err == EMFILE) - cerr << "ERROR: Reached maximum number of file descriptors." << endl; - fuse_reply_err(req, err); - return; - } - - fi->fh = fd; - fuse_entry_param e; - - Inode *inode = create_new_inode(dup(fd), &e); - if (inode == NULL) { - auto err = errno; - cerr << "ERROR: could not create new inode." 
<< endl; - close(fd); - fuse_reply_err(req, err); - return; - } - - lock_guard g {inode->m}; - - sfs_create_open_flags(fi); - - if (fs.passthrough) - do_passthrough_open(req, e.ino, fd, fi); - - fuse_reply_create(req, &e, fi); + memset(e, 0, sizeof(*e)); + e->attr_timeout = fs.timeout; + e->entry_timeout = fs.timeout; + + auto res = + fstatat(fd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + if (res == -1) { + if (fs.debug) + cerr << "DEBUG: lookup(): fstatat failed" << endl; + return NULL; + } + + SrcId id{ e->attr.st_ino, e->attr.st_dev }; + unique_lock fs_lock{ fs.mutex }; + Inode *p_inode; + try { + p_inode = &fs.inodes[id]; + } catch (std::bad_alloc &) { + return NULL; + } + + e->ino = reinterpret_cast(p_inode); + e->generation = p_inode->generation; + + lock_guard g{ p_inode->m }; + p_inode->src_ino = e->attr.st_ino; + p_inode->src_dev = e->attr.st_dev; + + p_inode->nlookup++; + if (fs.debug) + cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " + << "inode " << p_inode->src_ino << " count " + << p_inode->nlookup << endl; + + p_inode->fd = fd; + fs_lock.unlock(); + + if (fs.debug) + cerr << "DEBUG: lookup(): created userspace inode " + << e->attr.st_ino << "; fd = " << p_inode->fd << endl; + return p_inode; } - +static void sfs_tmpfile(fuse_req_t req, fuse_ino_t parent, mode_t mode, + fuse_file_info *fi) +{ + Inode &parent_inode = get_inode(parent); + + auto fd = openat(parent_inode.fd, ".", + (fi->flags | O_TMPFILE) & ~O_NOFOLLOW, mode); + if (fd == -1) { + auto err = errno; + if (err == ENFILE || err == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." + << endl; + fuse_reply_err(req, err); + return; + } + + fi->fh = fd; + fuse_entry_param e; + + Inode *inode = create_new_inode(dup(fd), &e); + if (inode == NULL) { + auto err = errno; + cerr << "ERROR: could not create new inode." 
<< endl; + close(fd); + fuse_reply_err(req, err); + return; + } + + lock_guard g{ inode->m }; + + sfs_create_open_flags(fi); + + if (fs.passthrough) + do_passthrough_open(req, e.ino, fd, fi); + + fuse_reply_create(req, &e, fi); +} +#endif static void sfs_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - fuse_file_info *fi) { - (void) ino; - int res; - int fd = dirfd(get_dir_handle(fi)->dp); - if (datasync) - res = fdatasync(fd); - else - res = fsync(fd); - fuse_reply_err(req, res == -1 ? errno : 0); + fuse_file_info *fi) +{ + (void)ino; + int res; + int fd = dirfd(get_dir_handle(fi)->dp); + if (datasync) + res = fdatasync(fd); + else + res = fsync(fd); + fuse_reply_err(req, res == -1 ? errno : 0); } +static void sfs_open(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) +{ + Inode &inode = get_inode(ino); -static void sfs_open(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { - Inode& inode = get_inode(ino); - - /* With writeback cache, kernel may send read requests even + /* With writeback cache, kernel may send read requests even when userspace opened write-only */ - if (fs.timeout && (fi->flags & O_ACCMODE) == O_WRONLY) { - fi->flags &= ~O_ACCMODE; - fi->flags |= O_RDWR; - } - - /* With writeback cache, O_APPEND is handled by the kernel. This - breaks atomicity (since the file may change in the underlying - filesystem, so that the kernel's idea of the end of the file - isn't accurate anymore). However, no process should modify the - file in the underlying filesystem once it has been read, so - this is not a problem. */ - if (fs.timeout && fi->flags & O_APPEND) - fi->flags &= ~O_APPEND; - - /* Unfortunately we cannot use inode.fd, because this was opened + if (fs.timeout && (fi->flags & O_ACCMODE) == O_WRONLY) { + fi->flags &= ~O_ACCMODE; + fi->flags |= O_RDWR; + } + + /* With writeback cache, O_APPEND is handled by the kernel. 
This + * breaks atomicity (since the file may change in the underlying + * filesystem, so that the kernel's idea of the end of the file + * isn't accurate anymore). However, no process should modify the + * file in the underlying filesystem once it has been read, so + * this is not a problem. + */ + if (fs.timeout && fi->flags & O_APPEND) + fi->flags &= ~O_APPEND; + + /* Unfortunately we cannot use inode.fd, because this was opened with O_PATH (so it doesn't allow read/write access). */ - char buf[64]; - sprintf(buf, "/proc/self/fd/%i", inode.fd); - auto fd = open(buf, fi->flags & ~O_NOFOLLOW); - if (fd == -1) { - auto err = errno; - if (err == ENFILE || err == EMFILE) - cerr << "ERROR: Reached maximum number of file descriptors." << endl; - fuse_reply_err(req, err); - return; - } - - lock_guard g {inode.m}; - inode.nopen++; - - sfs_create_open_flags(fi); - - fi->fh = fd; - if (fs.passthrough) - do_passthrough_open(req, ino, fd, fi); - fuse_reply_open(req, fi); + auto fd = with_fd_path(inode.fd, [fi](const char *buf) { + return open(buf, fi->flags & ~O_NOFOLLOW); + }); + if (fd == -1) { + auto err = errno; + if (err == ENFILE || err == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." 
+ << endl; + fuse_reply_err(req, err); + return; + } + + lock_guard g{ inode.m }; + inode.nopen++; + + sfs_create_open_flags(fi); + + fi->fh = fd; + if (fs.passthrough) + do_passthrough_open(req, ino, fd, fi); + fuse_reply_open(req, fi); } - -static void sfs_release(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { - Inode& inode = get_inode(ino); - lock_guard g {inode.m}; - inode.nopen--; - - /* Close the shared backing file on last file close of an inode */ - if (inode.backing_id && !inode.nopen) { - if (fuse_passthrough_close(req, inode.backing_id) < 0) { - cerr << "DEBUG: fuse_passthrough_close failed for inode " - << ino << " backing file " << inode.backing_id << endl; - } else if (fs.debug) { - cerr << "DEBUG: closed backing file " << inode.backing_id - << " for inode " << ino << endl; - } - inode.backing_id = 0; - } - - close(fi->fh); - fuse_reply_err(req, 0); +static void sfs_release(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) +{ + Inode &inode = get_inode(ino); + lock_guard g{ inode.m }; + inode.nopen--; + + /* Close the shared backing file on last file close of an inode */ + if (inode.backing_id && !inode.nopen) { + if (fuse_passthrough_close(req, inode.backing_id) < 0) { + cerr << "DEBUG: fuse_passthrough_close failed for inode " + << ino << " backing file " << inode.backing_id + << endl; + } else if (fs.debug) { + cerr << "DEBUG: closed backing file " + << inode.backing_id << " for inode " << ino + << endl; + } + inode.backing_id = 0; + } + + close(fi->fh); + fuse_reply_err(req, 0); } - -static void sfs_flush(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { - (void) ino; - auto res = close(dup(fi->fh)); - fuse_reply_err(req, res == -1 ? errno : 0); +static void sfs_flush(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) +{ + (void)ino; + auto res = close(dup(fi->fh)); + fuse_reply_err(req, res == -1 ? 
errno : 0); } - static void sfs_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - fuse_file_info *fi) { - (void) ino; - int res; - if (datasync) - res = fdatasync(fi->fh); - else - res = fsync(fi->fh); - fuse_reply_err(req, res == -1 ? errno : 0); + fuse_file_info *fi) +{ + (void)ino; + int res; + if (datasync) + res = fdatasync(fi->fh); + else + res = fsync(fi->fh); + fuse_reply_err(req, res == -1 ? errno : 0); } - -static void do_read(fuse_req_t req, size_t size, off_t off, fuse_file_info *fi) { - - fuse_bufvec buf = FUSE_BUFVEC_INIT(size); - buf.buf[0].flags = static_cast( - FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK); - buf.buf[0].fd = fi->fh; - buf.buf[0].pos = off; - - fuse_reply_data(req, &buf, FUSE_BUF_COPY_FLAGS); +static void do_read(fuse_req_t req, size_t size, off_t off, fuse_file_info *fi) +{ + fuse_bufvec buf = FUSE_BUFVEC_INIT(size); + char *payload = NULL; + size_t payload_size = 0; + int res = fuse_req_get_payload(req, &payload, &payload_size, NULL); + + /* + * This is a demonstration how to use io-uring payload. For FUSE_BUF_IS_FD + * it shouldn't make much of a difference because fuse_reply_data() -> + * fuse_reply_data_uring() also has access to the payload and will + * read directly from the FD into the payload. + * It is more useful for file systems that need a buffer for decryption, + * decompression, etc. 
+ */ + if (res == 0) { + /* This is an io-uring request - write directly to the payload */ + assert(payload_size >= size); + + buf.buf[0].mem = payload; + buf.buf[0].size = payload_size; + + res = pread(fi->fh, payload, size, off); + if (res < 0) { + fuse_reply_err(req, errno); + return; + } + + buf.buf[0].size = res; + } else { + buf.buf[0].flags = static_cast( + FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK); + buf.buf[0].fd = fi->fh; + buf.buf[0].pos = off; + } + + fuse_reply_data(req, &buf, FUSE_BUF_COPY_FLAGS); } static void sfs_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, - fuse_file_info *fi) { - (void) ino; - if (fs.passthrough && !fs.direct_io) { - cerr << "ERROR: fuse_passthrough read failed." << endl; - fuse_reply_err(req, EIO); - return; - } - do_read(req, size, off, fi); + fuse_file_info *fi) +{ + (void)ino; + if (fs.passthrough && !fs.direct_io) { + cerr << "ERROR: fuse_passthrough read failed." << endl; + fuse_reply_err(req, EIO); + return; + } + do_read(req, size, off, fi); } - static void do_write_buf(fuse_req_t req, size_t size, off_t off, - fuse_bufvec *in_buf, fuse_file_info *fi) { - fuse_bufvec out_buf = FUSE_BUFVEC_INIT(size); - out_buf.buf[0].flags = static_cast( - FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK); - out_buf.buf[0].fd = fi->fh; - out_buf.buf[0].pos = off; - - auto res = fuse_buf_copy(&out_buf, in_buf, FUSE_BUF_COPY_FLAGS); - if (res < 0) - fuse_reply_err(req, -res); - else - fuse_reply_write(req, (size_t)res); + fuse_bufvec *in_buf, fuse_file_info *fi) +{ + fuse_bufvec out_buf = FUSE_BUFVEC_INIT(size); + out_buf.buf[0].flags = + static_cast(FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK); + out_buf.buf[0].fd = fi->fh; + out_buf.buf[0].pos = off; + + auto res = fuse_buf_copy(&out_buf, in_buf, FUSE_BUF_COPY_FLAGS); + if (res < 0) + fuse_reply_err(req, -res); + else + fuse_reply_write(req, (size_t)res); } - static void sfs_write_buf(fuse_req_t req, fuse_ino_t ino, fuse_bufvec *in_buf, - off_t off, fuse_file_info *fi) { - (void) ino; - if 
(fs.passthrough && !fs.direct_io) { - cerr << "ERROR: fuse_passthrough write failed." << endl; - fuse_reply_err(req, EIO); - return; - } - auto size {fuse_buf_size(in_buf)}; - do_write_buf(req, size, off, in_buf, fi); + off_t off, fuse_file_info *fi) +{ + (void)ino; + if (fs.passthrough && !fs.direct_io) { + cerr << "ERROR: fuse_passthrough write failed." << endl; + fuse_reply_err(req, EIO); + return; + } + auto size{ fuse_buf_size(in_buf) }; + do_write_buf(req, size, off, in_buf, fi); } +static void sfs_statfs(fuse_req_t req, fuse_ino_t ino) +{ + struct statvfs stbuf; -static void sfs_statfs(fuse_req_t req, fuse_ino_t ino) { - struct statvfs stbuf; - - auto res = fstatvfs(get_fs_fd(ino), &stbuf); - if (res == -1) - fuse_reply_err(req, errno); - else - fuse_reply_statfs(req, &stbuf); + auto res = fstatvfs(get_fs_fd(ino), &stbuf); + if (res == -1) + fuse_reply_err(req, errno); + else + fuse_reply_statfs(req, &stbuf); } - -#ifdef HAVE_POSIX_FALLOCATE static void sfs_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, - off_t offset, off_t length, fuse_file_info *fi) { - (void) ino; - if (mode) { - fuse_reply_err(req, EOPNOTSUPP); - return; - } - - auto err = posix_fallocate(fi->fh, offset, length); - fuse_reply_err(req, err); + off_t offset, off_t length, fuse_file_info *fi) +{ + (void)ino; + + auto err = -do_fallocate(fi->fh, mode, offset, length); + + fuse_reply_err(req, err); } -#endif static void sfs_flock(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi, - int op) { - (void) ino; - auto res = flock(fi->fh, op); - fuse_reply_err(req, res == -1 ? errno : 0); + int op) +{ + (void)ino; + auto res = flock(fi->fh, op); + fuse_reply_err(req, res == -1 ? 
errno : 0); } - #ifdef HAVE_SETXATTR static void sfs_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - size_t size) { - char *value = nullptr; - Inode& inode = get_inode(ino); - ssize_t ret; - int saverr; - - char procname[64]; - sprintf(procname, "/proc/self/fd/%i", inode.fd); - - if (size) { - value = new (nothrow) char[size]; - if (value == nullptr) { - saverr = ENOMEM; - goto out; - } - - ret = getxattr(procname, name, value, size); - if (ret == -1) - goto out_err; - saverr = 0; - if (ret == 0) - goto out; - - fuse_reply_buf(req, value, ret); - } else { - ret = getxattr(procname, name, nullptr, 0); - if (ret == -1) - goto out_err; - - fuse_reply_xattr(req, ret); - } + size_t size) +{ + char *value = nullptr; + Inode &inode = get_inode(ino); + ssize_t ret; + int saverr; + + char procname[64]; + sprintf(procname, "/proc/self/fd/%i", inode.fd); + + if (size) { + value = new (nothrow) char[size]; + if (value == nullptr) { + saverr = ENOMEM; + goto out; + } + + ret = getxattr(procname, name, value, size); + if (ret == -1) + goto out_err; + saverr = 0; + if (ret == 0) + goto out; + + fuse_reply_buf(req, value, ret); + } else { + ret = getxattr(procname, name, nullptr, 0); + if (ret == -1) + goto out_err; + + fuse_reply_xattr(req, ret); + } out_free: - delete[] value; - return; + delete[] value; + return; out_err: - saverr = errno; + saverr = errno; out: - fuse_reply_err(req, saverr); - goto out_free; + fuse_reply_err(req, saverr); + goto out_free; } - -static void sfs_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) { - char *value = nullptr; - Inode& inode = get_inode(ino); - ssize_t ret; - int saverr; - - char procname[64]; - sprintf(procname, "/proc/self/fd/%i", inode.fd); - - if (size) { - value = new (nothrow) char[size]; - if (value == nullptr) { - saverr = ENOMEM; - goto out; - } - - ret = listxattr(procname, value, size); - if (ret == -1) - goto out_err; - saverr = 0; - if (ret == 0) - goto out; - - fuse_reply_buf(req, value, ret); - } else { 
- ret = listxattr(procname, nullptr, 0); - if (ret == -1) - goto out_err; - - fuse_reply_xattr(req, ret); - } +static void sfs_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) +{ + char *value = nullptr; + Inode &inode = get_inode(ino); + ssize_t ret; + int saverr; + + char procname[64]; + sprintf(procname, "/proc/self/fd/%i", inode.fd); + + if (size) { + value = new (nothrow) char[size]; + if (value == nullptr) { + saverr = ENOMEM; + goto out; + } + + ret = listxattr(procname, value, size); + if (ret == -1) + goto out_err; + saverr = 0; + if (ret == 0) + goto out; + + fuse_reply_buf(req, value, ret); + } else { + ret = listxattr(procname, nullptr, 0); + if (ret == -1) + goto out_err; + + fuse_reply_xattr(req, ret); + } out_free: - delete[] value; - return; + delete[] value; + return; out_err: - saverr = errno; + saverr = errno; out: - fuse_reply_err(req, saverr); - goto out_free; + fuse_reply_err(req, saverr); + goto out_free; } - static void sfs_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - const char *value, size_t size, int flags) { - Inode& inode = get_inode(ino); - ssize_t ret; - int saverr; + const char *value, size_t size, int flags) +{ + Inode &inode = get_inode(ino); + ssize_t ret; + int saverr; - char procname[64]; - sprintf(procname, "/proc/self/fd/%i", inode.fd); + char procname[64]; + sprintf(procname, "/proc/self/fd/%i", inode.fd); - ret = setxattr(procname, name, value, size, flags); - saverr = ret == -1 ? errno : 0; + ret = setxattr(procname, name, value, size, flags); + saverr = ret == -1 ? 
errno : 0; - fuse_reply_err(req, saverr); + fuse_reply_err(req, saverr); } +static void sfs_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) +{ + char procname[64]; + Inode &inode = get_inode(ino); + ssize_t ret; + int saverr; -static void sfs_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) { - char procname[64]; - Inode& inode = get_inode(ino); - ssize_t ret; - int saverr; - - sprintf(procname, "/proc/self/fd/%i", inode.fd); - ret = removexattr(procname, name); - saverr = ret == -1 ? errno : 0; + sprintf(procname, "/proc/self/fd/%i", inode.fd); + ret = removexattr(procname, name); + saverr = ret == -1 ? errno : 0; - fuse_reply_err(req, saverr); + fuse_reply_err(req, saverr); } #endif - -static void assign_operations(fuse_lowlevel_ops &sfs_oper) { - sfs_oper.init = sfs_init; - sfs_oper.lookup = sfs_lookup; - sfs_oper.mkdir = sfs_mkdir; - sfs_oper.mknod = sfs_mknod; - sfs_oper.symlink = sfs_symlink; - sfs_oper.link = sfs_link; - sfs_oper.unlink = sfs_unlink; - sfs_oper.rmdir = sfs_rmdir; - sfs_oper.rename = sfs_rename; - sfs_oper.forget = sfs_forget; - sfs_oper.forget_multi = sfs_forget_multi; - sfs_oper.getattr = sfs_getattr; - sfs_oper.setattr = sfs_setattr; - sfs_oper.readlink = sfs_readlink; - sfs_oper.opendir = sfs_opendir; - sfs_oper.readdir = sfs_readdir; - sfs_oper.readdirplus = sfs_readdirplus; - sfs_oper.releasedir = sfs_releasedir; - sfs_oper.fsyncdir = sfs_fsyncdir; - sfs_oper.create = sfs_create; - sfs_oper.tmpfile = sfs_tmpfile; - sfs_oper.open = sfs_open; - sfs_oper.release = sfs_release; - sfs_oper.flush = sfs_flush; - sfs_oper.fsync = sfs_fsync; - sfs_oper.read = sfs_read; - sfs_oper.write_buf = sfs_write_buf; - sfs_oper.statfs = sfs_statfs; -#ifdef HAVE_POSIX_FALLOCATE - sfs_oper.fallocate = sfs_fallocate; +static void assign_operations(fuse_lowlevel_ops &sfs_oper) +{ + sfs_oper.init = sfs_init; + sfs_oper.lookup = sfs_lookup; + sfs_oper.mkdir = sfs_mkdir; + sfs_oper.mknod = sfs_mknod; + sfs_oper.symlink = sfs_symlink; + 
sfs_oper.link = sfs_link; + sfs_oper.unlink = sfs_unlink; + sfs_oper.rmdir = sfs_rmdir; + sfs_oper.rename = sfs_rename; + sfs_oper.forget = sfs_forget; + sfs_oper.forget_multi = sfs_forget_multi; + sfs_oper.getattr = sfs_getattr; + sfs_oper.setattr = sfs_setattr; + sfs_oper.readlink = sfs_readlink; + sfs_oper.opendir = sfs_opendir; + sfs_oper.readdir = sfs_readdir; + sfs_oper.readdirplus = sfs_readdirplus; + sfs_oper.releasedir = sfs_releasedir; + sfs_oper.fsyncdir = sfs_fsyncdir; + sfs_oper.create = sfs_create; +#ifdef O_TMPFILE + sfs_oper.tmpfile = sfs_tmpfile; #endif - sfs_oper.flock = sfs_flock; + sfs_oper.open = sfs_open; + sfs_oper.release = sfs_release; + sfs_oper.flush = sfs_flush; + sfs_oper.fsync = sfs_fsync; + sfs_oper.read = sfs_read; + sfs_oper.write_buf = sfs_write_buf; + sfs_oper.statfs = sfs_statfs; + sfs_oper.fallocate = sfs_fallocate; + sfs_oper.flock = sfs_flock; #ifdef HAVE_SETXATTR - sfs_oper.setxattr = sfs_setxattr; - sfs_oper.getxattr = sfs_getxattr; - sfs_oper.listxattr = sfs_listxattr; - sfs_oper.removexattr = sfs_removexattr; + sfs_oper.setxattr = sfs_setxattr; + sfs_oper.getxattr = sfs_getxattr; + sfs_oper.listxattr = sfs_listxattr; + sfs_oper.removexattr = sfs_removexattr; #endif } -static void print_usage(char *prog_name) { - cout << "Usage: " << prog_name << " --help\n" - << " " << prog_name << " [options] \n"; +static void print_usage(char *prog_name) +{ + cout << "Usage: " << prog_name << " --help\n" + << " " << prog_name << " [options] \n"; } -static cxxopts::ParseResult parse_wrapper(cxxopts::Options& parser, int& argc, char**& argv) { - try { - return parser.parse(argc, argv); - } catch (cxxopts::option_not_exists_exception& exc) { - std::cout << argv[0] << ": " << exc.what() << std::endl; - print_usage(argv[0]); - exit(2); - } +static cxxopts::ParseResult parse_wrapper(cxxopts::Options &parser, int &argc, + char **&argv) +{ + try { + return parser.parse(argc, argv); + } catch (cxxopts::option_not_exists_exception &exc) { + 
std::cout << argv[0] << ": " << exc.what() << std::endl; + print_usage(argv[0]); + exit(2); + } } +static void string_split(std::string s, std::vector &out, + std::string delimiter) +{ + size_t pos_start = 0, pos_end, delim_len = delimiter.length(); + std::string token; -static void string_split(std::string s, std::vector& out, std::string delimiter) { - size_t pos_start = 0, pos_end, delim_len = delimiter.length(); - std::string token; - - while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { - token = s.substr(pos_start, pos_end - pos_start); - pos_start = pos_end + delim_len; - out.push_back(token); - } + while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { + token = s.substr(pos_start, pos_end - pos_start); + pos_start = pos_end + delim_len; + out.push_back(token); + } - out.push_back(s.substr(pos_start)); + out.push_back(s.substr(pos_start)); } - -static std::string string_join(const std::vector& elems, char delim) +static std::string string_join(const std::vector &elems, + char delim) { - std::ostringstream out; - for (auto ii = elems.begin(); ii != elems.end(); ++ii) { - out << (*ii); - if (ii + 1 != elems.end()) { - out << delim; - } - } - return out.str(); + std::ostringstream out; + for (auto ii = elems.begin(); ii != elems.end(); ++ii) { + out << (*ii); + if (ii + 1 != elems.end()) { + out << delim; + } + } + return out.str(); } - -static cxxopts::ParseResult parse_options(int argc, char **argv) { - cxxopts::Options opt_parser(argv[0]); - std::vector mount_options; - opt_parser.add_options() - ("debug", "Enable filesystem debug messages") - ("debug-fuse", "Enable libfuse debug messages") - ("foreground", "Run in foreground") - ("help", "Print help") - ("nocache", "Disable attribute all caching") - ("nosplice", "Do not use splice(2) to transfer data") - ("nopassthrough", "Do not use pass-through mode for read/write") - ("single", "Run single-threaded") - ("o", "Mount options (see mount.fuse(5) - only use if you know 
what " - "you are doing)", cxxopts::value(mount_options)) - ("num-threads", "Number of libfuse worker threads", - cxxopts::value()->default_value(SFS_DEFAULT_THREADS)) - ("clone-fd", "use separate fuse device fd for each thread") - ("direct-io", "enable fuse kernel internal direct-io"); - - // FIXME: Find a better way to limit the try clause to just - // opt_parser.parse() (cf. https://github.com/jarro2783/cxxopts/issues/146) - auto options = parse_wrapper(opt_parser, argc, argv); - - if (options.count("help")) { - print_usage(argv[0]); - // Strip everything before the option list from the - // default help string. - auto help = opt_parser.help(); - std::cout << std::endl << "options:" - << help.substr(help.find("\n\n") + 1, string::npos); - exit(0); - - } else if (argc != 3) { - std::cout << argv[0] << ": invalid number of arguments\n"; - print_usage(argv[0]); - exit(2); - } - - fs.debug = options.count("debug") != 0; - fs.debug_fuse = options.count("debug-fuse") != 0; - - fs.foreground = options.count("foreground") != 0; - if (fs.debug || fs.debug_fuse) - fs.foreground = true; - - fs.nosplice = options.count("nosplice") != 0; - fs.passthrough = options.count("nopassthrough") == 0; - fs.num_threads = options["num-threads"].as(); - fs.clone_fd = options.count("clone-fd"); - fs.direct_io = options.count("direct-io"); - char* resolved_path = realpath(argv[1], NULL); - if (resolved_path == NULL) - warn("WARNING: realpath() failed with"); - fs.source = std::string {resolved_path}; - free(resolved_path); - - std::vector flattened_mount_opts; - for (auto opt : mount_options) { - string_split(opt, flattened_mount_opts, ","); - } - - bool found_fsname = false; - for (auto opt : flattened_mount_opts) { - if (opt.find("fsname=") == 0) { - found_fsname = true; - continue; - } - - /* Filter out some obviously incorrect options. 
*/ - if (opt == "fd") { - std::cout << argv[0] << ": Unsupported mount option: " << opt << "\n"; - print_usage(argv[0]); - exit(2); - } - } - if (!found_fsname) { - flattened_mount_opts.push_back("fsname=" + fs.source); - } - flattened_mount_opts.push_back("default_permissions"); - fs.fuse_mount_options = string_join(flattened_mount_opts, ','); - return options; +static cxxopts::ParseResult parse_options(int argc, char **argv) +{ + cxxopts::Options opt_parser(argv[0]); + std::vector mount_options; + opt_parser.add_options()("debug", "Enable filesystem debug messages")( + "debug-fuse", "Enable libfuse debug messages")( + "foreground", "Run in foreground")("help", "Print help")( + "nocache", "Disable attribute all caching")( + "nosplice", "Do not use splice(2) to transfer data")( + "nopassthrough", "Do not use pass-through mode for read/write")( + "single", "Run single-threaded")( + "o", + "Mount options (see mount.fuse(5) - only use if you know what " + "you are doing)", + cxxopts::value(mount_options))( + "num-threads", "Number of libfuse worker threads", + cxxopts::value()->default_value(SFS_DEFAULT_THREADS))( + "clone-fd", "use separate fuse device fd for each thread")( + "direct-io", "enable fuse kernel internal direct-io"); + + // FIXME: Find a better way to limit the try clause to just + // opt_parser.parse() (cf. https://github.com/jarro2783/cxxopts/issues/146) + auto options = parse_wrapper(opt_parser, argc, argv); + + if (options.count("help")) { + print_usage(argv[0]); + // Strip everything before the option list from the + // default help string. 
+ auto help = opt_parser.help(); + std::cout << std::endl + << "options:" + << help.substr(help.find("\n\n") + 1, string::npos); + std::cout << "\nFuse lowlevel options:\n"; + fuse_lowlevel_help(); + exit(0); + + } else if (argc != 3) { + std::cout << argv[0] << ": invalid number of arguments\n"; + print_usage(argv[0]); + exit(2); + } + + fs.debug = options.count("debug") != 0; + fs.debug_fuse = options.count("debug-fuse") != 0; + + fs.foreground = options.count("foreground") != 0; + if (fs.debug || fs.debug_fuse) + fs.foreground = true; + + fs.nosplice = options.count("nosplice") != 0; + fs.passthrough = options.count("nopassthrough") == 0; + fs.num_threads = options["num-threads"].as(); + fs.clone_fd = options.count("clone-fd"); + fs.direct_io = options.count("direct-io"); + + char *resolved_path = realpath(argv[1], NULL); + if (resolved_path == NULL) + warn("WARNING: realpath() failed with"); + fs.source = std::string{ resolved_path }; + free(resolved_path); + + std::vector flattened_mount_opts; + for (auto opt : mount_options) { + string_split(opt, flattened_mount_opts, ","); + } + + bool found_fsname = false; + for (auto opt : flattened_mount_opts) { + if (opt.find("fsname=") == 0) { + found_fsname = true; + continue; + } + + /* Filter out some obviously incorrect options. 
*/ + if (opt == "fd") { + std::cout << argv[0] + << ": Unsupported mount option: " << opt + << "\n"; + print_usage(argv[0]); + exit(2); + } + } + if (!found_fsname) { + flattened_mount_opts.push_back("fsname=" + fs.source); + } + flattened_mount_opts.push_back("default_permissions"); + fs.fuse_mount_options = string_join(flattened_mount_opts, ','); + return options; } - -static void maximize_fd_limit() { - struct rlimit lim {}; - auto res = getrlimit(RLIMIT_NOFILE, &lim); - if (res != 0) { - warn("WARNING: getrlimit() failed with"); - return; - } - lim.rlim_cur = lim.rlim_max; - res = setrlimit(RLIMIT_NOFILE, &lim); - if (res != 0) - warn("WARNING: setrlimit() failed with"); +static void maximize_fd_limit() +{ + struct rlimit lim {}; + auto res = getrlimit(RLIMIT_NOFILE, &lim); + if (res != 0) { + warn("WARNING: getrlimit() failed with"); + return; + } + lim.rlim_cur = lim.rlim_max; + res = setrlimit(RLIMIT_NOFILE, &lim); + if (res != 0) + warn("WARNING: setrlimit() failed with"); } +int main(int argc, char *argv[]) +{ + struct fuse_loop_config *loop_config = NULL; -int main(int argc, char *argv[]) { - - struct fuse_loop_config *loop_config = NULL; + // Parse command line options + auto options{ parse_options(argc, argv) }; - // Parse command line options - auto options {parse_options(argc, argv)}; + // We need an fd for every dentry in our the filesystem that the + // kernel knows about. This is way more than most processes need, + // so try to get rid of any resource softlimit. + maximize_fd_limit(); - // We need an fd for every dentry in our the filesystem that the - // kernel knows about. This is way more than most processes need, - // so try to get rid of any resource softlimit. - maximize_fd_limit(); + // Initialize filesystem root + fs.root.fd = -1; + fs.root.nlookup = 9999; + fs.timeout = options.count("nocache") ? 0 : 86400.0; - // Initialize filesystem root - fs.root.fd = -1; - fs.root.nlookup = 9999; - fs.timeout = options.count("nocache") ? 
0 : 86400.0; + struct stat stat; + auto ret = lstat(fs.source.c_str(), &stat); + if (ret == -1) + err(1, "ERROR: failed to stat source (\"%s\")", + fs.source.c_str()); + if (!S_ISDIR(stat.st_mode)) + errx(1, "ERROR: source is not a directory"); + fs.src_dev = stat.st_dev; - struct stat stat; - auto ret = lstat(fs.source.c_str(), &stat); - if (ret == -1) - err(1, "ERROR: failed to stat source (\"%s\")", fs.source.c_str()); - if (!S_ISDIR(stat.st_mode)) - errx(1, "ERROR: source is not a directory"); - fs.src_dev = stat.st_dev; + fs.root.fd = open(fs.source.c_str(), O_PATH); + if (fs.root.fd == -1) + err(1, "ERROR: open(\"%s\", O_PATH)", fs.source.c_str()); - fs.root.fd = open(fs.source.c_str(), O_PATH); - if (fs.root.fd == -1) - err(1, "ERROR: open(\"%s\", O_PATH)", fs.source.c_str()); + // Initialize fuse + fuse_args args = FUSE_ARGS_INIT(0, nullptr); + if (fuse_opt_add_arg(&args, argv[0]) || fuse_opt_add_arg(&args, "-o") || + fuse_opt_add_arg(&args, fs.fuse_mount_options.c_str()) || + (fs.debug_fuse && fuse_opt_add_arg(&args, "-odebug"))) + errx(3, "ERROR: Out of memory adding arguments"); - // Initialize fuse - fuse_args args = FUSE_ARGS_INIT(0, nullptr); - if (fuse_opt_add_arg(&args, argv[0]) || - fuse_opt_add_arg(&args, "-o") || - fuse_opt_add_arg(&args, fs.fuse_mount_options.c_str()) || - (fs.debug_fuse && fuse_opt_add_arg(&args, "-odebug"))) - errx(3, "ERROR: Out of memory"); + ret = -1; + fuse_lowlevel_ops sfs_oper{}; + assign_operations(sfs_oper); + auto se = fuse_session_new(&args, &sfs_oper, sizeof(sfs_oper), &fs); + if (se == nullptr) + goto err_out1; - ret = -1; - fuse_lowlevel_ops sfs_oper {}; - assign_operations(sfs_oper); - auto se = fuse_session_new(&args, &sfs_oper, sizeof(sfs_oper), &fs); - if (se == nullptr) - goto err_out1; + if (fuse_set_signal_handlers(se) != 0) + goto err_out2; - if (fuse_set_signal_handlers(se) != 0) - goto err_out2; + if (fuse_set_fail_signal_handlers(se) != 0) + goto err_out2; - if (fuse_set_fail_signal_handlers(se) != 0) - 
goto err_out2; + // Don't apply umask, use modes exactly as specified + umask(0); - // Don't apply umask, use modes exactly as specified - umask(0); + // Mount and run main loop + loop_config = fuse_loop_cfg_create(); - // Mount and run main loop - loop_config = fuse_loop_cfg_create(); + if (fs.num_threads != -1) + fuse_loop_cfg_set_max_threads(loop_config, fs.num_threads); - if (fs.num_threads != -1) - fuse_loop_cfg_set_max_threads(loop_config, fs.num_threads); + fuse_loop_cfg_set_clone_fd(loop_config, fs.clone_fd); - fuse_loop_cfg_set_clone_fd(loop_config, fs.clone_fd); - - if (fuse_session_mount(se, argv[2]) != 0) - goto err_out3; + if (fuse_session_mount(se, argv[2]) != 0) + goto err_out3; - fuse_daemonize(fs.foreground); + fuse_daemonize(fs.foreground); - if (!fs.foreground) - fuse_log_enable_syslog("passthrough-hp", LOG_PID | LOG_CONS, LOG_DAEMON); + if (!fs.foreground) + fuse_log_enable_syslog("passthrough-hp", LOG_PID | LOG_CONS, + LOG_DAEMON); - if (options.count("single")) - ret = fuse_session_loop(se); - else - ret = fuse_session_loop_mt(se, loop_config); + if (options.count("single")) + ret = fuse_session_loop(se); + else + ret = fuse_session_loop_mt(se, loop_config); - fuse_session_unmount(se); + fuse_session_unmount(se); err_out3: - fuse_remove_signal_handlers(se); + fuse_remove_signal_handlers(se); err_out2: - fuse_session_destroy(se); + fuse_session_destroy(se); err_out1: - fuse_loop_cfg_destroy(loop_config); - fuse_opt_free_args(&args); + fuse_loop_cfg_destroy(loop_config); + fuse_opt_free_args(&args); - if (!fs.foreground) - fuse_log_close_syslog(); + if (!fs.foreground) + fuse_log_close_syslog(); - return ret ? 1 : 0; + return ret ? 1 : 0; } - diff --git a/example/passthrough_ll.c b/example/passthrough_ll.c index 01a420869..b746ea8a4 100644 --- a/example/passthrough_ll.c +++ b/example/passthrough_ll.c @@ -3,7 +3,7 @@ Copyright (C) 2001-2007 Miklos Szeredi This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. 
+ See the file GPL2.txt. */ /** @file @@ -13,11 +13,7 @@ * just "passing through" all requests to the corresponding user-space * libc functions. In contrast to passthrough.c and passthrough_fh.c, * this implementation uses the low-level API. Its performance should - * be the least bad among the three, but many operations are not - * implemented. In particular, it is not possible to remove files (or - * directories) because the code necessary to defer actual removal - * until the file is not opened anymore would make the example much - * more complicated. + * be the least bad among the three. * * When writeback caching is enabled (-o writeback mount option), it * is only possible to write to files for which the mounting user has @@ -35,7 +31,7 @@ */ #define _GNU_SOURCE -#define FUSE_USE_VERSION FUSE_MAKE_VERSION(3, 12) +#define FUSE_USE_VERSION FUSE_MAKE_VERSION(3, 18) #include #include @@ -315,7 +311,7 @@ static struct lo_inode *create_new_inode(int fd, struct fuse_entry_param *e, str { struct lo_inode *inode = NULL; struct lo_inode *prev, *next; - + inode = calloc(1, sizeof(struct lo_inode)); if (!inode) return NULL; @@ -352,7 +348,7 @@ static int fill_entry_param_new_inode(fuse_req_t req, fuse_ino_t parent, int fd, e->ino = (uintptr_t) create_new_inode(dup(fd), e, lo); if (lo_debug(req)) - fuse_log(FUSE_LOG_DEBUG, " %lli/%lli -> %lli\n", + fuse_log(FUSE_LOG_DEBUG, " %lli/%d -> %lli\n", (unsigned long long) parent, fd, (unsigned long long) e->ino); return 0; @@ -712,7 +708,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, err = errno; goto error; } else { // End of stream - break; + break; } } } @@ -744,11 +740,11 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, &st, nextoff); } if (entsize > rem) { - if (entry_ino != 0) + if (entry_ino != 0) lo_forget_one(req, entry_ino, 1); break; } - + p += entsize; rem -= entsize; @@ -816,9 +812,9 @@ static void lo_tmpfile(fuse_req_t req, fuse_ino_t parent, /* 
parallel_direct_writes feature depends on direct_io features. To make parallel_direct_writes valid, need set fi->direct_io in current function. */ - fi->parallel_direct_writes = 1; - - err = fill_entry_param_new_inode(req, parent, fd, &e); + fi->parallel_direct_writes = 1; + + err = fill_entry_param_new_inode(req, parent, fd, &e); if (err) fuse_reply_err(req, err); else @@ -981,8 +977,8 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, out_buf.buf[0].pos = off; if (lo_debug(req)) - fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", - ino, out_buf.buf[0].size, (unsigned long) off); + fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%jd)\n", + ino, out_buf.buf[0].size, (intmax_t) off); res = fuse_buf_copy(&out_buf, in_buf, 0); if(res < 0) @@ -1006,22 +1002,10 @@ static void lo_statfs(fuse_req_t req, fuse_ino_t ino) static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, off_t length, struct fuse_file_info *fi) { - int err = EOPNOTSUPP; + int err; (void) ino; -#ifdef HAVE_FALLOCATE - err = fallocate(fi->fh, mode, offset, length); - if (err < 0) - err = errno; - -#elif defined(HAVE_POSIX_FALLOCATE) - if (mode) { - fuse_reply_err(req, EOPNOTSUPP); - return; - } - - err = posix_fallocate(fi->fh, offset, length); -#endif + err = -do_fallocate(fi->fh, mode, offset, length); fuse_reply_err(req, err); } @@ -1199,10 +1183,12 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, ssize_t res; if (lo_debug(req)) - fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " - "off=%lu, ino=%" PRIu64 "/fd=%lu, " - "off=%lu, size=%zd, flags=0x%x)\n", - ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, + fuse_log(FUSE_LOG_DEBUG, + "%s(ino=%lld fd=%lld off=%jd ino=%lld fd=%lld off=%jd, size=%zd, flags=0x%x)\n", + __func__, (unsigned long long)ino_in, + (unsigned long long)fi_in->fh, + (intmax_t) off_in, (unsigned long long)ino_out, + (unsigned long 
long)fi_out->fh, (intmax_t) off_out, len, flags); res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, @@ -1227,6 +1213,28 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, fuse_reply_err(req, errno); } +#ifdef HAVE_STATX +static void lo_statx(fuse_req_t req, fuse_ino_t ino, int flags, int mask, + struct fuse_file_info *fi) +{ + struct lo_data *lo = lo_data(req); + struct statx buf; + int res; + int fd; + + if (fi) + fd = fi->fh; + else + fd = lo_fd(req, ino); + + res = statx(fd, "", flags | AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, mask, &buf); + if (res == -1) + fuse_reply_err(req, errno); + else + fuse_reply_statx(req, 0, &buf, lo->timeout); +} +#endif + static const struct fuse_lowlevel_ops lo_oper = { .init = lo_init, .destroy = lo_destroy, @@ -1267,6 +1275,9 @@ static const struct fuse_lowlevel_ops lo_oper = { .copy_file_range = lo_copy_file_range, #endif .lseek = lo_lseek, +#ifdef HAVE_STATX + .statx = lo_statx, +#endif }; int main(int argc, char *argv[]) diff --git a/example/poll.c b/example/poll.c index e617111fe..8c3fa6473 100644 --- a/example/poll.c +++ b/example/poll.c @@ -4,7 +4,7 @@ Copyright (C) 2008 Tejun Heo This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file diff --git a/example/poll_client.c b/example/poll_client.c index 83c58239c..96f14190f 100644 --- a/example/poll_client.c +++ b/example/poll_client.c @@ -4,7 +4,7 @@ Copyright (C) 2008 Tejun Heo This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ /** @file diff --git a/example/printcap.c b/example/printcap.c index 74d22e1c5..c711b42e3 100644 --- a/example/printcap.c +++ b/example/printcap.c @@ -3,7 +3,7 @@ Copyright (C) 2017 Nikolaus Rath This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. 
*/ /** @file diff --git a/example/usdt.bt b/example/usdt.bt new file mode 100644 index 000000000..41cfcdc59 --- /dev/null +++ b/example/usdt.bt @@ -0,0 +1,19 @@ +#!/usr/bin/env bpftrace + +// To run, do `sudo bpftrace usdt.bt` + +usdt:../build/lib/libfuse3.so:libfuse:request_receive +{ + printf("libfuse:request_receive hit, err=%d\n", arg0); +} + +usdt:../build/lib/libfuse3.so:libfuse:request_process +{ + printf("libfuse:request_process hit, opcode=%u, unique=%u\n", arg0, arg1); +} + +usdt:../build/lib/libfuse3.so:libfuse:request_reply +{ + printf("libfuse:request_reply hit, unique=%lu, len=%u, err=%u, reply_err=%d\n", + arg0, arg1, arg2, arg3); +} diff --git a/include/cuse_lowlevel.h b/include/cuse_lowlevel.h index 80476c20b..fe0b6f2da 100644 --- a/include/cuse_lowlevel.h +++ b/include/cuse_lowlevel.h @@ -4,7 +4,7 @@ Copyright (C) 2008-2009 Tejun Heo This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB. + See the file LGPL2.txt. Read example/cusexmp.c for usages. */ diff --git a/include/fuse.h b/include/fuse.h index 4582cc7ac..32d921b38 100644 --- a/include/fuse.h +++ b/include/fuse.h @@ -3,7 +3,7 @@ Copyright (C) 2001-2007 Miklos Szeredi This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB. + See the file LGPL2.txt. */ #ifndef FUSE_H_ @@ -33,6 +33,9 @@ extern "C" { * Basic FUSE API * * ----------------------------------------------------------- */ +/* Forward declaration */ +struct statx; + /** Handle for a FUSE filesystem */ struct fuse; @@ -57,13 +60,16 @@ enum fuse_readdir_flags { */ enum fuse_fill_dir_flags { /** - * "Plus" mode: all file attributes are valid + * "Plus" mode: file attributes are valid * * The attributes are used by the kernel to prefill the inode cache * during a readdir. * * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set * and vice versa. + * + * This does not make libfuse honor the 'st_ino' field. 
That is + * controlled by the 'use_ino' option instead. */ FUSE_FILL_DIR_DEFAULTS = 0, FUSE_FILL_DIR_PLUS = (1 << 1) @@ -850,6 +856,17 @@ struct fuse_operations { * Find next data or hole after the specified offset */ off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); + + /** + * Get extended file attributes. + * + * fi may be NULL. + * + * If path is NULL, then the AT_EMPTY_PATH bit in flags will be + * already set. + */ + int (*statx)(const char *path, int flags, int mask, struct statx *stxbuf, + struct fuse_file_info *fi); }; /** Extra context that may be needed by some filesystems @@ -1344,6 +1361,8 @@ ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, size_t len, int flags); off_t fuse_fs_lseek(struct fuse_fs *fs, const char *path, off_t off, int whence, struct fuse_file_info *fi); +int fuse_fs_statx(struct fuse_fs *fs, const char *path, int flags, int mask, + struct statx *stxbuf, struct fuse_file_info *fi); void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, struct fuse_config *cfg); void fuse_fs_destroy(struct fuse_fs *fs); diff --git a/include/fuse_common.h b/include/fuse_common.h index f96899075..041188ec7 100644 --- a/include/fuse_common.h +++ b/include/fuse_common.h @@ -2,12 +2,11 @@ Copyright (C) 2001-2007 Miklos Szeredi This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB. + See the file LGPL2.txt. */ /** @file */ -#include #if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) #error "Never include directly; use or instead." #endif @@ -24,6 +23,7 @@ #include "fuse_opt.h" #include "fuse_log.h" #include +#include #include #include @@ -174,7 +174,7 @@ struct fuse_loop_config_v1 { * * This feature is enabled by default when supported by the kernel. */ -#define FUSE_CAP_ASYNC_READ (1 << 0) +#define FUSE_CAP_ASYNC_READ (1UL << 0) /** * Indicates that the filesystem supports "remote" locking. 
@@ -182,7 +182,7 @@ struct fuse_loop_config_v1 { * This feature is enabled by default when supported by the kernel, * and if getlk() and setlk() handlers are implemented. */ -#define FUSE_CAP_POSIX_LOCKS (1 << 1) +#define FUSE_CAP_POSIX_LOCKS (1UL << 1) /** * Indicates that the filesystem supports the O_TRUNC open flag. If @@ -191,7 +191,7 @@ struct fuse_loop_config_v1 { * * This feature is enabled by default when supported by the kernel. */ -#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) +#define FUSE_CAP_ATOMIC_O_TRUNC (1UL << 3) /** * Indicates that the filesystem supports lookups of "." and "..". @@ -203,7 +203,7 @@ struct fuse_loop_config_v1 { * * This feature is disabled by default. */ -#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) +#define FUSE_CAP_EXPORT_SUPPORT (1UL << 4) /** * Indicates that the kernel should not apply the umask to the @@ -211,7 +211,7 @@ struct fuse_loop_config_v1 { * * This feature is disabled by default. */ -#define FUSE_CAP_DONT_MASK (1 << 6) +#define FUSE_CAP_DONT_MASK (1UL << 6) /** * Indicates that libfuse should try to use splice() when writing to @@ -219,7 +219,7 @@ struct fuse_loop_config_v1 { * * This feature is disabled by default. */ -#define FUSE_CAP_SPLICE_WRITE (1 << 7) +#define FUSE_CAP_SPLICE_WRITE (1UL << 7) /** * Indicates that libfuse should try to move pages instead of copying when @@ -227,7 +227,7 @@ struct fuse_loop_config_v1 { * * This feature is disabled by default. */ -#define FUSE_CAP_SPLICE_MOVE (1 << 8) +#define FUSE_CAP_SPLICE_MOVE (1UL << 8) /** * Indicates that libfuse should try to use splice() when reading from @@ -236,7 +236,7 @@ struct fuse_loop_config_v1 { * This feature is enabled by default when supported by the kernel and * if the filesystem implements a write_buf() handler. 
*/ -#define FUSE_CAP_SPLICE_READ (1 << 9) +#define FUSE_CAP_SPLICE_READ (1UL << 9) /** * If set, the calls to flock(2) will be emulated using POSIX locks and must @@ -249,14 +249,14 @@ struct fuse_loop_config_v1 { * This feature is enabled by default when supported by the kernel and * if the filesystem implements a flock() handler. */ -#define FUSE_CAP_FLOCK_LOCKS (1 << 10) +#define FUSE_CAP_FLOCK_LOCKS (1UL << 10) /** * Indicates that the filesystem supports ioctl's on directories. * * This feature is enabled by default when supported by the kernel. */ -#define FUSE_CAP_IOCTL_DIR (1 << 11) +#define FUSE_CAP_IOCTL_DIR (1UL << 11) /** * Traditionally, while a file is open the FUSE kernel module only @@ -278,7 +278,7 @@ struct fuse_loop_config_v1 { * * This feature is enabled by default when supported by the kernel. */ -#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) +#define FUSE_CAP_AUTO_INVAL_DATA (1UL << 12) /** * Indicates that the filesystem supports readdirplus. @@ -286,7 +286,7 @@ struct fuse_loop_config_v1 { * This feature is enabled by default when supported by the kernel and if the * filesystem implements a readdirplus() handler. */ -#define FUSE_CAP_READDIRPLUS (1 << 13) +#define FUSE_CAP_READDIRPLUS (1UL << 13) /** * Indicates that the filesystem supports adaptive readdirplus. @@ -314,7 +314,7 @@ struct fuse_loop_config_v1 { * if the filesystem implements both a readdirplus() and a readdir() * handler. */ -#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) +#define FUSE_CAP_READDIRPLUS_AUTO (1UL << 14) /** * Indicates that the filesystem supports asynchronous direct I/O submission. @@ -325,7 +325,7 @@ struct fuse_loop_config_v1 { * * This feature is enabled by default when supported by the kernel. */ -#define FUSE_CAP_ASYNC_DIO (1 << 15) +#define FUSE_CAP_ASYNC_DIO (1UL << 15) /** * Indicates that writeback caching should be enabled. This means that @@ -334,7 +334,7 @@ struct fuse_loop_config_v1 { * * This feature is disabled by default. 
*/ -#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) +#define FUSE_CAP_WRITEBACK_CACHE (1UL << 16) /** * Indicates support for zero-message opens. If this flag is set in @@ -349,7 +349,7 @@ struct fuse_loop_config_v1 { * this behavior you must return `ENOSYS` from the open() handler on supporting * kernels. */ -#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) +#define FUSE_CAP_NO_OPEN_SUPPORT (1UL << 17) /** * Indicates support for parallel directory operations. If this flag @@ -357,7 +357,7 @@ struct fuse_loop_config_v1 { * readdir() requests are never issued concurrently for the same * directory. */ -#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) +#define FUSE_CAP_PARALLEL_DIROPS (1UL << 18) /** * Indicates support for POSIX ACLs. @@ -376,7 +376,7 @@ struct fuse_loop_config_v1 { * * This feature is disabled by default. */ -#define FUSE_CAP_POSIX_ACL (1 << 19) +#define FUSE_CAP_POSIX_ACL (1UL << 19) /** * Indicates that the filesystem is responsible for unsetting @@ -385,7 +385,7 @@ struct fuse_loop_config_v1 { * * This feature is disabled by default. */ -#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) +#define FUSE_CAP_HANDLE_KILLPRIV (1UL << 20) /** * Indicates that the filesystem is responsible for unsetting @@ -402,7 +402,7 @@ struct fuse_loop_config_v1 { * * This feature is disabled by default. */ -#define FUSE_CAP_HANDLE_KILLPRIV_V2 (1 << 21) +#define FUSE_CAP_HANDLE_KILLPRIV_V2 (1UL << 21) /** * Indicates that the kernel supports caching symlinks in its page cache. @@ -415,7 +415,7 @@ struct fuse_loop_config_v1 { * If the kernel supports it (>= 4.20), you can enable this feature by * setting this flag in the `want` field of the `fuse_conn_info` structure. */ -#define FUSE_CAP_CACHE_SYMLINKS (1 << 23) +#define FUSE_CAP_CACHE_SYMLINKS (1UL << 23) /** * Indicates support for zero-message opendirs. If this flag is set in @@ -430,7 +430,7 @@ struct fuse_loop_config_v1 { * this behavior you must return `ENOSYS` from the opendir() handler on * supporting kernels. 
*/ -#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) +#define FUSE_CAP_NO_OPENDIR_SUPPORT (1UL << 24) /** * Indicates support for invalidating cached pages only on explicit request. @@ -453,7 +453,7 @@ struct fuse_loop_config_v1 { * * This feature is disabled by default. */ -#define FUSE_CAP_EXPLICIT_INVAL_DATA (1 << 25) +#define FUSE_CAP_EXPLICIT_INVAL_DATA (1UL << 25) /** * Indicates support that dentries can be expired. @@ -469,14 +469,14 @@ struct fuse_loop_config_v1 { * The dentry could also be mounted in a different mount instance, in which case * any submounts will still be detached. */ -#define FUSE_CAP_EXPIRE_ONLY (1 << 26) +#define FUSE_CAP_EXPIRE_ONLY (1UL << 26) /** * Indicates that an extended 'struct fuse_setxattr' is used by the kernel * side - extra_flags are passed, which are used (as of now by acl) processing. * For example FUSE_SETXATTR_ACL_KILL_SGID might be set. */ -#define FUSE_CAP_SETXATTR_EXT (1 << 27) +#define FUSE_CAP_SETXATTR_EXT (1UL << 27) /** * Files opened with FUSE_DIRECT_IO do not support MAP_SHARED mmap. This restriction @@ -485,7 +485,7 @@ struct fuse_loop_config_v1 { * ensure coherency between mount points (or network clients) and with kernel page * cache as enforced by mmap that cannot be guaranteed anymore. */ -#define FUSE_CAP_DIRECT_IO_ALLOW_MMAP (1 << 28) +#define FUSE_CAP_DIRECT_IO_ALLOW_MMAP (1UL << 28) /** * Indicates support for passthrough mode access for read/write operations. @@ -497,7 +497,7 @@ struct fuse_loop_config_v1 { * * This feature is disabled by default. */ -#define FUSE_CAP_PASSTHROUGH (1 << 29) +#define FUSE_CAP_PASSTHROUGH (1UL << 29) /** * Indicates that the file system cannot handle NFS export @@ -505,7 +505,12 @@ struct fuse_loop_config_v1 { * If this flag is set NFS export and name_to_handle_at * is not going to work at all and will fail with EOPNOTSUPP. 
*/ -#define FUSE_CAP_NO_EXPORT_SUPPORT (1 << 30) +#define FUSE_CAP_NO_EXPORT_SUPPORT (1UL << 30) + +/** + * Indicates support for io-uring between fuse-server and fuse-client + */ +#define FUSE_CAP_OVER_IO_URING (1UL << 31) /** * Ioctl flags @@ -695,10 +700,16 @@ struct fuse_conn_info { */ uint64_t want_ext; + /** + * Request timeout (in seconds). If the request is not answered by + * this timeout, the connection will be aborted by the kernel. + */ + uint16_t request_timeout; + /** * For future use. */ - uint32_t reserved[16]; + uint16_t reserved[31]; }; struct fuse_session; diff --git a/include/fuse_kernel.h b/include/fuse_kernel.h index d08b99d60..94621f68a 100644 --- a/include/fuse_kernel.h +++ b/include/fuse_kernel.h @@ -217,6 +217,28 @@ * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag * - add FUSE_NO_EXPORT_SUPPORT init flag * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag + * + * 7.41 + * - add FUSE_ALLOW_IDMAP + * 7.42 + * - Add FUSE_OVER_IO_URING and all other io-uring related flags and data + * structures: + * - struct fuse_uring_ent_in_out + * - struct fuse_uring_req_header + * - struct fuse_uring_cmd_req + * - FUSE_URING_IN_OUT_HEADER_SZ + * - FUSE_URING_OP_IN_OUT_SZ + * - enum fuse_uring_cmd + * + * 7.43 + * - add FUSE_REQUEST_TIMEOUT + * + * 7.44 + * - add FUSE_NOTIFY_INC_EPOCH + * + * 7.45 + * - add FUSE_COPY_FILE_RANGE_64 + * - add struct fuse_copy_file_range_out */ #ifndef _LINUX_FUSE_H @@ -252,7 +274,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 40 +#define FUSE_KERNEL_MINOR_VERSION 45 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -421,6 +443,10 @@ struct fuse_file_lock { * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit * of the request ID indicates resend requests + * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts + * FUSE_OVER_IO_URING: 
Indicate that client supports io-uring + * FUSE_REQUEST_TIMEOUT: kernel supports timing out requests. + * init_out.request_timeout contains the timeout (in secs) */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -463,9 +489,11 @@ struct fuse_file_lock { #define FUSE_PASSTHROUGH (1ULL << 37) #define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) #define FUSE_HAS_RESEND (1ULL << 39) - /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP +#define FUSE_ALLOW_IDMAP (1ULL << 40) +#define FUSE_OVER_IO_URING (1ULL << 41) +#define FUSE_REQUEST_TIMEOUT (1ULL << 42) /** * CUSE INIT request/reply flags @@ -633,6 +661,7 @@ enum fuse_opcode { FUSE_SYNCFS = 50, FUSE_TMPFILE = 51, FUSE_STATX = 52, + FUSE_COPY_FILE_RANGE_64 = 53, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -650,6 +679,7 @@ enum fuse_notify_code { FUSE_NOTIFY_RETRIEVE = 5, FUSE_NOTIFY_DELETE = 6, FUSE_NOTIFY_RESEND = 7, + FUSE_NOTIFY_INC_EPOCH = 8, FUSE_NOTIFY_CODE_MAX, }; @@ -893,7 +923,8 @@ struct fuse_init_out { uint16_t map_alignment; uint32_t flags2; uint32_t max_stack_depth; - uint32_t unused[6]; + uint16_t request_timeout; + uint16_t unused[11]; }; #define CUSE_INIT_INFO_MAX 4096 @@ -984,6 +1015,21 @@ struct fuse_fallocate_in { */ #define FUSE_UNIQUE_RESEND (1ULL << 63) +/** + * This value will be set by the kernel to + * (struct fuse_in_header).{uid,gid} fields in + * case when: + * - fuse daemon enabled FUSE_ALLOW_IDMAP + * - idmapping information is not available and uid/gid + * can not be mapped in accordance with an idmapping. + * + * Note: an idmapping information always available + * for inode creation operations like: + * FUSE_MKNOD, FUSE_SYMLINK, FUSE_MKDIR, FUSE_TMPFILE, + * FUSE_CREATE and FUSE_RENAME2 (with RENAME_WHITEOUT). 
+ */ +#define FUSE_INVALID_UIDGID ((uint32_t)(-1)) + struct fuse_in_header { uint32_t len; uint32_t opcode; @@ -1107,6 +1153,11 @@ struct fuse_copy_file_range_in { uint64_t flags; }; +/* For FUSE_COPY_FILE_RANGE_64 */ +struct fuse_copy_file_range_out { + uint64_t bytes_copied; +}; + #define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0) #define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1) struct fuse_setupmapping_in { @@ -1186,4 +1237,67 @@ struct fuse_supp_groups { uint32_t groups[]; }; +/** + * Size of the ring buffer header + */ +#define FUSE_URING_IN_OUT_HEADER_SZ 128 +#define FUSE_URING_OP_IN_OUT_SZ 128 + +/* Used as part of the fuse_uring_req_header */ +struct fuse_uring_ent_in_out { + uint64_t flags; + + /* + * commit ID to be used in a reply to a ring request (see also + * struct fuse_uring_cmd_req) + */ + uint64_t commit_id; + + /* size of user payload buffer */ + uint32_t payload_sz; + uint32_t padding; + + uint64_t reserved; +}; + +/** + * Header for all fuse-io-uring requests + */ +struct fuse_uring_req_header { + /* struct fuse_in_header / struct fuse_out_header */ + char in_out[FUSE_URING_IN_OUT_HEADER_SZ]; + + /* per op code header */ + char op_in[FUSE_URING_OP_IN_OUT_SZ]; + + struct fuse_uring_ent_in_out ring_ent_in_out; +}; + +/** + * sqe commands to the kernel + */ +enum fuse_uring_cmd { + FUSE_IO_URING_CMD_INVALID = 0, + + /* register the request buffer and fetch a fuse request */ + FUSE_IO_URING_CMD_REGISTER = 1, + + /* commit fuse request result and fetch next request */ + FUSE_IO_URING_CMD_COMMIT_AND_FETCH = 2, +}; + +/** + * In the 80B command area of the SQE. 
+ */ +struct fuse_uring_cmd_req { + uint64_t flags; + + /* entry identifier for commits */ + uint64_t commit_id; + + /* queue the command is for (queue index) */ + uint16_t qid; + uint8_t padding[6]; +}; + #endif /* _LINUX_FUSE_H */ diff --git a/include/fuse_log.h b/include/fuse_log.h index c8559572a..7948bab14 100644 --- a/include/fuse_log.h +++ b/include/fuse_log.h @@ -3,7 +3,7 @@ Copyright (C) 2019 Red Hat, Inc. This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB. + See the file LGPL2.txt. */ #ifndef FUSE_LOG_H_ @@ -73,7 +73,8 @@ void fuse_set_log_func(fuse_log_func_t func); * @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc) * @param fmt sprintf-style format string including newline */ -void fuse_log(enum fuse_log_level level, const char *fmt, ...); +void fuse_log(enum fuse_log_level level, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); /** * Switch default log handler from stderr to syslog diff --git a/include/fuse_lowlevel.h b/include/fuse_lowlevel.h index c7b44d963..6be0295b7 100644 --- a/include/fuse_lowlevel.h +++ b/include/fuse_lowlevel.h @@ -3,7 +3,7 @@ Copyright (C) 2001-2007 Miklos Szeredi This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB. + See the file LGPL2.txt. */ #ifndef FUSE_LOWLEVEL_H_ @@ -49,6 +49,9 @@ typedef uint64_t fuse_ino_t; /** Request pointer type */ typedef struct fuse_req *fuse_req_t; +/* Forward declaration */ +struct statx; + /** * Session * @@ -194,11 +197,10 @@ enum fuse_notify_entry_flags { * `fuse_session_new()`. In this case, methods will only be called if * the kernel's permission check has succeeded. * - * The filesystem sometimes needs to handle a return value of -ENOENT - * from the reply function, which means, that the request was - * interrupted, and the reply discarded. For example if - * fuse_reply_open() return -ENOENT means, that the release method for - * this file will not be called. 
+ * It is generally not really necessary to check the fuse_reply_* return + * values for errors, as any error in sending a reply indicates an + * unrecoverable problem with the kernel fuse connection, which will also + * terminate the session loop anyway. * * This data structure is ABI sensitive, on adding new functions these need to * be appended at the end of the struct @@ -450,7 +452,7 @@ struct fuse_lowlevel_ops { * * If this request is answered with an error code of ENOSYS, this is * treated as a permanent failure with error code EINVAL, i.e. all - * future bmap requests will fail with EINVAL without being + * future rename requests will fail with EINVAL without being * send to the filesystem process. * * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If @@ -1303,7 +1305,6 @@ struct fuse_lowlevel_ops { void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, struct fuse_file_info *fi); - /** * Create a tempfile * @@ -1325,6 +1326,21 @@ struct fuse_lowlevel_ops { void (*tmpfile) (fuse_req_t req, fuse_ino_t parent, mode_t mode, struct fuse_file_info *fi); + /** + * Get extended file attributes. + * + * Valid replies: + * fuse_reply_statx + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param flags bitmask of requested flags + * @param mask bitmask of requested fields + * @param fi file information (may be NULL) + */ + void (*statx)(fuse_req_t req, fuse_ino_t ino, int flags, int mask, + struct fuse_file_info *fi); }; /** @@ -1705,6 +1721,20 @@ int fuse_reply_poll(fuse_req_t req, unsigned revents); */ int fuse_reply_lseek(fuse_req_t req, off_t off); +/** + * Reply with extended file attributes. 
+ * + * Possible requests: + * statx + * + * @param req request handle + * @param flags statx flags + * @param statx the attributes + * @param attr_timeout validity timeout (in seconds) for the attributes + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_statx(fuse_req_t req, int flags, struct statx *statx, double attr_timeout); + /* ----------------------------------------------------------- * * Notification * * ----------------------------------------------------------- */ @@ -1744,6 +1774,20 @@ int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, off_t off, off_t len); +/** + * Notify to increment the epoch for the current connection + * + * Each fuse connection has an 'epoch', which is initialized during INIT. + * Caching will then be validated against the epoch value: if the current epoch + * is higher than an object being revalidated, the object is invalid. + * + * This function simply increments the current epoch value. + * + * @param se the session object + * @return zero for success, -errno for failure + */ +int fuse_lowlevel_notify_increment_epoch(struct fuse_session *se); + /** * Notify to invalidate parent attributes and the dentry matching parent/name * @@ -2070,6 +2114,8 @@ fuse_session_new_versioned(struct fuse_args *args, * If not all options are known, an error message is written to stderr * and the function returns NULL. * + * To create a no-op session just for mounting pass op as NULL. + * * Option parsing skips argv[0], which is assumed to contain the * program name.
To prevent accidentally passing an option in * argv[0], this element must always be present (even if no options @@ -2315,6 +2361,29 @@ void fuse_session_process_buf(struct fuse_session *se, */ int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); +/** + * Check if the request is submitted through fuse-io-uring + */ +bool fuse_req_is_uring(fuse_req_t req); + +/** + * Get the payload of a request + * (for requests submitted through fuse-io-uring only) + * + * This is useful for a file system that wants to write data directly + * to the request buffer. With io-uring the req is the buffer owner + * and the file system can write directly to the buffer and avoid + * extra copying. For example useful for network file systems. + * + * @param req the request + * @param payload pointer to the payload + * @param payload_sz size of the payload + * @param mr memory registration handle, currently unused + * @return 0 on success, -errno on failure + */ +int fuse_req_get_payload(fuse_req_t req, char **payload, size_t *payload_sz, + void **mr); + #ifdef __cplusplus } #endif diff --git a/include/fuse_opt.h b/include/fuse_opt.h index d8573e74f..a9a41fe80 100644 --- a/include/fuse_opt.h +++ b/include/fuse_opt.h @@ -3,7 +3,7 @@ Copyright (C) 2001-2007 Miklos Szeredi This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB. + See the file LGPL2.txt. */ #ifndef FUSE_OPT_H_ diff --git a/lib/buffer.c b/lib/buffer.c index 6375433ee..2b72d95a0 100644 --- a/lib/buffer.c +++ b/lib/buffer.c @@ -6,7 +6,7 @@ fuse_bufvec`. This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB + See the file LGPL2.txt */ #define _GNU_SOURCE diff --git a/lib/compat.c b/lib/compat.c index b98ca4b04..5fac4155c 100644 --- a/lib/compat.c +++ b/lib/compat.c @@ -7,7 +7,7 @@ file system by implementing nothing but the request handlers. This program can be distributed under the terms of the GNU LGPLv2. 
- See the file COPYING.LIB. + See the file LGPL2.txt. */ /* Description: diff --git a/lib/cuse_lowlevel.c b/lib/cuse_lowlevel.c index 5387f8430..0bc2e66be 100644 --- a/lib/cuse_lowlevel.c +++ b/lib/cuse_lowlevel.c @@ -4,7 +4,7 @@ Copyright (C) 2008 Tejun Heo This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB. + See the file LGPL2.txt. */ #include "fuse_config.h" @@ -192,9 +192,11 @@ static int cuse_reply_init(fuse_req_t req, struct cuse_init_out *arg, return fuse_send_reply_iov_nofree(req, 0, iov, 3); } -void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +void _cuse_lowlevel_init(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *req_payload) { - struct fuse_init_in *arg = (struct fuse_init_in *) inarg; + const struct fuse_init_in *arg = op_in; + (void)req_payload; struct cuse_init_out outarg; struct fuse_session *se = req->se; struct cuse_data *cd = se->cuse_data; @@ -263,6 +265,11 @@ void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_free_req(req); } +void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +{ + _cuse_lowlevel_init(req, nodeid, inarg, NULL); +} + struct fuse_session *cuse_lowlevel_setup(int argc, char *argv[], const struct cuse_info *ci, const struct cuse_lowlevel_ops *clop, diff --git a/lib/fuse.c b/lib/fuse.c index 85914546e..9607bb0d2 100644 --- a/lib/fuse.c +++ b/lib/fuse.c @@ -6,7 +6,7 @@ API. This program can be distributed under the terms of the GNU LGPLv2. 
- See the file COPYING.LIB + See the file LGPL2.txt */ #define _GNU_SOURCE @@ -1507,6 +1507,29 @@ static void set_stat(struct fuse *f, fuse_ino_t nodeid, struct stat *stbuf) stbuf->st_gid = f->conf.gid; } +#ifdef HAVE_STATX +static void set_statx(struct fuse *f, fuse_ino_t nodeid, struct statx *stxbuf) +{ + if (!f->conf.use_ino) + stxbuf->stx_ino = nodeid; + if (f->conf.set_mode) { + if (f->conf.dmask && S_ISDIR(stxbuf->stx_mode)) + stxbuf->stx_mode = (stxbuf->stx_mode & S_IFMT) | + (0777 & ~f->conf.dmask); + else if (f->conf.fmask) + stxbuf->stx_mode = (stxbuf->stx_mode & S_IFMT) | + (0777 & ~f->conf.fmask); + else + stxbuf->stx_mode = (stxbuf->stx_mode & S_IFMT) | + (0777 & ~f->conf.umask); + } + if (f->conf.set_uid) + stxbuf->stx_uid = f->conf.uid; + if (f->conf.set_gid) + stxbuf->stx_gid = f->conf.gid; +} +#endif + static struct fuse *req_fuse(fuse_req_t req) { return (struct fuse *) fuse_req_userdata(req); @@ -1592,146 +1615,133 @@ int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.getattr) { - if (fs->debug) { - char buf[10]; - fuse_log(FUSE_LOG_DEBUG, "getattr[%s] %s\n", - file_info_string(fi, buf, sizeof(buf)), - path); - } - return fs->op.getattr(path, buf, fi); - } else { + if (!fs->op.getattr) return -ENOSYS; + + if (fs->debug) { + char buf[10]; + + fuse_log(FUSE_LOG_DEBUG, "getattr[%s] %s\n", + file_info_string(fi, buf, sizeof(buf)), + path); } + return fs->op.getattr(path, buf, fi); } int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, const char *newpath, unsigned int flags) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.rename) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "rename %s %s 0x%x\n", oldpath, newpath, - flags); - - return fs->op.rename(oldpath, newpath, flags); - } else { + if (!fs->op.rename) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "rename %s %s 0x%x\n", oldpath, newpath, + 
flags); + + return fs->op.rename(oldpath, newpath, flags); } int fuse_fs_unlink(struct fuse_fs *fs, const char *path) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.unlink) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "unlink %s\n", path); - - return fs->op.unlink(path); - } else { + if (!fs->op.unlink) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "unlink %s\n", path); + + return fs->op.unlink(path); } int fuse_fs_rmdir(struct fuse_fs *fs, const char *path) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.rmdir) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "rmdir %s\n", path); - - return fs->op.rmdir(path); - } else { + if (!fs->op.rmdir) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "rmdir %s\n", path); + + return fs->op.rmdir(path); } int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, const char *path) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.symlink) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "symlink %s %s\n", linkname, path); - - return fs->op.symlink(linkname, path); - } else { + if (!fs->op.symlink) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "symlink %s %s\n", linkname, path); + + return fs->op.symlink(linkname, path); } int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.link) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "link %s %s\n", oldpath, newpath); - - return fs->op.link(oldpath, newpath); - } else { + if (!fs->op.link) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "link %s %s\n", oldpath, newpath); + + return fs->op.link(oldpath, newpath); } int fuse_fs_release(struct fuse_fs *fs, const char *path, struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.release) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "release%s[%llu] flags: 0x%x\n", - fi->flush ? 
"+flush" : "", - (unsigned long long) fi->fh, fi->flags); - - return fs->op.release(path, fi); - } else { + if (!fs->op.release) return 0; - } + + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "release%s[%llu] flags: 0x%x\n", + fi->flush ? "+flush" : "", + (unsigned long long) fi->fh, fi->flags); + + return fs->op.release(path, fi); } int fuse_fs_opendir(struct fuse_fs *fs, const char *path, struct fuse_file_info *fi) { + int err; + fuse_get_context()->private_data = fs->user_data; - if (fs->op.opendir) { - int err; + if (!fs->op.opendir) + return 0; - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "opendir flags: 0x%x %s\n", fi->flags, - path); + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "opendir flags: 0x%x %s\n", fi->flags, + path); - err = fs->op.opendir(path, fi); + err = fs->op.opendir(path, fi); - if (fs->debug && !err) - fuse_log(FUSE_LOG_DEBUG, " opendir[%llu] flags: 0x%x %s\n", - (unsigned long long) fi->fh, fi->flags, path); + if (fs->debug && !err) + fuse_log(FUSE_LOG_DEBUG, " opendir[%llu] flags: 0x%x %s\n", + (unsigned long long) fi->fh, fi->flags, path); - return err; - } else { - return 0; - } + return err; } int fuse_fs_open(struct fuse_fs *fs, const char *path, struct fuse_file_info *fi) { + int err; + fuse_get_context()->private_data = fs->user_data; - if (fs->op.open) { - int err; + if (!fs->op.open) + return 0; - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "open flags: 0x%x %s\n", fi->flags, - path); + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "open flags: 0x%x %s\n", fi->flags, + path); - err = fs->op.open(path, fi); + err = fs->op.open(path, fi); - if (fs->debug && !err) - fuse_log(FUSE_LOG_DEBUG, " open[%llu] flags: 0x%x %s\n", - (unsigned long long) fi->fh, fi->flags, path); + if (fs->debug && !err) + fuse_log(FUSE_LOG_DEBUG, " open[%llu] flags: 0x%x %s\n", + (unsigned long long) fi->fh, fi->flags, path); - return err; - } else { - return 0; - } + return err; } static void fuse_free_buf(struct fuse_bufvec *buf) @@ -1750,161 +1760,159 @@ int 
fuse_fs_read_buf(struct fuse_fs *fs, const char *path, struct fuse_bufvec **bufp, size_t size, off_t off, struct fuse_file_info *fi) { - fuse_get_context()->private_data = fs->user_data; - if (fs->op.read || fs->op.read_buf) { - int res; + int res; - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, - "read[%llu] %zu bytes from %llu flags: 0x%x\n", - (unsigned long long) fi->fh, - size, (unsigned long long) off, fi->flags); + fuse_get_context()->private_data = fs->user_data; + if (!fs->op.read && !fs->op.read_buf) + return -ENOSYS; - if (fs->op.read_buf) { - res = fs->op.read_buf(path, bufp, size, off, fi); - } else { - struct fuse_bufvec *buf; - void *mem; + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, + "read[%llu] %zu bytes from %llu flags: 0x%x\n", + (unsigned long long) fi->fh, + size, (unsigned long long) off, fi->flags); - buf = malloc(sizeof(struct fuse_bufvec)); - if (buf == NULL) - return -ENOMEM; + if (fs->op.read_buf) { + res = fs->op.read_buf(path, bufp, size, off, fi); + } else { + struct fuse_bufvec *buf; + void *mem; - mem = malloc(size); - if (mem == NULL) { - free(buf); - return -ENOMEM; - } - *buf = FUSE_BUFVEC_INIT(size); - buf->buf[0].mem = mem; - *bufp = buf; + buf = malloc(sizeof(struct fuse_bufvec)); + if (buf == NULL) + return -ENOMEM; - res = fs->op.read(path, mem, size, off, fi); - if (res >= 0) - buf->buf[0].size = res; + mem = malloc(size); + if (mem == NULL) { + free(buf); + return -ENOMEM; } + *buf = FUSE_BUFVEC_INIT(size); + buf->buf[0].mem = mem; + *bufp = buf; + + res = fs->op.read(path, mem, size, off, fi); + if (res >= 0) + buf->buf[0].size = res; + } - if (fs->debug && res >= 0) - fuse_log(FUSE_LOG_DEBUG, " read[%llu] %zu bytes from %llu\n", - (unsigned long long) fi->fh, - fuse_buf_size(*bufp), - (unsigned long long) off); - if (res >= 0 && fuse_buf_size(*bufp) > size) - fuse_log(FUSE_LOG_ERR, "fuse: read too many bytes\n"); + if (fs->debug && res >= 0) + fuse_log(FUSE_LOG_DEBUG, " read[%llu] %zu bytes from %llu\n", + (unsigned long long) 
fi->fh, + fuse_buf_size(*bufp), + (unsigned long long) off); + if (res >= 0 && fuse_buf_size(*bufp) > size) + fuse_log(FUSE_LOG_ERR, "fuse: read too many bytes\n"); - if (res < 0) - return res; + if (res < 0) + return res; - return 0; - } else { - return -ENOSYS; - } + return 0; } int fuse_fs_read(struct fuse_fs *fs, const char *path, char *mem, size_t size, off_t off, struct fuse_file_info *fi) { + int res; + fuse_get_context()->private_data = fs->user_data; - if (fs->op.read || fs->op.read_buf) { - int res; + if (!fs->op.read && !fs->op.read_buf) + return -ENOSYS; - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, - "read[%llu] %zu bytes from %llu flags: 0x%x\n", - (unsigned long long) fi->fh, - size, (unsigned long long) off, fi->flags); + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, + "read[%llu] %zu bytes from %llu flags: 0x%x\n", + (unsigned long long) fi->fh, + size, (unsigned long long) off, fi->flags); - if (fs->op.read_buf) { - struct fuse_bufvec *buf = NULL; + if (fs->op.read_buf) { + struct fuse_bufvec *buf = NULL; - res = fs->op.read_buf(path, &buf, size, off, fi); - if (res == 0) { - struct fuse_bufvec dst = FUSE_BUFVEC_INIT(size); + res = fs->op.read_buf(path, &buf, size, off, fi); + if (res == 0) { + struct fuse_bufvec dst = FUSE_BUFVEC_INIT(size); - dst.buf[0].mem = mem; - res = fuse_buf_copy(&dst, buf, 0); - } - fuse_free_buf(buf); - } else { - res = fs->op.read(path, mem, size, off, fi); + dst.buf[0].mem = mem; + res = fuse_buf_copy(&dst, buf, 0); } - - if (fs->debug && res >= 0) - fuse_log(FUSE_LOG_DEBUG, " read[%llu] %u bytes from %llu\n", - (unsigned long long) fi->fh, - res, - (unsigned long long) off); - if (res >= 0 && res > (int) size) - fuse_log(FUSE_LOG_ERR, "fuse: read too many bytes\n"); - - return res; + fuse_free_buf(buf); } else { - return -ENOSYS; + res = fs->op.read(path, mem, size, off, fi); } + + if (fs->debug && res >= 0) + fuse_log(FUSE_LOG_DEBUG, " read[%llu] %u bytes from %llu\n", + (unsigned long long) fi->fh, + res, + (unsigned long 
long) off); + if (res >= 0 && res > (int) size) + fuse_log(FUSE_LOG_ERR, "fuse: read too many bytes\n"); + + return res; } int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, struct fuse_bufvec *buf, off_t off, struct fuse_file_info *fi) { + int res; + size_t size; + fuse_get_context()->private_data = fs->user_data; - if (fs->op.write_buf || fs->op.write) { - int res; - size_t size = fuse_buf_size(buf); + if (!fs->op.write_buf && !fs->op.write) + return -ENOSYS; - assert(buf->idx == 0 && buf->off == 0); - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, - "write%s[%llu] %zu bytes to %llu flags: 0x%x\n", - fi->writepage ? "page" : "", - (unsigned long long) fi->fh, - size, - (unsigned long long) off, - fi->flags); - - if (fs->op.write_buf) { - res = fs->op.write_buf(path, buf, off, fi); + size = fuse_buf_size(buf); + assert(buf->idx == 0 && buf->off == 0); + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, + "write%s[%llu] %zu bytes to %llu flags: 0x%x\n", + fi->writepage ? "page" : "", + (unsigned long long) fi->fh, + size, + (unsigned long long) off, + fi->flags); + + if (fs->op.write_buf) { + res = fs->op.write_buf(path, buf, off, fi); + } else { + void *mem = NULL; + struct fuse_buf *flatbuf; + struct fuse_bufvec tmp = FUSE_BUFVEC_INIT(size); + + if (buf->count == 1 && + !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { + flatbuf = &buf->buf[0]; } else { - void *mem = NULL; - struct fuse_buf *flatbuf; - struct fuse_bufvec tmp = FUSE_BUFVEC_INIT(size); + res = -ENOMEM; + mem = malloc(size); + if (mem == NULL) + goto out; - if (buf->count == 1 && - !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { - flatbuf = &buf->buf[0]; - } else { - res = -ENOMEM; - mem = malloc(size); - if (mem == NULL) - goto out; - - tmp.buf[0].mem = mem; - res = fuse_buf_copy(&tmp, buf, 0); - if (res <= 0) - goto out_free; - - tmp.buf[0].size = res; - flatbuf = &tmp.buf[0]; - } + tmp.buf[0].mem = mem; + res = fuse_buf_copy(&tmp, buf, 0); + if (res <= 0) + goto out_free; - res = fs->op.write(path, flatbuf->mem, 
flatbuf->size, - off, fi); -out_free: - free(mem); + tmp.buf[0].size = res; + flatbuf = &tmp.buf[0]; } -out: - if (fs->debug && res >= 0) - fuse_log(FUSE_LOG_DEBUG, " write%s[%llu] %u bytes to %llu\n", - fi->writepage ? "page" : "", - (unsigned long long) fi->fh, res, - (unsigned long long) off); - if (res > (int) size) - fuse_log(FUSE_LOG_ERR, "fuse: wrote too many bytes\n"); - return res; - } else { - return -ENOSYS; + res = fs->op.write(path, flatbuf->mem, flatbuf->size, + off, fi); +out_free: + free(mem); } +out: + if (fs->debug && res >= 0) + fuse_log(FUSE_LOG_DEBUG, " write%s[%llu] %u bytes to %llu\n", + fi->writepage ? "page" : "", + (unsigned long long) fi->fh, res, + (unsigned long long) off); + if (res > (int) size) + fuse_log(FUSE_LOG_ERR, "fuse: wrote too many bytes\n"); + + return res; } int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *mem, @@ -1921,45 +1929,41 @@ int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.fsync) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "fsync[%llu] datasync: %i\n", - (unsigned long long) fi->fh, datasync); - - return fs->op.fsync(path, datasync, fi); - } else { + if (!fs->op.fsync) return -ENOSYS; - } + + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "fsync[%llu] datasync: %i\n", + (unsigned long long) fi->fh, datasync); + + return fs->op.fsync(path, datasync, fi); } int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.fsyncdir) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "fsyncdir[%llu] datasync: %i\n", - (unsigned long long) fi->fh, datasync); - - return fs->op.fsyncdir(path, datasync, fi); - } else { + if (!fs->op.fsyncdir) return -ENOSYS; - } + + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "fsyncdir[%llu] datasync: %i\n", + (unsigned long long) fi->fh, datasync); + + return 
fs->op.fsyncdir(path, datasync, fi); } int fuse_fs_flush(struct fuse_fs *fs, const char *path, struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.flush) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "flush[%llu]\n", - (unsigned long long) fi->fh); - - return fs->op.flush(path, fi); - } else { + if (!fs->op.flush) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "flush[%llu]\n", + (unsigned long long) fi->fh); + + return fs->op.flush(path, fi); } int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf) @@ -1981,15 +1985,14 @@ int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.releasedir) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "releasedir[%llu] flags: 0x%x\n", - (unsigned long long) fi->fh, fi->flags); + if (!fs->op.releasedir) + return 0; - return fs->op.releasedir(path, fi); - } else { - return 0; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "releasedir[%llu] flags: 0x%x\n", + (unsigned long long) fi->fh, fi->flags); + + return fs->op.releasedir(path, fi); } int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, @@ -1998,273 +2001,247 @@ int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, enum fuse_readdir_flags flags) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.readdir) { - if (fs->debug) { - fuse_log(FUSE_LOG_DEBUG, "readdir%s[%llu] from %llu\n", - (flags & FUSE_READDIR_PLUS) ? "plus" : "", - (unsigned long long) fi->fh, - (unsigned long long) off); - } - - return fs->op.readdir(path, buf, filler, off, fi, flags); - } else { + if (!fs->op.readdir) return -ENOSYS; + if (fs->debug) { + fuse_log(FUSE_LOG_DEBUG, "readdir%s[%llu] from %llu\n", + (flags & FUSE_READDIR_PLUS) ? 
"plus" : "", + (unsigned long long) fi->fh, + (unsigned long long) off); } + + return fs->op.readdir(path, buf, filler, off, fi, flags); } int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, struct fuse_file_info *fi) { + int err; + fuse_get_context()->private_data = fs->user_data; - if (fs->op.create) { - int err; + if (!fs->op.create) + return -ENOSYS; - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, - "create flags: 0x%x %s 0%o umask=0%03o\n", - fi->flags, path, mode, - fuse_get_context()->umask); + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, + "create flags: 0x%x %s 0%o umask=0%03o\n", + fi->flags, path, mode, + fuse_get_context()->umask); - err = fs->op.create(path, mode, fi); + err = fs->op.create(path, mode, fi); - if (fs->debug && !err) - fuse_log(FUSE_LOG_DEBUG, " create[%llu] flags: 0x%x %s\n", - (unsigned long long) fi->fh, fi->flags, path); + if (fs->debug && !err) + fuse_log(FUSE_LOG_DEBUG, " create[%llu] flags: 0x%x %s\n", + (unsigned long long) fi->fh, fi->flags, path); - return err; - } else { - return -ENOSYS; - } + return err; } int fuse_fs_lock(struct fuse_fs *fs, const char *path, struct fuse_file_info *fi, int cmd, struct flock *lock) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.lock) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "lock[%llu] %s %s start: %llu len: %llu pid: %llu\n", - (unsigned long long) fi->fh, - (cmd == F_GETLK ? "F_GETLK" : - (cmd == F_SETLK ? "F_SETLK" : - (cmd == F_SETLKW ? "F_SETLKW" : "???"))), - (lock->l_type == F_RDLCK ? "F_RDLCK" : - (lock->l_type == F_WRLCK ? "F_WRLCK" : - (lock->l_type == F_UNLCK ? "F_UNLCK" : - "???"))), - (unsigned long long) lock->l_start, - (unsigned long long) lock->l_len, - (unsigned long long) lock->l_pid); - - return fs->op.lock(path, fi, cmd, lock); - } else { + if (!fs->op.lock) return -ENOSYS; - } + + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "lock[%llu] %s %s start: %llu len: %llu pid: %llu\n", + (unsigned long long) fi->fh, + (cmd == F_GETLK ? 
"F_GETLK" : + (cmd == F_SETLK ? "F_SETLK" : + (cmd == F_SETLKW ? "F_SETLKW" : "???"))), + (lock->l_type == F_RDLCK ? "F_RDLCK" : + (lock->l_type == F_WRLCK ? "F_WRLCK" : + (lock->l_type == F_UNLCK ? "F_UNLCK" : + "???"))), + (unsigned long long) lock->l_start, + (unsigned long long) lock->l_len, + (unsigned long long) lock->l_pid); + + return fs->op.lock(path, fi, cmd, lock); } int fuse_fs_flock(struct fuse_fs *fs, const char *path, struct fuse_file_info *fi, int op) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.flock) { - if (fs->debug) { - int xop = op & ~LOCK_NB; - - fuse_log(FUSE_LOG_DEBUG, "lock[%llu] %s%s\n", - (unsigned long long) fi->fh, - xop == LOCK_SH ? "LOCK_SH" : - (xop == LOCK_EX ? "LOCK_EX" : - (xop == LOCK_UN ? "LOCK_UN" : "???")), - (op & LOCK_NB) ? "|LOCK_NB" : ""); - } - return fs->op.flock(path, fi, op); - } else { + if (!fs->op.flock) return -ENOSYS; + + if (fs->debug) { + int xop = op & ~LOCK_NB; + + fuse_log(FUSE_LOG_DEBUG, "lock[%llu] %s%s\n", + (unsigned long long) fi->fh, + xop == LOCK_SH ? "LOCK_SH" : + (xop == LOCK_EX ? "LOCK_EX" : + (xop == LOCK_UN ? "LOCK_UN" : "???")), + (op & LOCK_NB) ? 
"|LOCK_NB" : ""); } + return fs->op.flock(path, fi, op); } int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.chown) { - if (fs->debug) { - char buf[10]; - fuse_log(FUSE_LOG_DEBUG, "chown[%s] %s %lu %lu\n", - file_info_string(fi, buf, sizeof(buf)), - path, (unsigned long) uid, (unsigned long) gid); - } - return fs->op.chown(path, uid, gid, fi); - } else { + if (!fs->op.chown) return -ENOSYS; + if (fs->debug) { + char buf[10]; + + fuse_log(FUSE_LOG_DEBUG, "chown[%s] %s %lu %lu\n", + file_info_string(fi, buf, sizeof(buf)), + path, (unsigned long) uid, (unsigned long) gid); } + return fs->op.chown(path, uid, gid, fi); } int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.truncate) { - if (fs->debug) { - char buf[10]; - fuse_log(FUSE_LOG_DEBUG, "truncate[%s] %llu\n", - file_info_string(fi, buf, sizeof(buf)), - (unsigned long long) size); - } - return fs->op.truncate(path, size, fi); - } else { + if (!fs->op.truncate) return -ENOSYS; + if (fs->debug) { + char buf[10]; + + fuse_log(FUSE_LOG_DEBUG, "truncate[%s] %llu\n", + file_info_string(fi, buf, sizeof(buf)), + (unsigned long long) size); } + return fs->op.truncate(path, size, fi); } int fuse_fs_utimens(struct fuse_fs *fs, const char *path, const struct timespec tv[2], struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.utimens) { - if (fs->debug) { - char buf[10]; - fuse_log(FUSE_LOG_DEBUG, "utimens[%s] %s %li.%09lu %li.%09lu\n", - file_info_string(fi, buf, sizeof(buf)), - path, tv[0].tv_sec, tv[0].tv_nsec, - tv[1].tv_sec, tv[1].tv_nsec); - } - return fs->op.utimens(path, tv, fi); - } else { + if (!fs->op.utimens) return -ENOSYS; + if (fs->debug) { + char buf[10]; + + fuse_log(FUSE_LOG_DEBUG, "utimens[%s] %s %jd.%09ld %jd.%09ld\n", + 
file_info_string(fi, buf, sizeof(buf)), + path, (intmax_t)tv[0].tv_sec, tv[0].tv_nsec, + (intmax_t)tv[1].tv_sec, tv[1].tv_nsec); } + return fs->op.utimens(path, tv, fi); } int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.access) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "access %s 0%o\n", path, mask); - - return fs->op.access(path, mask); - } else { + if (!fs->op.access) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "access %s 0%o\n", path, mask); + + return fs->op.access(path, mask); } int fuse_fs_readlink(struct fuse_fs *fs, const char *path, char *buf, size_t len) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.readlink) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "readlink %s %lu\n", path, - (unsigned long) len); - - return fs->op.readlink(path, buf, len); - } else { + if (!fs->op.readlink) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "readlink %s %lu\n", path, + (unsigned long) len); + + return fs->op.readlink(path, buf, len); } int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, dev_t rdev) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.mknod) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "mknod %s 0%o 0x%llx umask=0%03o\n", - path, mode, (unsigned long long) rdev, - fuse_get_context()->umask); - - return fs->op.mknod(path, mode, rdev); - } else { + if (!fs->op.mknod) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "mknod %s 0%o 0x%llx umask=0%03o\n", + path, mode, (unsigned long long) rdev, + fuse_get_context()->umask); + + return fs->op.mknod(path, mode, rdev); } int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.mkdir) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "mkdir %s 0%o umask=0%03o\n", - path, mode, fuse_get_context()->umask); - - return fs->op.mkdir(path, mode); - } else { + 
if (!fs->op.mkdir) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "mkdir %s 0%o umask=0%03o\n", + path, mode, fuse_get_context()->umask); + + return fs->op.mkdir(path, mode); } int fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, const char *value, size_t size, int flags) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.setxattr) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "setxattr %s %s %lu 0x%x\n", - path, name, (unsigned long) size, flags); - - return fs->op.setxattr(path, name, value, size, flags); - } else { + if (!fs->op.setxattr) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "setxattr %s %s %lu 0x%x\n", + path, name, (unsigned long) size, flags); + + return fs->op.setxattr(path, name, value, size, flags); } int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, char *value, size_t size) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.getxattr) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "getxattr %s %s %lu\n", - path, name, (unsigned long) size); - - return fs->op.getxattr(path, name, value, size); - } else { + if (!fs->op.getxattr) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "getxattr %s %s %lu\n", + path, name, (unsigned long) size); + + return fs->op.getxattr(path, name, value, size); } int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, size_t size) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.listxattr) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "listxattr %s %lu\n", - path, (unsigned long) size); - - return fs->op.listxattr(path, list, size); - } else { + if (!fs->op.listxattr) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "listxattr %s %lu\n", + path, (unsigned long) size); + + return fs->op.listxattr(path, list, size); } int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, uint64_t *idx) { fuse_get_context()->private_data = 
fs->user_data; - if (fs->op.bmap) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "bmap %s blocksize: %lu index: %llu\n", - path, (unsigned long) blocksize, - (unsigned long long) *idx); - - return fs->op.bmap(path, blocksize, idx); - } else { + if (!fs->op.bmap) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "bmap %s blocksize: %lu index: %llu\n", + path, (unsigned long) blocksize, + (unsigned long long) *idx); + + return fs->op.bmap(path, blocksize, idx); } int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, const char *name) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.removexattr) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "removexattr %s %s\n", path, name); - - return fs->op.removexattr(path, name); - } else { + if (!fs->op.removexattr) return -ENOSYS; - } + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "removexattr %s %s\n", path, name); + + return fs->op.removexattr(path, name); } int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, @@ -2272,55 +2249,52 @@ int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, void *data) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.ioctl) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "ioctl[%llu] 0x%x flags: 0x%x\n", - (unsigned long long) fi->fh, cmd, flags); - - return fs->op.ioctl(path, cmd, arg, fi, flags, data); - } else + if (!fs->op.ioctl) return -ENOSYS; + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "ioctl[%llu] 0x%x flags: 0x%x\n", + (unsigned long long) fi->fh, cmd, flags); + + return fs->op.ioctl(path, cmd, arg, fi, flags, data); } int fuse_fs_poll(struct fuse_fs *fs, const char *path, struct fuse_file_info *fi, struct fuse_pollhandle *ph, unsigned *reventsp) { - fuse_get_context()->private_data = fs->user_data; - if (fs->op.poll) { - int res; + int res; - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "poll[%llu] ph: %p, events 0x%x\n", - (unsigned long long) fi->fh, ph, - fi->poll_events); + 
fuse_get_context()->private_data = fs->user_data; + if (!fs->op.poll) + return -ENOSYS; + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "poll[%llu] ph: %p, events 0x%x\n", + (unsigned long long) fi->fh, ph, + fi->poll_events); - res = fs->op.poll(path, fi, ph, reventsp); + res = fs->op.poll(path, fi, ph, reventsp); - if (fs->debug && !res) - fuse_log(FUSE_LOG_DEBUG, " poll[%llu] revents: 0x%x\n", - (unsigned long long) fi->fh, *reventsp); + if (fs->debug && !res) + fuse_log(FUSE_LOG_DEBUG, " poll[%llu] revents: 0x%x\n", + (unsigned long long) fi->fh, *reventsp); - return res; - } else - return -ENOSYS; + return res; } int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, off_t offset, off_t length, struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.fallocate) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "fallocate %s mode %x, offset: %llu, length: %llu\n", - path, - mode, - (unsigned long long) offset, - (unsigned long long) length); - - return fs->op.fallocate(path, mode, offset, length, fi); - } else + if (!fs->op.fallocate) return -ENOSYS; + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "fallocate %s mode %x, offset: %llu, length: %llu\n", + path, + mode, + (unsigned long long) offset, + (unsigned long long) length); + + return fs->op.fallocate(path, mode, offset, length, fi); } ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, @@ -2330,38 +2304,69 @@ ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, size_t len, int flags) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.copy_file_range) { - if (fs->debug) - fuse_log(FUSE_LOG_DEBUG, "copy_file_range from %s:%llu to " - "%s:%llu, length: %llu\n", - path_in, - (unsigned long long) off_in, - path_out, - (unsigned long long) off_out, - (unsigned long long) len); - - return fs->op.copy_file_range(path_in, fi_in, off_in, path_out, - fi_out, off_out, len, flags); - } else + if (!fs->op.copy_file_range) return 
-ENOSYS; + if (fs->debug) + fuse_log(FUSE_LOG_DEBUG, "copy_file_range from %s:%llu to " + "%s:%llu, length: %llu\n", + path_in, + (unsigned long long) off_in, + path_out, + (unsigned long long) off_out, + (unsigned long long) len); + + return fs->op.copy_file_range(path_in, fi_in, off_in, path_out, + fi_out, off_out, len, flags); } off_t fuse_fs_lseek(struct fuse_fs *fs, const char *path, off_t off, int whence, struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.lseek) { + if (!fs->op.lseek) + return -ENOSYS; + if (fs->debug) { + char buf[10]; + + fuse_log(FUSE_LOG_DEBUG, "lseek[%s] %llu %d\n", + file_info_string(fi, buf, sizeof(buf)), + (unsigned long long) off, whence); + } + return fs->op.lseek(path, off, whence, fi); +} + +#ifdef HAVE_STATX +int fuse_fs_statx(struct fuse_fs *fs, const char *path, int flags, int mask, + struct statx *stxbuf, struct fuse_file_info *fi) +{ + fuse_get_context()->private_data = fs->user_data; + if (fs->op.statx) { if (fs->debug) { char buf[10]; - fuse_log(FUSE_LOG_DEBUG, "lseek[%s] %llu %d\n", - file_info_string(fi, buf, sizeof(buf)), - (unsigned long long) off, whence); + + fuse_log(FUSE_LOG_DEBUG, "statx[%s] %s %d %d\n", + file_info_string(fi, buf, sizeof(buf)), path, + flags, mask); } - return fs->op.lseek(path, off, whence, fi); - } else { - return -ENOSYS; + return fs->op.statx(path, flags, mask, stxbuf, fi); } + + return -ENOSYS; +} +#else +int fuse_fs_statx(struct fuse_fs *fs, const char *path, int flags, int mask, + struct statx *stxbuf, struct fuse_file_info *fi) +{ + (void)fs; + (void)path; + (void)flags; + (void)mask; + (void)stxbuf; + (void)fi; + + return -ENOSYS; } +#endif static int is_open(struct fuse *f, fuse_ino_t dir, const char *name) { @@ -2654,7 +2659,7 @@ static void fuse_lib_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) { struct fuse *f = req_fuse_prepare(req); - struct fuse_entry_param e; + struct fuse_entry_param e = { .ino = 0 }; /* invalid ino */ char 
*path; int err; struct node *dot = NULL; @@ -2775,17 +2780,17 @@ int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.chmod) { - if (fs->debug) { - char buf[10]; - fuse_log(FUSE_LOG_DEBUG, "chmod[%s] %s %llo\n", - file_info_string(fi, buf, sizeof(buf)), - path, (unsigned long long) mode); - } - return fs->op.chmod(path, mode, fi); - } - else + if (!fs->op.chmod) return -ENOSYS; + + if (fs->debug) { + char buf[10]; + + fuse_log(FUSE_LOG_DEBUG, "chmod[%s] %s %llo\n", + file_info_string(fi, buf, sizeof(buf)), + path, (unsigned long long) mode); + } + return fs->op.chmod(path, mode, fi); } static void fuse_lib_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, @@ -3563,17 +3568,17 @@ static int fill_dir_plus(void *dh_, const char *name, const struct stat *statp, if (statp && (flags & FUSE_FILL_DIR_PLUS)) { e.attr = *statp; - } else { - e.attr.st_ino = FUSE_UNKNOWN_INO; - if (statp) { - e.attr.st_mode = statp->st_mode; - if (f->conf.use_ino) - e.attr.st_ino = statp->st_ino; - } - if (!f->conf.use_ino && f->conf.readdir_ino) { - e.attr.st_ino = (ino_t) - lookup_nodeid(f, dh->nodeid, name); - } + } + + e.attr.st_ino = FUSE_UNKNOWN_INO; + if (statp) { + e.attr.st_mode = statp->st_mode; + if (f->conf.use_ino) + e.attr.st_ino = statp->st_ino; + } + if (!f->conf.use_ino && f->conf.readdir_ino) { + e.attr.st_ino = (ino_t) + lookup_nodeid(f, dh->nodeid, name); } if (off) { @@ -4412,6 +4417,55 @@ static void fuse_lib_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence reply_err(req, res); } +#ifdef HAVE_STATX +static void fuse_lib_statx(fuse_req_t req, fuse_ino_t ino, int flags, int mask, + struct fuse_file_info *fi) +{ + struct fuse *f = req_fuse_prepare(req); + struct statx stxbuf; + char *path; + int err; + + memset(&stxbuf, 0, sizeof(stxbuf)); + + if (fi != NULL) + err = get_path_nullok(f, ino, &path); + else + err = get_path(f, ino, &path); + + if 
(!err) { + struct fuse_intr_data d; + + if (!path) + flags |= AT_EMPTY_PATH; + fuse_prepare_interrupt(f, req, &d); + err = fuse_fs_statx(f->fs, path, flags, mask, &stxbuf, fi); + fuse_finish_interrupt(f, req, &d); + free_path(f, ino, path); + } + if (!err) { + struct node *node; + + pthread_mutex_lock(&f->lock); + node = get_node(f, ino); + if (node->is_hidden && stxbuf.stx_nlink > 0) + stxbuf.stx_nlink--; + if (f->conf.auto_cache) { + struct stat stbuf; + + stbuf.st_mtime = stxbuf.stx_mtime.tv_nsec; + ST_MTIM_NSEC(&stbuf) = stxbuf.stx_mtime.tv_nsec; + stbuf.st_size = stxbuf.stx_size; + update_stat(node, &stbuf); + } + pthread_mutex_unlock(&f->lock); + set_statx(f, ino, &stxbuf); + fuse_reply_statx(req, 0, &stxbuf, f->conf.attr_timeout); + } else + reply_err(req, err); +} +#endif + static int clean_delay(struct fuse *f) { /* @@ -4510,6 +4564,9 @@ static struct fuse_lowlevel_ops fuse_path_ops = { .fallocate = fuse_lib_fallocate, .copy_file_range = fuse_lib_copy_file_range, .lseek = fuse_lib_lseek, +#ifdef HAVE_STATX + .statx = fuse_lib_statx, +#endif }; int fuse_notify_poll(struct fuse_pollhandle *ph) @@ -4569,8 +4626,7 @@ static int fuse_session_loop_remember(struct fuse *f) } } - free(fbuf.mem); - fuse_session_reset(se); + fuse_buf_free(&fbuf); return res < 0 ? 
-1 : 0; } @@ -4892,9 +4948,7 @@ static void *fuse_prune_nodes(void *fuse) struct fuse *f = fuse; int sleep_time; -#ifdef HAVE_PTHREAD_SETNAME_NP - pthread_setname_np(pthread_self(), "fuse_prune_nodes"); -#endif + fuse_set_thread_name("fuse_prune_nodes"); while(1) { sleep_time = fuse_clean_cache(f); @@ -5019,10 +5073,6 @@ struct fuse *_fuse_new_31(struct fuse_args *args, if (f->se == NULL) goto out_free_fs; - if (f->conf.debug) { - fuse_log(FUSE_LOG_DEBUG, "nullpath_ok: %i\n", f->conf.nullpath_ok); - } - /* Trace topmost layer by default */ f->fs->debug = f->conf.debug; f->ctr = 0; diff --git a/lib/fuse_i.h b/lib/fuse_i.h index acf9d5ae2..d35e1e51d 100644 --- a/lib/fuse_i.h +++ b/lib/fuse_i.h @@ -3,13 +3,18 @@ Copyright (C) 2001-2007 Miklos Szeredi This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB + See the file LGPL2.txt */ +#ifndef LIB_FUSE_I_H_ +#define LIB_FUSE_I_H_ + #include "fuse.h" #include "fuse_lowlevel.h" #include "util.h" +#include +#include #include #include #include @@ -23,6 +28,7 @@ }) struct mount_opts; +struct fuse_ring_pool; struct fuse_req { struct fuse_session *se; @@ -32,7 +38,11 @@ struct fuse_req { struct fuse_ctx ctx; struct fuse_chan *ch; int interrupted; - unsigned int ioctl_64bit : 1; + struct { + unsigned int ioctl_64bit : 1; + unsigned int is_uring : 1; + unsigned int is_copy_file_range_64 : 1; + } flags; union { struct { uint64_t unique; @@ -54,9 +64,14 @@ struct fuse_notify_req { struct fuse_notify_req *prev; }; +struct fuse_session_uring { + bool enable; + unsigned int q_depth; + struct fuse_ring_pool *pool; +}; + struct fuse_session { _Atomic(char *)mountpoint; - volatile int exited; int fd; struct fuse_custom_io *io; struct mount_opts *mo; @@ -79,14 +94,23 @@ struct fuse_session { _Atomic size_t bufsize; int error; - /* This is useful if any kind of ABI incompatibility is found at + /* + * This is useful if any kind of ABI incompatibility is found at * a later version, to 'fix' it at run 
time. */ struct libfuse_version version; + /* thread synchronization */ + _Atomic bool mt_exited; + pthread_mutex_t mt_lock; + sem_t mt_finish; + /* true if reading requests from /dev/fuse are handled internally */ bool buf_reallocable; + /* io_uring */ + struct fuse_session_uring uring; + /* * conn->want and conn_want_ext options set by libfuse , needed * to correctly convert want to want_ext @@ -196,7 +220,10 @@ int fuse_kern_mount(const char *mountpoint, struct mount_opts *mo); int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, int count); void fuse_free_req(fuse_req_t req); +void list_init_req(struct fuse_req *req); +void _cuse_lowlevel_init(fuse_req_t req, const fuse_ino_t nodeid, + const void *req_header, const void *req_payload); void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); @@ -235,3 +262,5 @@ int fuse_loop_cfg_verify(struct fuse_loop_config *config); /* room needed in buffer to accommodate header */ #define FUSE_BUFFER_HEADER_SIZE 0x1000 + +#endif /* LIB_FUSE_I_H_*/ diff --git a/lib/fuse_log.c b/lib/fuse_log.c index c1d16c148..66dcf8a06 100644 --- a/lib/fuse_log.c +++ b/lib/fuse_log.c @@ -5,63 +5,26 @@ Logging API. This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB + See the file LGPL2.txt */ #include "fuse_log.h" -#include #include #include #include +#include #define MAX_SYSLOG_LINE_LEN 512 static bool to_syslog = false; -static void default_log_func(__attribute__((unused)) enum fuse_log_level level, - const char *fmt, va_list ap) +static void default_log_func(enum fuse_log_level level, const char *fmt, va_list ap) { - if (to_syslog) { - int sys_log_level = LOG_ERR; - - /* - * with glibc fuse_log_level has identical values as - * syslog levels, but we also support BSD - better we convert to - * be sure. 
- */ - switch (level) { - case FUSE_LOG_DEBUG: - sys_log_level = LOG_DEBUG; - break; - case FUSE_LOG_INFO: - sys_log_level = LOG_INFO; - break; - case FUSE_LOG_NOTICE: - sys_log_level = LOG_NOTICE; - break; - case FUSE_LOG_WARNING: - sys_log_level = LOG_WARNING; - break; - case FUSE_LOG_ERR: - sys_log_level = LOG_ERR; - break; - case FUSE_LOG_CRIT: - sys_log_level = LOG_CRIT; - break; - case FUSE_LOG_ALERT: - sys_log_level = LOG_ALERT; - break; - case FUSE_LOG_EMERG: - sys_log_level = LOG_EMERG; - } - - char log[MAX_SYSLOG_LINE_LEN]; - vsnprintf(log, MAX_SYSLOG_LINE_LEN, fmt, ap); - syslog(sys_log_level, "%s", log); - } else { + if (to_syslog) + vsyslog(level, fmt, ap); + else vfprintf(stderr, fmt, ap); - } } static fuse_log_func_t log_func = default_log_func; diff --git a/lib/fuse_loop.c b/lib/fuse_loop.c index 410f43fbb..1ff075cbf 100644 --- a/lib/fuse_loop.c +++ b/lib/fuse_loop.c @@ -5,13 +5,13 @@ Implementation of the single-threaded FUSE session loop. This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB + See the file LGPL2.txt */ #include "fuse_config.h" #include "fuse_lowlevel.h" #include "fuse_i.h" - +#include "fuse_uring_i.h" #include #include #include @@ -41,6 +41,8 @@ int fuse_session_loop(struct fuse_session *se) res = 0; if(se->error != 0) res = se->error; - fuse_session_reset(se); + + if (se->uring.pool) + fuse_uring_stop(se); return res; } diff --git a/lib/fuse_loop_mt.c b/lib/fuse_loop_mt.c index 01c51ccac..ec4bb0b2b 100644 --- a/lib/fuse_loop_mt.c +++ b/lib/fuse_loop_mt.c @@ -5,7 +5,7 @@ Implementation of the multi-threaded FUSE session loop. This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB. + See the file LGPL2.txt. 
*/ #define _GNU_SOURCE @@ -15,6 +15,7 @@ #include "fuse_misc.h" #include "fuse_kernel.h" #include "fuse_i.h" +#include "fuse_uring_i.h" #include "util.h" #include @@ -53,14 +54,12 @@ struct fuse_worker { struct fuse_mt *mt; }; +/* synchronization via se->mt_lock */ struct fuse_mt { - pthread_mutex_t lock; int numworker; int numavail; struct fuse_session *se; struct fuse_worker main; - sem_t finish; - int exit; int error; int clone_fd; int max_idle; @@ -131,32 +130,30 @@ static void *fuse_do_work(void *data) { struct fuse_worker *w = (struct fuse_worker *) data; struct fuse_mt *mt = w->mt; + struct fuse_session *se = mt->se; -#ifdef HAVE_PTHREAD_SETNAME_NP - pthread_setname_np(pthread_self(), "fuse_worker"); -#endif + fuse_set_thread_name("fuse_worker"); - while (!fuse_session_exited(mt->se)) { + while (!fuse_session_exited(se)) { int isforget = 0; int res; pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); - res = fuse_session_receive_buf_internal(mt->se, &w->fbuf, - w->ch); + res = fuse_session_receive_buf_internal(se, &w->fbuf, w->ch); pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); if (res == -EINTR) continue; if (res <= 0) { if (res < 0) { - fuse_session_exit(mt->se); + fuse_session_exit(se); mt->error = res; } break; } - pthread_mutex_lock(&mt->lock); - if (mt->exit) { - pthread_mutex_unlock(&mt->lock); + pthread_mutex_lock(&se->mt_lock); + if (fuse_session_exited(se)) { + pthread_mutex_unlock(&se->mt_lock); return NULL; } @@ -174,13 +171,14 @@ static void *fuse_do_work(void *data) if (!isforget) mt->numavail--; - if (mt->numavail == 0 && mt->numworker < mt->max_threads) + if (mt->numavail == 0 && mt->numworker < mt->max_threads && + likely(se->got_init)) fuse_loop_start_thread(mt); - pthread_mutex_unlock(&mt->lock); + pthread_mutex_unlock(&se->mt_lock); - fuse_session_process_buf_internal(mt->se, &w->fbuf, w->ch); + fuse_session_process_buf_internal(se, &w->fbuf, w->ch); - pthread_mutex_lock(&mt->lock); + pthread_mutex_lock(&se->mt_lock); if 
(!isforget) mt->numavail++; @@ -191,14 +189,14 @@ static void *fuse_do_work(void *data) * delayed, a moving average might be useful for that. */ if (mt->max_idle != -1 && mt->numavail > mt->max_idle && mt->numworker > 1) { - if (mt->exit) { - pthread_mutex_unlock(&mt->lock); + if (fuse_session_exited(se)) { + pthread_mutex_unlock(&se->mt_lock); return NULL; } list_del_worker(w); mt->numavail--; mt->numworker--; - pthread_mutex_unlock(&mt->lock); + pthread_mutex_unlock(&se->mt_lock); pthread_detach(w->thread_id); fuse_buf_free(&w->fbuf); @@ -206,11 +204,10 @@ static void *fuse_do_work(void *data) free(w); return NULL; } - pthread_mutex_unlock(&mt->lock); + pthread_mutex_unlock(&se->mt_lock); } - sem_post(&mt->finish); - + sem_post(&se->mt_finish); return NULL; } @@ -360,9 +357,9 @@ static int fuse_loop_start_thread(struct fuse_mt *mt) static void fuse_join_worker(struct fuse_mt *mt, struct fuse_worker *w) { pthread_join(w->thread_id, NULL); - pthread_mutex_lock(&mt->lock); + pthread_mutex_lock(&mt->se->mt_lock); list_del_worker(w); - pthread_mutex_unlock(&mt->lock); + pthread_mutex_unlock(&mt->se->mt_lock); fuse_buf_free(&w->fbuf); fuse_chan_put(w->ch); free(w); @@ -398,34 +395,35 @@ int err; mt.max_threads = config->max_threads; mt.main.thread_id = pthread_self(); mt.main.prev = mt.main.next = &mt.main; - sem_init(&mt.finish, 0, 0); - pthread_mutex_init(&mt.lock, NULL); - pthread_mutex_lock(&mt.lock); + pthread_mutex_lock(&se->mt_lock); err = fuse_loop_start_thread(&mt); - pthread_mutex_unlock(&mt.lock); + pthread_mutex_unlock(&se->mt_lock); if (!err) { - /* sem_wait() is interruptible */ while (!fuse_session_exited(se)) - sem_wait(&mt.finish); + sem_wait(&se->mt_finish); + if (se->debug) + fuse_log(FUSE_LOG_DEBUG, + "fuse: session exited, terminating workers\n"); - pthread_mutex_lock(&mt.lock); + pthread_mutex_lock(&se->mt_lock); for (w = mt.main.next; w != &mt.main; w = w->next) pthread_cancel(w->thread_id); - mt.exit = 1; - pthread_mutex_unlock(&mt.lock); + 
pthread_mutex_unlock(&se->mt_lock); while (mt.main.next != &mt.main) fuse_join_worker(&mt, mt.main.next); err = mt.error; + + if (se->uring.pool) + fuse_uring_stop(se); } - pthread_mutex_destroy(&mt.lock); - sem_destroy(&mt.finish); + pthread_mutex_destroy(&se->mt_lock); if(se->error != 0) err = se->error; - fuse_session_reset(se); + if (created_config) { fuse_loop_cfg_destroy(config); diff --git a/lib/fuse_lowlevel.c b/lib/fuse_lowlevel.c index 2164ba5bf..d420b257b 100644 --- a/lib/fuse_lowlevel.c +++ b/lib/fuse_lowlevel.c @@ -6,7 +6,7 @@ functions are implemented in separate files. This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB + See the file LGPL2.txt */ #define _GNU_SOURCE @@ -18,8 +18,12 @@ #include "fuse_misc.h" #include "mount_util.h" #include "util.h" +#include "fuse_uring_i.h" +#include +#include #include +#include #include #include #include @@ -32,6 +36,11 @@ #include #include #include +#include + +#ifdef USDT_ENABLED +#include "usdt.h" +#endif #ifndef F_LINUX_SPECIFIC_BASE #define F_LINUX_SPECIFIC_BASE 1024 @@ -40,7 +49,6 @@ #define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) #endif - #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) #define OFFSET_MAX 0x7fffffffffffffffLL @@ -56,6 +64,45 @@ static __attribute__((constructor)) void fuse_ll_init_pagesize(void) pagesize = getpagesize(); } +#ifdef USDT_ENABLED +/* tracepoints */ +static void trace_request_receive(int err) +{ + USDT(libfuse, request_receive, err); +} + +static void trace_request_process(unsigned int opcode, unsigned int unique) +{ + USDT(libfuse, request_process, opcode, unique); +} + +static void trace_request_reply(uint64_t unique, unsigned int len, + int error, int reply_err) +{ + USDT(libfuse, request_reply, unique, len, error, reply_err); +} +#else +static void trace_request_receive(int err) +{ + (void)err; +} + +static void trace_request_process(unsigned int opcode, unsigned int unique) +{ + (void)opcode; + (void)unique; +} + 
+static void trace_request_reply(uint64_t unique, unsigned int len, + int error, int reply_err) +{ + (void)unique; + (void)len; + (void)error; + (void)reply_err; +} +#endif + static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) { attr->ino = stbuf->st_ino; @@ -99,7 +146,7 @@ static size_t iov_length(const struct iovec *iov, size_t count) return ret; } -static void list_init_req(struct fuse_req *req) +void list_init_req(struct fuse_req *req) { req->next = req; req->prev = req; @@ -124,6 +171,10 @@ static void list_add_req(struct fuse_req *req, struct fuse_req *next) static void destroy_req(fuse_req_t req) { + if (req->flags.is_uring) { + fuse_log(FUSE_LOG_ERR, "Refusing to destruct uring req\n"); + return; + } assert(req->ch == NULL); pthread_mutex_destroy(&req->lock); free(req); @@ -134,7 +185,11 @@ void fuse_free_req(fuse_req_t req) int ctr; struct fuse_session *se = req->se; - if (se->conn.no_interrupt) { + /* XXX: for now no support for interrupts with io-uring + * It actually might work already, though. But then would add + * a lock across ring queues. + */ + if (se->conn.no_interrupt || req->flags.is_uring) { ctr = --req->ref_cnt; fuse_chan_put(req->ch); req->ch = NULL; @@ -169,14 +224,47 @@ static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se) return req; } -/* Send data. If *ch* is NULL, send via session master fd */ +/* + * Send data to fuse-kernel using an fd of the fuse device. + */ +static int fuse_write_msg_dev(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int count) +{ + ssize_t res; + int err; + + if (se->io != NULL) + + /* se->io->writev is never NULL if se->io is not NULL as + * specified by fuse_session_custom_io() + */ + res = se->io->writev(ch ? ch->fd : se->fd, iov, count, + se->userdata); + else + res = writev(ch ? 
ch->fd : se->fd, iov, count); + + if (res == -1) { + /* ENOENT means the operation was interrupted */ + err = errno; + if (!fuse_session_exited(se) && err != ENOENT) + perror("fuse: writing device"); + return -err; + } + + return 0; +} + static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int count) + struct iovec *iov, int count, fuse_req_t req) { struct fuse_out_header *out = iov[0].iov_base; + int err; + bool is_uring = req && req->flags.is_uring ? true : false; - assert(se != NULL); + if (!is_uring) + assert(se != NULL); out->len = iov_length(iov, count); + if (se->debug) { if (out->unique == 0) { fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", @@ -193,28 +281,15 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, } } - ssize_t res; - if (se->io != NULL) - /* se->io->writev is never NULL if se->io is not NULL as - specified by fuse_session_custom_io()*/ - res = se->io->writev(ch ? ch->fd : se->fd, iov, count, - se->userdata); + if (is_uring) + err = fuse_send_msg_uring(req, iov, count); else - res = writev(ch ? 
ch->fd : se->fd, iov, count); - - int err = errno; + err = fuse_write_msg_dev(se, ch, iov, count); - if (res == -1) { - /* ENOENT means the operation was interrupted */ - if (!fuse_session_exited(se) && err != ENOENT) - perror("fuse: writing device"); - return -err; - } - - return 0; + trace_request_reply(out->unique, out->len, out->error, err); + return err; } - int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, int count) { @@ -236,7 +311,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, iov[0].iov_base = &out; iov[0].iov_len = sizeof(struct fuse_out_header); - return fuse_send_msg(req->se, req->ch, iov, count); + return fuse_send_msg(req->se, req->ch, iov, count, req); } static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, @@ -252,6 +327,9 @@ static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, static int send_reply(fuse_req_t req, int error, const void *arg, size_t argsize) { + if (req->flags.is_uring) + return send_reply_uring(req, error, arg, argsize); + struct iovec iov[2]; int count = 1; if (argsize) { @@ -511,7 +589,7 @@ int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) return send_reply_ok(req, &arg, sizeof(arg)); } -int fuse_reply_write(fuse_req_t req, size_t count) +static int do_fuse_reply_write(fuse_req_t req, size_t count) { struct fuse_write_out arg; @@ -521,6 +599,28 @@ int fuse_reply_write(fuse_req_t req, size_t count) return send_reply_ok(req, &arg, sizeof(arg)); } +static int do_fuse_reply_copy(fuse_req_t req, size_t count) +{ + struct fuse_copy_file_range_out arg; + + memset(&arg, 0, sizeof(arg)); + arg.bytes_copied = count; + + return send_reply_ok(req, &arg, sizeof(arg)); +} + +int fuse_reply_write(fuse_req_t req, size_t count) +{ + /* + * This function is also used by FUSE_COPY_FILE_RANGE and its 64-bit + * variant. 
+ */ + if (req->flags.is_copy_file_range_64) + return do_fuse_reply_copy(req, count); + else + return do_fuse_reply_write(req, count); +} + int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) { return send_reply_ok(req, buf, size); @@ -530,7 +630,7 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, struct fuse_chan *ch, struct iovec *iov, int iov_count, struct fuse_bufvec *buf, - size_t len) + size_t len, fuse_req_t req) { struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); void *mbuf; @@ -545,7 +645,7 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, iov[iov_count].iov_base = buf->buf[0].mem; iov[iov_count].iov_len = len; iov_count++; - return fuse_send_msg(se, ch, iov, iov_count); + return fuse_send_msg(se, ch, iov, iov_count, req); } res = posix_memalign(&mbuf, pagesize, len); @@ -563,7 +663,7 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, iov[iov_count].iov_base = mbuf; iov[iov_count].iov_len = len; iov_count++; - res = fuse_send_msg(se, ch, iov, iov_count); + res = fuse_send_msg(se, ch, iov, iov_count, req); free(mbuf); return res; @@ -653,11 +753,15 @@ static int read_back(int fd, char *buf, size_t len) res = read(fd, buf, len); if (res == -1) { - fuse_log(FUSE_LOG_ERR, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); + fuse_log(FUSE_LOG_ERR, + "fuse: internal error: failed to read back from pipe: %s\n", + strerror(errno)); return -EIO; } if (res != len) { - fuse_log(FUSE_LOG_ERR, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); + fuse_log(FUSE_LOG_ERR, + "fuse: internal error: short read back from pipe: %i from %zd\n", + res, len); return -EIO; } return 0; @@ -695,8 +799,9 @@ static int grow_pipe_to_max(int pipefd) } static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int iov_count, - struct fuse_bufvec *buf, unsigned int flags) + struct iovec *iov, int iov_count, + struct fuse_bufvec *buf, 
unsigned int flags, + fuse_req_t req) { int res; size_t len = fuse_buf_size(buf); @@ -848,7 +953,7 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, iov[iov_count].iov_base = mbuf; iov[iov_count].iov_len = len; iov_count++; - res = fuse_send_msg(se, ch, iov, iov_count); + res = fuse_send_msg(se, ch, iov, iov_count, req); free(mbuf); return res; } @@ -895,17 +1000,18 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, return res; fallback: - return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); + return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len, req); } #else static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, struct iovec *iov, int iov_count, - struct fuse_bufvec *buf, unsigned int flags) + struct fuse_bufvec *req_data, unsigned int flags, + fuse_req_t req) { - size_t len = fuse_buf_size(buf); + size_t len = fuse_buf_size(req_data); (void) flags; - return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); + return fuse_send_data_iov_fallback(se, ch, iov, iov_count, req_data, len, req); } #endif @@ -916,13 +1022,16 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, struct fuse_out_header out; int res; + if (req->flags.is_uring) + return fuse_reply_data_uring(req, bufv, flags); + iov[0].iov_base = &out; iov[0].iov_len = sizeof(struct fuse_out_header); out.unique = req->unique; out.error = 0; - res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); + res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags, req); if (res <= 0) { fuse_free_req(req); return res; @@ -1031,7 +1140,7 @@ int fuse_reply_ioctl_retry(fuse_req_t req, } } else { /* Can't handle non-compat 64bit ioctls on 32bit */ - if (sizeof(void *) == 4 && req->ioctl_64bit) { + if (sizeof(void *) == 4 && req->flags.ioctl_64bit) { res = fuse_reply_err(req, EINVAL); goto out; } @@ -1133,9 +1242,39 @@ int fuse_reply_lseek(fuse_req_t req, off_t off) return 
send_reply_ok(req, &arg, sizeof(arg)); } -static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +#ifdef HAVE_STATX +int fuse_reply_statx(fuse_req_t req, int flags, struct statx *statx, + double attr_timeout) { - char *name = (char *) inarg; + struct fuse_statx_out arg; + + memset(&arg, 0, sizeof(arg)); + arg.flags = flags; + arg.attr_valid = calc_timeout_sec(attr_timeout); + arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); + memcpy(&arg.stat, statx, sizeof(arg.stat)); + + return send_reply_ok(req, &arg, sizeof(arg)); +} +#else +int fuse_reply_statx(fuse_req_t req, int flags, struct statx *statx, + double attr_timeout) +{ + (void)req; + (void)flags; + (void)statx; + (void)attr_timeout; + + return -ENOSYS; +} +#endif + +static void _do_lookup(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + (void)op_in; + + char *name = (char *)in_payload; if (req->se->op.lookup) req->se->op.lookup(req, nodeid, name); @@ -1143,9 +1282,18 @@ static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } -static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_lookup(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) { - struct fuse_forget_in *arg = (struct fuse_forget_in *) inarg; + _do_lookup(req, nodeid, NULL, inarg); +} + +static void _do_forget(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + (void)in_payload; + + struct fuse_forget_in *arg = (struct fuse_forget_in *)op_in; if (req->se->op.forget) req->se->op.forget(req, nodeid, arg->nlookup); @@ -1153,21 +1301,27 @@ static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_none(req); } -static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, - const void *inarg) +static void do_forget(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) +{ + _do_forget(req, nodeid, inarg, NULL); +} + 
+static void _do_batch_forget(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_batch_forget_in *arg = (void *) inarg; - struct fuse_forget_one *param = (void *) PARAM(arg); + (void)nodeid; unsigned int i; - (void) nodeid; + const struct fuse_batch_forget_in *arg = op_in; + const struct fuse_forget_one *forgets = in_payload; if (req->se->op.forget_multi) { req->se->op.forget_multi(req, arg->count, - (struct fuse_forget_data *) param); + (struct fuse_forget_data *)in_payload); } else if (req->se->op.forget) { for (i = 0; i < arg->count; i++) { - struct fuse_forget_one *forget = ¶m[i]; + const struct fuse_forget_one *forget = &forgets[i]; struct fuse_req *dummy_req; dummy_req = fuse_ll_alloc_req(req->se); @@ -1187,14 +1341,25 @@ static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, } } -static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_batch_forget(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) +{ + struct fuse_batch_forget_in *arg = (void *)inarg; + struct fuse_forget_one *param = (void *)PARAM(arg); + + _do_batch_forget(req, nodeid, inarg, param); +} + +static void _do_getattr(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { + struct fuse_getattr_in *arg = (struct fuse_getattr_in *)op_in; + (void)in_payload; + struct fuse_file_info *fip = NULL; struct fuse_file_info fi; if (req->se->conn.proto_minor >= 9) { - struct fuse_getattr_in *arg = (struct fuse_getattr_in *) inarg; - if (arg->getattr_flags & FUSE_GETATTR_FH) { memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; @@ -1208,9 +1373,18 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } -static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_getattr(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) +{ + _do_getattr(req, nodeid, inarg, NULL); +} + +static 
void _do_setattr(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_setattr_in *arg = (struct fuse_setattr_in *) inarg; + (void)in_payload; + const struct fuse_setattr_in *arg = op_in; + uint32_t valid = arg->valid; if (req->se->op.setattr) { struct fuse_file_info *fi = NULL; @@ -1219,32 +1393,34 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) memset(&stbuf, 0, sizeof(stbuf)); convert_attr(arg, &stbuf); if (arg->valid & FATTR_FH) { - arg->valid &= ~FATTR_FH; + valid &= ~FATTR_FH; memset(&fi_store, 0, sizeof(fi_store)); fi = &fi_store; fi->fh = arg->fh; } - arg->valid &= - FUSE_SET_ATTR_MODE | - FUSE_SET_ATTR_UID | - FUSE_SET_ATTR_GID | - FUSE_SET_ATTR_SIZE | - FUSE_SET_ATTR_ATIME | - FUSE_SET_ATTR_MTIME | - FUSE_SET_ATTR_KILL_SUID | - FUSE_SET_ATTR_KILL_SGID | - FUSE_SET_ATTR_ATIME_NOW | - FUSE_SET_ATTR_MTIME_NOW | - FUSE_SET_ATTR_CTIME; - - req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); + valid &= FUSE_SET_ATTR_MODE | FUSE_SET_ATTR_UID | + FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE | + FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME | + FUSE_SET_ATTR_KILL_SUID | FUSE_SET_ATTR_KILL_SGID | + FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW | + FUSE_SET_ATTR_CTIME; + + req->se->op.setattr(req, nodeid, &stbuf, valid, fi); } else fuse_reply_err(req, ENOSYS); } -static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_setattr(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) { - struct fuse_access_in *arg = (struct fuse_access_in *) inarg; + _do_setattr(req, nodeid, inarg, NULL); +} + +static void _do_access(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + (void)in_payload; + const struct fuse_access_in *arg = op_in; if (req->se->op.access) req->se->op.access(req, nodeid, arg->mask); @@ -1252,9 +1428,17 @@ static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) 
fuse_reply_err(req, ENOSYS); } -static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_access(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) { - (void) inarg; + _do_access(req, nodeid, inarg, NULL); +} + +static void _do_readlink(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + (void)op_in; + (void)in_payload; if (req->se->op.readlink) req->se->op.readlink(req, nodeid); @@ -1262,15 +1446,20 @@ static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } -static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_readlink(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) { - struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inarg; - char *name = PARAM(arg); + _do_readlink(req, nodeid, inarg, NULL); +} + +static void _do_mknod(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + const struct fuse_mknod_in *arg = (struct fuse_mknod_in *)op_in; + const char *name = in_payload; if (req->se->conn.proto_minor >= 12) req->ctx.umask = arg->umask; - else - name = (char *) inarg + FUSE_COMPAT_MKNOD_IN_SIZE; if (req->se->op.mknod) req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); @@ -1278,22 +1467,45 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } -static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_mknod(fuse_req_t req, const fuse_ino_t nodeid, const void *inarg) { - struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inarg; + struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; + char *name = PARAM(arg); + + if (req->se->conn.proto_minor < 12) + name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE; + + _do_mknod(req, nodeid, inarg, name); +} + +static void _do_mkdir(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void 
*in_payload) +{ + const char *name = in_payload; + const struct fuse_mkdir_in *arg = op_in; if (req->se->conn.proto_minor >= 12) req->ctx.umask = arg->umask; if (req->se->op.mkdir) - req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); + req->se->op.mkdir(req, nodeid, name, arg->mode); else fuse_reply_err(req, ENOSYS); } -static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_mkdir(fuse_req_t req, const fuse_ino_t nodeid, const void *inarg) { - char *name = (char *) inarg; + const struct fuse_mkdir_in *arg = inarg; + const char *name = PARAM(arg); + + _do_mkdir(req, nodeid, inarg, name); +} + +static void _do_unlink(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + (void)op_in; + const char *name = in_payload; if (req->se->op.unlink) req->se->op.unlink(req, nodeid, name); @@ -1301,9 +1513,17 @@ static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } -static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_unlink(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) { - char *name = (char *) inarg; + _do_unlink(req, nodeid, NULL, inarg); +} + +static void _do_rmdir(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + (void)op_in; + const char *name = in_payload; if (req->se->op.rmdir) req->se->op.rmdir(req, nodeid, name); @@ -1311,10 +1531,17 @@ static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } -static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_rmdir(fuse_req_t req, const fuse_ino_t nodeid, const void *inarg) +{ + _do_rmdir(req, nodeid, NULL, inarg); +} + +static void _do_symlink(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - char *name = (char *) inarg; - char *linkname = ((char *) inarg) + strlen((char *) inarg) + 1; 
+ (void)op_in; + const char *name = (char *)in_payload; + const char *linkname = name + strlen(name) + 1; if (req->se->op.symlink) req->se->op.symlink(req, linkname, nodeid, name); @@ -1322,45 +1549,63 @@ static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } -static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_symlink(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) +{ + _do_symlink(req, nodeid, NULL, inarg); +} + +static void _do_rename(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_rename_in *arg = (struct fuse_rename_in *) inarg; - char *oldname = PARAM(arg); - char *newname = oldname + strlen(oldname) + 1; + const struct fuse_rename_in *arg = (struct fuse_rename_in *)op_in; + const char *oldname = in_payload; + const char *newname = oldname + strlen(oldname) + 1; if (req->se->op.rename) req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, - 0); + 0); else fuse_reply_err(req, ENOSYS); } -static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_rename(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) { - struct fuse_rename2_in *arg = (struct fuse_rename2_in *) inarg; - char *oldname = PARAM(arg); - char *newname = oldname + strlen(oldname) + 1; + const struct fuse_rename_in *arg = inarg; + const void *payload = PARAM(arg); + + _do_rename(req, nodeid, arg, payload); +} + +static void _do_rename2(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + const struct fuse_rename2_in *arg = op_in; + const char *oldname = in_payload; + const char *newname = oldname + strlen(oldname) + 1; if (req->se->op.rename) req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, - arg->flags); + arg->flags); else fuse_reply_err(req, ENOSYS); } -static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static 
void do_rename2(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) { - struct fuse_link_in *arg = (struct fuse_link_in *) inarg; + const struct fuse_rename2_in *arg = inarg; + const void *payload = PARAM(arg); - if (req->se->op.link) - req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); - else - fuse_reply_err(req, ENOSYS); + _do_rename2(req, nodeid, arg, payload); } -static void do_tmpfile(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_tmpfile(fuse_req_t req, fuse_ino_t nodeid, const void *op_in, + const void *in_payload) { - struct fuse_create_in *arg = (struct fuse_create_in *) inarg; + (void)in_payload; + const struct fuse_create_in *arg = op_in; if (req->se->op.tmpfile) { struct fuse_file_info fi; @@ -1376,35 +1621,79 @@ static void do_tmpfile(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } -static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_tmpfile(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) { struct fuse_create_in *arg = (struct fuse_create_in *) inarg; + _do_tmpfile(req, nodeid, arg, NULL); +} + +static void _do_link(fuse_req_t req, const fuse_ino_t nodeid, const void *op_in, + const void *in_payload) +{ + struct fuse_link_in *arg = (struct fuse_link_in *)op_in; + + if (req->se->op.link) + req->se->op.link(req, arg->oldnodeid, nodeid, in_payload); + else + fuse_reply_err(req, ENOSYS); +} + +static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +{ + const struct fuse_link_in *arg = inarg; + const void *name = PARAM(arg); + + _do_link(req, nodeid, inarg, name); +} + +static void _do_create(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + const struct fuse_create_in *arg = op_in; + const char *name = in_payload; + if (req->se->op.create) { struct fuse_file_info fi; - char *name = PARAM(arg); memset(&fi, 0, sizeof(fi)); fi.flags = arg->flags; if (req->se->conn.proto_minor 
>= 12) req->ctx.umask = arg->umask; - else - name = (char *) inarg + sizeof(struct fuse_open_in); + + /* XXX: fuse_create_in::open_flags */ req->se->op.create(req, nodeid, name, arg->mode, &fi); - } else + } else { fuse_reply_err(req, ENOSYS); + } +} + +static void do_create(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) +{ + const struct fuse_create_in *arg = (struct fuse_create_in *)inarg; + void *payload = PARAM(arg); + + if (req->se->conn.proto_minor < 12) + payload = (char *)inarg + sizeof(struct fuse_open_in); + + _do_create(req, nodeid, arg, payload); } -static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_open(fuse_req_t req, const fuse_ino_t nodeid, const void *op_in, + const void *in_payload) { - struct fuse_open_in *arg = (struct fuse_open_in *) inarg; + (void)in_payload; + struct fuse_open_in *arg = (struct fuse_open_in *)op_in; struct fuse_file_info fi; memset(&fi, 0, sizeof(fi)); fi.flags = arg->flags; + /* XXX: fuse_open_in::open_flags */ + if (req->se->op.open) req->se->op.open(req, nodeid, &fi); else if (req->se->conn.want_ext & FUSE_CAP_NO_OPEN_SUPPORT) @@ -1413,9 +1702,16 @@ static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_open(req, &fi); } -static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_open(fuse_req_t req, const fuse_ino_t nodeid, const void *inarg) +{ + _do_open(req, nodeid, inarg, NULL); +} + +static void _do_read(fuse_req_t req, const fuse_ino_t nodeid, const void *op_in, + const void *in_payload) { - struct fuse_read_in *arg = (struct fuse_read_in *) inarg; + (void)in_payload; + struct fuse_read_in *arg = (struct fuse_read_in *)op_in; if (req->se->op.read) { struct fuse_file_info fi; @@ -1431,68 +1727,97 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } -static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void 
do_read(fuse_req_t req, const fuse_ino_t nodeid, const void *inarg) +{ + _do_read(req, nodeid, inarg, NULL); +} + +static void _do_write(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_write_in *arg = (struct fuse_write_in *) inarg; + struct fuse_write_in *arg = (struct fuse_write_in *)op_in; + const char *buf = in_payload; struct fuse_file_info fi; - char *param; memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; - if (req->se->conn.proto_minor < 9) { - param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; - } else { + if (req->se->conn.proto_minor >= 9) { fi.lock_owner = arg->lock_owner; fi.flags = arg->flags; - param = PARAM(arg); } if (req->se->op.write) - req->se->op.write(req, nodeid, param, arg->size, - arg->offset, &fi); + req->se->op.write(req, nodeid, buf, arg->size, arg->offset, + &fi); else fuse_reply_err(req, ENOSYS); } -static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, - const struct fuse_buf *ibuf) +static void do_write(fuse_req_t req, const fuse_ino_t nodeid, const void *inarg) +{ + struct fuse_write_in *arg = (struct fuse_write_in *)inarg; + const void *payload; + + if (req->se->conn.proto_minor < 9) + payload = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; + else + payload = PARAM(arg); + + _do_write(req, nodeid, arg, payload); +} + +static void _do_write_buf(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, struct fuse_bufvec *bufv) { struct fuse_session *se = req->se; - struct fuse_bufvec bufv = { - .buf[0] = *ibuf, - .count = 1, - }; - struct fuse_write_in *arg = (struct fuse_write_in *) inarg; + struct fuse_write_in *arg = (struct fuse_write_in *)op_in; struct fuse_file_info fi; memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; + if (se->conn.proto_minor >= 9) { + fi.lock_owner = arg->lock_owner; + fi.flags = arg->flags; + } + + se->op.write_buf(req, 
nodeid, bufv, arg->offset, &fi); +} + +static void do_write_buf(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg, const struct fuse_buf *ibuf) +{ + struct fuse_session *se = req->se; + struct fuse_bufvec bufv = { + .buf[0] = *ibuf, + .count = 1, + }; + struct fuse_write_in *arg = (struct fuse_write_in *)inarg; + if (se->conn.proto_minor < 9) { - bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; + bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; bufv.buf[0].size -= sizeof(struct fuse_in_header) + - FUSE_COMPAT_WRITE_IN_SIZE; + FUSE_COMPAT_WRITE_IN_SIZE; assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); } else { - fi.lock_owner = arg->lock_owner; - fi.flags = arg->flags; if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) bufv.buf[0].mem = PARAM(arg); bufv.buf[0].size -= sizeof(struct fuse_in_header) + - sizeof(struct fuse_write_in); + sizeof(struct fuse_write_in); } if (bufv.buf[0].size < arg->size) { - fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); + fuse_log(FUSE_LOG_ERR, + "fuse: %s: buffer size too small\n", __func__); fuse_reply_err(req, EIO); goto out; } bufv.buf[0].size = arg->size; - se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); + _do_write_buf(req, nodeid, inarg, &bufv); out: /* Need to reset the pipe if ->write_buf() didn't consume all data */ @@ -1500,9 +1825,11 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, fuse_ll_clear_pipe(se); } -static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_flush(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; + (void)in_payload; + struct fuse_flush_in *arg = (struct fuse_flush_in *)op_in; struct fuse_file_info fi; memset(&fi, 0, sizeof(fi)); @@ -1517,9 +1844,16 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } -static void 
do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_flush(fuse_req_t req, const fuse_ino_t nodeid, const void *inarg) +{ + _do_flush(req, nodeid, inarg, NULL); +} + +static void _do_release(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_release_in *arg = (struct fuse_release_in *) inarg; + (void)in_payload; + const struct fuse_release_in *arg = op_in; struct fuse_file_info fi; memset(&fi, 0, sizeof(fi)); @@ -1540,9 +1874,17 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, 0); } -static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_release(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) { - struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; + _do_release(req, nodeid, inarg, NULL); +} + +static void _do_fsync(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + (void)in_payload; + const struct fuse_fsync_in *arg = op_in; struct fuse_file_info fi; int datasync = arg->fsync_flags & 1; @@ -1555,13 +1897,21 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } -static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_fsync(fuse_req_t req, const fuse_ino_t nodeid, const void *inarg) +{ + _do_fsync(req, nodeid, inarg, NULL); +} + +static void _do_opendir(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_open_in *arg = (struct fuse_open_in *) inarg; + (void)in_payload; + const struct fuse_open_in *arg = op_in; struct fuse_file_info fi; memset(&fi, 0, sizeof(fi)); fi.flags = arg->flags; + /* XXX: fuse_open_in::open_flags */ if (req->se->op.opendir) req->se->op.opendir(req, nodeid, &fi); @@ -1571,9 +1921,17 @@ static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_open(req, 
&fi); } -static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_opendir(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) +{ + _do_opendir(req, nodeid, inarg, NULL); +} + +static void _do_readdir(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_read_in *arg = (struct fuse_read_in *) inarg; + (void)in_payload; + struct fuse_read_in *arg = (struct fuse_read_in *)op_in; struct fuse_file_info fi; memset(&fi, 0, sizeof(fi)); @@ -1585,9 +1943,17 @@ static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } -static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_readdir(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) { - struct fuse_read_in *arg = (struct fuse_read_in *) inarg; + _do_readdir(req, nodeid, inarg, NULL); +} + +static void _do_readdirplus(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + (void)in_payload; + struct fuse_read_in *arg = (struct fuse_read_in *)op_in; struct fuse_file_info fi; memset(&fi, 0, sizeof(fi)); @@ -1599,9 +1965,17 @@ static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } -static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_readdirplus(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) { - struct fuse_release_in *arg = (struct fuse_release_in *) inarg; + _do_readdirplus(req, nodeid, inarg, NULL); +} + +static void _do_releasedir(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + (void)in_payload; + struct fuse_release_in *arg = (struct fuse_release_in *)op_in; struct fuse_file_info fi; memset(&fi, 0, sizeof(fi)); @@ -1614,9 +1988,17 @@ static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, 0); } 
-static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_releasedir(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) { - struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; + _do_releasedir(req, nodeid, inarg, NULL); +} + +static void _do_fsyncdir(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + (void)in_payload; + struct fuse_fsync_in *arg = (struct fuse_fsync_in *)op_in; struct fuse_file_info fi; int datasync = arg->fsync_flags & 1; @@ -1628,11 +2010,18 @@ static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) else fuse_reply_err(req, ENOSYS); } +static void do_fsyncdir(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) +{ + _do_fsyncdir(req, nodeid, inarg, NULL); +} -static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_statfs(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { (void) nodeid; - (void) inarg; + (void)op_in; + (void)in_payload; if (req->se->op.statfs) req->se->op.statfs(req, nodeid); @@ -1644,55 +2033,93 @@ static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_statfs(req, &buf); } } +static void do_statfs(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) +{ + _do_statfs(req, nodeid, inarg, NULL); +} -static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_setxattr(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_session *se = req->se; - unsigned int xattr_ext = !!(se->conn.want_ext & FUSE_CAP_SETXATTR_EXT); - struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg; - char *name = xattr_ext ? 
PARAM(arg) : - (char *)arg + FUSE_COMPAT_SETXATTR_IN_SIZE; - char *value = name + strlen(name) + 1; + struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *)op_in; + const char *name = in_payload; + const char *value = name + strlen(name) + 1; /* XXX:The API should be extended to support extra_flags/setxattr_flags */ + if (req->se->op.setxattr) req->se->op.setxattr(req, nodeid, name, value, arg->size, - arg->flags); + arg->flags); else fuse_reply_err(req, ENOSYS); } +static void do_setxattr(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) +{ + struct fuse_session *se = req->se; + unsigned int xattr_ext = !!(se->conn.want & FUSE_CAP_SETXATTR_EXT); + const struct fuse_setxattr_in *arg = inarg; + char *payload = xattr_ext ? PARAM(arg) : + (char *)arg + FUSE_COMPAT_SETXATTR_IN_SIZE; + + _do_setxattr(req, nodeid, arg, payload); +} -static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_getxattr(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; + const struct fuse_getxattr_in *arg = op_in; if (req->se->op.getxattr) - req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); + req->se->op.getxattr(req, nodeid, in_payload, arg->size); else fuse_reply_err(req, ENOSYS); } -static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_getxattr(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) { - struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; + const struct fuse_getxattr_in *arg = inarg; + const void *payload = PARAM(arg); + + _do_getxattr(req, nodeid, arg, payload); +} + +static void _do_listxattr(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg, const void *in_payload) +{ + (void)in_payload; + const struct fuse_getxattr_in *arg = inarg; if (req->se->op.listxattr) req->se->op.listxattr(req, nodeid, arg->size); else fuse_reply_err(req, 
ENOSYS); } +static void do_listxattr(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) +{ + _do_listxattr(req, nodeid, inarg, NULL); +} -static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_removexattr(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg, const void *in_payload) { - char *name = (char *) inarg; + (void)inarg; + const char *name = in_payload; if (req->se->op.removexattr) req->se->op.removexattr(req, nodeid, name); else fuse_reply_err(req, ENOSYS); } +static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +{ + _do_removexattr(req, nodeid, NULL, inarg); +} -static void convert_fuse_file_lock(struct fuse_file_lock *fl, +static void convert_fuse_file_lock(const struct fuse_file_lock *fl, struct flock *flock) { memset(flock, 0, sizeof(struct flock)); @@ -1706,9 +2133,11 @@ static void convert_fuse_file_lock(struct fuse_file_lock *fl, flock->l_pid = fl->pid; } -static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_getlk(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; + (void)in_payload; + const struct fuse_lk_in *arg = op_in; struct fuse_file_info fi; struct flock flock; @@ -1722,11 +2151,15 @@ static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) else fuse_reply_err(req, ENOSYS); } +static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +{ + _do_getlk(req, nodeid, inarg, NULL); +} -static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, - const void *inarg, int sleep) +static void do_setlk_common(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, int sleep) { - struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; + const struct fuse_lk_in *arg = op_in; struct fuse_file_info fi; struct flock flock; @@ -1764,14 +2197,27 @@ static void do_setlk_common(fuse_req_t req, fuse_ino_t 
nodeid, } } -static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_setlk(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - do_setlk_common(req, nodeid, inarg, 0); + (void)in_payload; + do_setlk_common(req, nodeid, op_in, 0); } +static void do_setlk(fuse_req_t req, const fuse_ino_t nodeid, const void *inarg) +{ + _do_setlk(req, nodeid, inarg, NULL); +} + +static void _do_setlkw(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + (void)in_payload; + do_setlk_common(req, nodeid, op_in, 1); +} static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) { - do_setlk_common(req, nodeid, inarg, 1); + _do_setlkw(req, nodeid, inarg, NULL); } static int find_interrupted(struct fuse_session *se, struct fuse_req *req) @@ -1814,9 +2260,11 @@ static int find_interrupted(struct fuse_session *se, struct fuse_req *req) return 0; } -static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_interrupt(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *) inarg; + (void)in_payload; + const struct fuse_interrupt_in *arg = op_in; struct fuse_session *se = req->se; (void) nodeid; @@ -1835,6 +2283,10 @@ static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) list_add_req(req, &se->interrupts); pthread_mutex_unlock(&se->lock); } +static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +{ + _do_interrupt(req, nodeid, inarg, NULL); +} static struct fuse_req *check_interrupt(struct fuse_session *se, struct fuse_req *req) @@ -1861,21 +2313,28 @@ static struct fuse_req *check_interrupt(struct fuse_session *se, return NULL; } -static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_bmap(fuse_req_t req, const fuse_ino_t nodeid, const void *op_in, + const 
void *in_payload) { - struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; + (void)in_payload; + const struct fuse_bmap_in *arg = op_in; if (req->se->op.bmap) req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); else fuse_reply_err(req, ENOSYS); } +static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +{ + _do_bmap(req, nodeid, inarg, NULL); +} -static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_ioctl(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *) inarg; + struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *)op_in; unsigned int flags = arg->flags; - void *in_buf = arg->in_size ? PARAM(arg) : NULL; + const void *in_buf = in_payload; struct fuse_file_info fi; if (flags & FUSE_IOCTL_DIR && @@ -1889,7 +2348,7 @@ static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && !(flags & FUSE_IOCTL_32BIT)) { - req->ioctl_64bit = 1; + req->flags.ioctl_64bit = 1; } if (req->se->op.ioctl) @@ -1899,15 +2358,24 @@ static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) else fuse_reply_err(req, ENOSYS); } +static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +{ + const struct fuse_ioctl_in *arg = inarg; + void *in_buf = arg->in_size ? 
PARAM(arg) : NULL; + + _do_ioctl(req, nodeid, arg, in_buf); +} void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) { free(ph); } -static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_poll(fuse_req_t req, const fuse_ino_t nodeid, const void *op_in, + const void *in_payload) { - struct fuse_poll_in *arg = (struct fuse_poll_in *) inarg; + (void)in_payload; + struct fuse_poll_in *arg = (struct fuse_poll_in *)op_in; struct fuse_file_info fi; memset(&fi, 0, sizeof(fi)); @@ -1933,23 +2401,37 @@ static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) } } -static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_poll(fuse_req_t req, const fuse_ino_t nodeid, const void *inarg) +{ + _do_poll(req, nodeid, inarg, NULL); +} + +static void _do_fallocate(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; + (void)in_payload; + const struct fuse_fallocate_in *arg = op_in; struct fuse_file_info fi; memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; if (req->se->op.fallocate) - req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); + req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, + arg->length, &fi); else fuse_reply_err(req, ENOSYS); } -static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void *inarg) +static void do_fallocate(fuse_req_t req, const fuse_ino_t nodeid, + const void *inarg) +{ + _do_fallocate(req, nodeid, inarg, NULL); +} + +static void copy_file_range_common(fuse_req_t req, const fuse_ino_t nodeid_in, + const struct fuse_copy_file_range_in *arg) { - struct fuse_copy_file_range_in *arg = (struct fuse_copy_file_range_in *) inarg; struct fuse_file_info fi_in, fi_out; memset(&fi_in, 0, sizeof(fi_in)); @@ -1958,19 +2440,63 @@ static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void memset(&fi_out, 0, 
sizeof(fi_out)); fi_out.fh = arg->fh_out; - if (req->se->op.copy_file_range) - req->se->op.copy_file_range(req, nodeid_in, arg->off_in, - &fi_in, arg->nodeid_out, - arg->off_out, &fi_out, arg->len, - arg->flags); + req->se->op.copy_file_range(req, nodeid_in, arg->off_in, &fi_in, + arg->nodeid_out, arg->off_out, + &fi_out, arg->len, arg->flags); else fuse_reply_err(req, ENOSYS); } -static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void _do_copy_file_range(fuse_req_t req, const fuse_ino_t nodeid_in, + const void *op_in, const void *in_payload) +{ + const struct fuse_copy_file_range_in *arg = op_in; + struct fuse_copy_file_range_in arg_tmp; + + (void) in_payload; + /* fuse_write_out can only handle 32bit copy size */ + if (arg->len > 0xfffff000) { + arg_tmp = *arg; + arg_tmp.len = 0xfffff000; + arg = &arg_tmp; + } + copy_file_range_common(req, nodeid_in, arg); +} + +static void do_copy_file_range(fuse_req_t req, const fuse_ino_t nodeid_in, + const void *inarg) +{ + _do_copy_file_range(req, nodeid_in, inarg, NULL); +} + +static void _do_copy_file_range_64(fuse_req_t req, const fuse_ino_t nodeid_in, + const void *op_in, const void *in_payload) +{ + (void) in_payload; + req->flags.is_copy_file_range_64 = 1; + /* Limit size on 32bit userspace to avoid conversion overflow */ + if (sizeof(size_t) == 4) + _do_copy_file_range(req, nodeid_in, op_in, NULL); + else + copy_file_range_common(req, nodeid_in, op_in); +} + +static void do_copy_file_range_64(fuse_req_t req, const fuse_ino_t nodeid_in, + const void *inarg) +{ + _do_copy_file_range_64(req, nodeid_in, inarg, NULL); +} + +/* + * Note that the uint64_t offset in struct fuse_lseek_in is derived from + * linux kernel loff_t and is therefore signed. 
+ */ +static void _do_lseek(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { - struct fuse_lseek_in *arg = (struct fuse_lseek_in *) inarg; + (void)in_payload; + const struct fuse_lseek_in *arg = op_in; struct fuse_file_info fi; memset(&fi, 0, sizeof(fi)); @@ -1982,13 +2508,54 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_err(req, ENOSYS); } +static void do_lseek(fuse_req_t req, const fuse_ino_t nodeid, const void *inarg) +{ + _do_lseek(req, nodeid, inarg, NULL); +} + +#ifdef HAVE_STATX +static void _do_statx(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + (void)in_payload; + const struct fuse_statx_in *arg = op_in; + struct fuse_file_info *fip = NULL; + struct fuse_file_info fi; + + if (arg->getattr_flags & FUSE_GETATTR_FH) { + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fip = &fi; + } + + if (req->se->op.statx) + req->se->op.statx(req, nodeid, arg->sx_flags, arg->sx_mask, fip); + else + fuse_reply_err(req, ENOSYS); +} +#else +static void _do_statx(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) +{ + (void)in_payload; + (void)req; + (void)nodeid; + (void)op_in; +} +#endif + +static void do_statx(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +{ + _do_statx(req, nodeid, inarg, NULL); +} + static bool want_flags_valid(uint64_t capable, uint64_t want) { uint64_t unknown_flags = want & (~capable); if (unknown_flags != 0) { fuse_log(FUSE_LOG_ERR, - "fuse: unknown connection 'want' flags: 0x%08lx\n", - unknown_flags); + "fuse: unknown connection 'want' flags: 0x%08llx\n", + (unsigned long long)unknown_flags); return false; } return true; @@ -2014,10 +2581,12 @@ int fuse_convert_to_conn_want_ext(struct fuse_conn_info *conn) fuse_lower_32_bits(conn->want_ext) != conn->want) { if (conn->want_ext != se->conn_want_ext) { fuse_log(FUSE_LOG_ERR, - "%s: Both conn->want_ext and conn->want are set.\n" - 
"want=%x, want_ext=%lx, se->want=%lx se->want_ext=%lx\n", - __func__, conn->want, conn->want_ext, - se->conn_want, se->conn_want_ext); + "%s: Both conn->want_ext and conn->want are set.\n" + "want=%x want_ext=%llx, se->want=%x se->want_ext=%llx\n", + __func__, conn->want, + (unsigned long long)conn->want_ext, + se->conn_want, + (unsigned long long)se->conn_want_ext); return -EINVAL; } @@ -2064,13 +2633,14 @@ bool fuse_get_feature_flag(struct fuse_conn_info *conn, return conn->capable_ext & flag ? true : false; } - /* Prevent bogus data races (bogus since "init" is called before * multi-threading becomes relevant */ -static __attribute__((no_sanitize("thread"))) -void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static __attribute__((no_sanitize("thread"))) void +_do_init(fuse_req_t req, const fuse_ino_t nodeid, const void *op_in, + const void *in_payload) { - struct fuse_init_in *arg = (struct fuse_init_in *) inarg; + (void)in_payload; + const struct fuse_init_in *arg = op_in; struct fuse_init_out outarg; struct fuse_session *se = req->se; size_t bufsize = se->bufsize; @@ -2079,6 +2649,8 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) uint64_t outargflags = 0; bool buf_reallocable = se->buf_reallocable; (void) nodeid; + bool enable_io_uring = false; + if (se->debug) { fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); if (arg->major == 7 && arg->minor >= 6) { @@ -2172,6 +2744,9 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) se->conn.capable_ext |= FUSE_CAP_PASSTHROUGH; if (inargflags & FUSE_NO_EXPORT_SUPPORT) se->conn.capable_ext |= FUSE_CAP_NO_EXPORT_SUPPORT; + if (inargflags & FUSE_OVER_IO_URING) + se->conn.capable_ext |= FUSE_CAP_OVER_IO_URING; + } else { se->conn.max_readahead = 0; } @@ -2195,7 +2770,7 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) /* Default settings for modern filesystems. 
* * Most of these capabilities were disabled by default in - * libfuse2 for backwards compatibility reasons. In libfused, + * libfuse2 for backwards compatibility reasons. In libfuse3, * we can finally enable them by default (as long as they're * supported by the kernel). */ @@ -2215,6 +2790,7 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, FUSE_CAP_READDIRPLUS_AUTO); + LL_SET_DEFAULT(1, FUSE_CAP_OVER_IO_URING); /* This could safely become default, but libfuse needs an API extension * to support it @@ -2223,7 +2799,6 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) se->conn.time_gran = 1; - se->got_init = 1; if (se->op.init) { // Apply the first 32 bits of capable_ext to capable se->conn.capable = fuse_lower_32_bits(se->conn.capable_ext); @@ -2325,13 +2900,15 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) } if (se->conn.want_ext & FUSE_CAP_NO_EXPORT_SUPPORT) outargflags |= FUSE_NO_EXPORT_SUPPORT; - - if (inargflags & FUSE_INIT_EXT) { - outargflags |= FUSE_INIT_EXT; - outarg.flags2 = outargflags >> 32; + if (se->uring.enable && se->conn.want_ext & FUSE_CAP_OVER_IO_URING) { + outargflags |= FUSE_OVER_IO_URING; + enable_io_uring = true; } - outarg.flags = outargflags; + if ((inargflags & FUSE_REQUEST_TIMEOUT) && se->conn.request_timeout) { + outargflags |= FUSE_REQUEST_TIMEOUT; + outarg.request_timeout = se->conn.request_timeout; + } outarg.max_readahead = se->conn.max_readahead; outarg.max_write = se->conn.max_write; @@ -2372,16 +2949,52 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) else if (arg->minor < 23) outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; + /* XXX: Add an option to make non-available io-uring fatal */ + if (enable_io_uring) { + int ring_rc = fuse_uring_start(se); + + if (ring_rc != 0) { + fuse_log(FUSE_LOG_INFO, + "fuse: failed to start io-uring: %s\n", + 
strerror(ring_rc)); + outargflags &= ~FUSE_OVER_IO_URING; + enable_io_uring = false; + } + } + + if (inargflags & FUSE_INIT_EXT) { + outargflags |= FUSE_INIT_EXT; + outarg.flags2 = outargflags >> 32; + } + outarg.flags = outargflags; + + /* + * Has to be set before replying, as new kernel requests might + * immediately arrive and got_init is used for op-code sanity. + * Especially with external handlers, where we have no control + * over the thread scheduling. + */ + se->got_init = 1; send_reply_ok(req, &outarg, outargsize); + if (enable_io_uring) + fuse_uring_wake_ring_threads(se); } -static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static __attribute__((no_sanitize("thread"))) void +do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +{ + _do_init(req, nodeid, inarg, NULL); +} + +static void _do_destroy(fuse_req_t req, const fuse_ino_t nodeid, + const void *op_in, const void *in_payload) { struct fuse_session *se = req->se; char *mountpoint; (void) nodeid; - (void) inarg; + (void)op_in; + (void)in_payload; mountpoint = atomic_exchange(&se->mountpoint, NULL); free(mountpoint); @@ -2394,6 +3007,11 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) send_reply_ok(req, NULL, 0); } +static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +{ + _do_destroy(req, nodeid, inarg, NULL); +} + static void list_del_nreq(struct fuse_notify_req *nreq) { struct fuse_notify_req *prev = nreq->prev; @@ -2443,6 +3061,7 @@ static int send_notify_iov(struct fuse_session *se, int notify_code, struct iovec *iov, int count) { struct fuse_out_header out; + struct fuse_req *req = NULL; if (!se->got_init) return -ENOTCONN; @@ -2452,7 +3071,7 @@ static int send_notify_iov(struct fuse_session *se, int notify_code, iov[0].iov_base = &out; iov[0].iov_len = sizeof(struct fuse_out_header); - return fuse_send_msg(se, NULL, iov, count); + return fuse_send_msg(se, NULL, iov, count, req); } int 
fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) @@ -2494,6 +3113,19 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); } +int fuse_lowlevel_notify_increment_epoch(struct fuse_session *se) +{ + struct iovec iov[1]; + + if (!se) + return -EINVAL; + + if (se->conn.proto_minor < 44) + return -ENOSYS; + + return send_notify_iov(se, FUSE_NOTIFY_INC_EPOCH, iov, 1); +} + /** * Notify parent attributes and the dentry matching parent/name * @@ -2594,6 +3226,7 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, struct iovec iov[3]; size_t size = fuse_buf_size(bufv); int res; + struct fuse_req *req = NULL; if (!se) return -EINVAL; @@ -2614,7 +3247,7 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, iov[1].iov_base = &outarg; iov[1].iov_len = sizeof(outarg); - res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); + res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags, req); if (res > 0) res = -res; @@ -2744,8 +3377,25 @@ int fuse_req_interrupted(fuse_req_t req) return interrupted; } +bool fuse_req_is_uring(fuse_req_t req) +{ + return req->flags.is_uring; +} + +#ifndef HAVE_URING +int fuse_req_get_payload(fuse_req_t req, char **payload, size_t *payload_sz, + void **mr) +{ + (void)req; + (void)payload; + (void)payload_sz; + (void)mr; + return -ENOTSUP; +} +#endif + static struct { - void (*func)(fuse_req_t, fuse_ino_t, const void *); + void (*func)(fuse_req_t req, const fuse_ino_t node, const void *arg); const char *name; } fuse_ll_ops[] = { [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, @@ -2781,7 +3431,7 @@ static struct { [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, [FUSE_ACCESS] = { do_access, "ACCESS" }, [FUSE_CREATE] = { do_create, "CREATE" }, - [FUSE_TMPFILE] = { do_tmpfile, "TMPFILE" }, + [FUSE_TMPFILE] = { do_tmpfile, "TMPFILE" }, [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, [FUSE_BMAP] = { do_bmap, "BMAP" }, [FUSE_IOCTL] = { 
do_ioctl, "IOCTL" }, @@ -2793,10 +3443,68 @@ static struct { [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS"}, [FUSE_RENAME2] = { do_rename2, "RENAME2" }, [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, + [FUSE_COPY_FILE_RANGE_64] = { do_copy_file_range_64, "COPY_FILE_RANGE_64" }, [FUSE_LSEEK] = { do_lseek, "LSEEK" }, + [FUSE_STATX] = { do_statx, "STATX" }, [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, }; +static struct { + void (*func)(fuse_req_t req, const fuse_ino_t ino, const void *op_in, + const void *op_payload); + const char *name; +} fuse_ll_ops2[] __attribute__((unused)) = { + [FUSE_LOOKUP] = { _do_lookup, "LOOKUP" }, + [FUSE_FORGET] = { _do_forget, "FORGET" }, + [FUSE_GETATTR] = { _do_getattr, "GETATTR" }, + [FUSE_SETATTR] = { _do_setattr, "SETATTR" }, + [FUSE_READLINK] = { _do_readlink, "READLINK" }, + [FUSE_SYMLINK] = { _do_symlink, "SYMLINK" }, + [FUSE_MKNOD] = { _do_mknod, "MKNOD" }, + [FUSE_MKDIR] = { _do_mkdir, "MKDIR" }, + [FUSE_UNLINK] = { _do_unlink, "UNLINK" }, + [FUSE_RMDIR] = { _do_rmdir, "RMDIR" }, + [FUSE_RENAME] = { _do_rename, "RENAME" }, + [FUSE_LINK] = { _do_link, "LINK" }, + [FUSE_OPEN] = { _do_open, "OPEN" }, + [FUSE_READ] = { _do_read, "READ" }, + [FUSE_WRITE] = { _do_write, "WRITE" }, + [FUSE_STATFS] = { _do_statfs, "STATFS" }, + [FUSE_RELEASE] = { _do_release, "RELEASE" }, + [FUSE_FSYNC] = { _do_fsync, "FSYNC" }, + [FUSE_SETXATTR] = { _do_setxattr, "SETXATTR" }, + [FUSE_GETXATTR] = { _do_getxattr, "GETXATTR" }, + [FUSE_LISTXATTR] = { _do_listxattr, "LISTXATTR" }, + [FUSE_REMOVEXATTR] = { _do_removexattr, "REMOVEXATTR" }, + [FUSE_FLUSH] = { _do_flush, "FLUSH" }, + [FUSE_INIT] = { _do_init, "INIT" }, + [FUSE_OPENDIR] = { _do_opendir, "OPENDIR" }, + [FUSE_READDIR] = { _do_readdir, "READDIR" }, + [FUSE_RELEASEDIR] = { _do_releasedir, "RELEASEDIR" }, + [FUSE_FSYNCDIR] = { _do_fsyncdir, "FSYNCDIR" }, + [FUSE_GETLK] = { _do_getlk, "GETLK" }, + [FUSE_SETLK] = { _do_setlk, "SETLK" }, + [FUSE_SETLKW] = { 
_do_setlkw, "SETLKW" }, + [FUSE_ACCESS] = { _do_access, "ACCESS" }, + [FUSE_CREATE] = { _do_create, "CREATE" }, + [FUSE_TMPFILE] = { _do_tmpfile, "TMPFILE" }, + [FUSE_INTERRUPT] = { _do_interrupt, "INTERRUPT" }, + [FUSE_BMAP] = { _do_bmap, "BMAP" }, + [FUSE_IOCTL] = { _do_ioctl, "IOCTL" }, + [FUSE_POLL] = { _do_poll, "POLL" }, + [FUSE_FALLOCATE] = { _do_fallocate, "FALLOCATE" }, + [FUSE_DESTROY] = { _do_destroy, "DESTROY" }, + [FUSE_NOTIFY_REPLY] = { (void *)1, "NOTIFY_REPLY" }, + [FUSE_BATCH_FORGET] = { _do_batch_forget, "BATCH_FORGET" }, + [FUSE_READDIRPLUS] = { _do_readdirplus, "READDIRPLUS" }, + [FUSE_RENAME2] = { _do_rename2, "RENAME2" }, + [FUSE_COPY_FILE_RANGE] = { _do_copy_file_range, "COPY_FILE_RANGE" }, + [FUSE_COPY_FILE_RANGE_64] = { _do_copy_file_range_64, "COPY_FILE_RANGE_64" }, + [FUSE_LSEEK] = { _do_lseek, "LSEEK" }, + [FUSE_STATX] = { _do_statx, "STATX" }, + [CUSE_INIT] = { _cuse_lowlevel_init, "CUSE_INIT" }, +}; + /* * For ABI compatibility we cannot allow higher values than CUSE_INIT. * Without ABI compatibility we could use the size of the array. @@ -2804,6 +3512,58 @@ static struct { */ #define FUSE_MAXOP (CUSE_INIT + 1) + +/** + * + * @return 0 if sanity is ok, error otherwise + */ +static inline int +fuse_req_opcode_sanity_ok(struct fuse_session *se, enum fuse_opcode in_op) +{ + int err = EIO; + + if (!se->got_init) { + enum fuse_opcode expected; + + expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; + if (in_op != expected) + return err; + } else if (in_op == FUSE_INIT || in_op == CUSE_INIT) + return err; + + return 0; +} + +static inline void +fuse_session_in2req(struct fuse_req *req, struct fuse_in_header *in) +{ + req->unique = in->unique; + req->ctx.uid = in->uid; + req->ctx.gid = in->gid; + req->ctx.pid = in->pid; +} + +/** + * Implement -o allow_root + */ +static inline int +fuse_req_check_allow_root(struct fuse_session *se, enum fuse_opcode in_op, + uid_t in_uid) +{ + int err = EACCES; + + if (se->deny_others && in_uid != se->owner && in_uid != 0 && + in_op != FUSE_INIT && in_op != FUSE_READ && + in_op != FUSE_WRITE && in_op != FUSE_FSYNC && + in_op != FUSE_RELEASE && in_op != FUSE_READDIR && + in_op != FUSE_FSYNCDIR && in_op != FUSE_RELEASEDIR && + in_op != FUSE_NOTIFY_REPLY && + in_op != FUSE_READDIRPLUS) + return err; + + return 0; +} + static const char *opname(enum fuse_opcode opcode) { if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) @@ -2868,9 +3628,11 @@ void fuse_session_process_buf_internal(struct fuse_session *se, in = buf->mem; } + trace_request_process(in->opcode, in->unique); + if (se->debug) { fuse_log(FUSE_LOG_DEBUG, - "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", + "dev unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", (unsigned long long) in->unique, opname((enum fuse_opcode) in->opcode), in->opcode, (unsigned long long) in->nodeid, buf->size, in->pid); @@ -2887,35 +3649,19 @@ void fuse_session_process_buf_internal(struct fuse_session *se, .iov_len = sizeof(struct fuse_out_header), }; - fuse_send_msg(se, ch, &iov, 1); + fuse_send_msg(se, ch, &iov, 1, NULL); goto clear_pipe; } - req->unique = in->unique; - req->ctx.uid = in->uid; - req->ctx.gid = in->gid; - req->ctx.pid = in->pid; + fuse_session_in2req(req, in); req->ch = ch ? fuse_chan_get(ch) : NULL; - err = EIO; - if (!se->got_init) { - enum fuse_opcode expected; - - expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; - if (in->opcode != expected) - goto reply_err; - } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) + err = fuse_req_opcode_sanity_ok(se, in->opcode); + if (err) goto reply_err; - err = EACCES; - /* Implement -o allow_root */ - if (se->deny_others && in->uid != se->owner && in->uid != 0 && - in->opcode != FUSE_INIT && in->opcode != FUSE_READ && - in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && - in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && - in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && - in->opcode != FUSE_NOTIFY_REPLY && - in->opcode != FUSE_READDIRPLUS) + err = fuse_req_check_allow_root(se, in->opcode, in->uid); + if (err) goto reply_err; err = ENOSYS; @@ -2979,6 +3725,59 @@ void fuse_session_process_buf_internal(struct fuse_session *se, goto out_free; } +void fuse_session_process_uring_cqe(struct fuse_session *se, + struct fuse_req *req, + struct fuse_in_header *in, void *op_in, + void *op_payload, size_t payload_len) +{ + int err; + + fuse_session_in2req(req, in); + + err = fuse_req_opcode_sanity_ok(se, in->opcode); + if (err) + goto reply_err; + + err = fuse_req_check_allow_root(se, in->opcode, in->uid); + if (err) + goto reply_err; + + err = ENOSYS; + if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) + goto reply_err; + + if (se->debug) { + fuse_log( + FUSE_LOG_DEBUG, + "cqe unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", + (unsigned long long)in->unique, + opname((enum fuse_opcode)in->opcode), in->opcode, + (unsigned long long)in->nodeid, payload_len, in->pid); + } + + if (in->opcode == FUSE_WRITE && se->op.write_buf) { + struct fuse_bufvec bufv = { + .buf[0] = { .size = payload_len, + .flags = 0, + .mem = op_payload }, + .count = 1, + }; + _do_write_buf(req, in->nodeid, op_in, &bufv); + } else if (in->opcode == FUSE_NOTIFY_REPLY) { + struct fuse_buf buf = { .size = payload_len, + .mem = op_payload }; + do_notify_reply(req, in->nodeid, op_in, 
&buf); + } else { + fuse_ll_ops2[in->opcode].func(req, in->nodeid, op_in, + op_payload); + } + + return; + +reply_err: + fuse_reply_err(req, err); +} + #define LL_OPTION(n,o,v) \ { n, offsetof(struct fuse_session, o), v } @@ -2987,6 +3786,8 @@ static const struct fuse_opt fuse_ll_opts[] = { LL_OPTION("-d", debug, 1), LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1), + LL_OPTION("io_uring", uring.enable, 1), + LL_OPTION("io_uring_q_depth=%u", uring.q_depth, -1), FUSE_OPT_END }; @@ -3004,7 +3805,10 @@ void fuse_lowlevel_help(void) printf( " -o allow_other allow access by all users\n" " -o allow_root allow access by root\n" -" -o auto_unmount auto unmount on process termination\n"); +" -o auto_unmount auto unmount on process termination\n" +" -o io_uring enable io-uring\n" +" -o io_uring_q_depth= io-uring queue depth\n" +); } void fuse_session_destroy(struct fuse_session *se) @@ -3019,6 +3823,8 @@ void fuse_session_destroy(struct fuse_session *se) if (llp != NULL) fuse_ll_pipe_free(llp); pthread_key_delete(se->pipe_key); + sem_destroy(&se->mt_finish); + pthread_mutex_destroy(&se->mt_lock); pthread_mutex_destroy(&se->lock); free(se->cuse_data); if (se->fd != -1) @@ -3126,6 +3932,7 @@ static int _fuse_session_receive_buf(struct fuse_session *se, bufsize, 0); } err = errno; + trace_request_receive(err); if (fuse_session_exited(se)) return 0; @@ -3234,6 +4041,7 @@ static int _fuse_session_receive_buf(struct fuse_session *se, res = read(ch ? 
ch->fd : se->fd, buf->mem, bufsize); } err = errno; + trace_request_receive(err); if (fuse_session_exited(se)) return 0; @@ -3311,12 +4119,23 @@ fuse_session_new_versioned(struct fuse_args *args, struct fuse_session *se; struct mount_opts *mo; + if (op == NULL || op_size == 0) { + fuse_log(FUSE_LOG_ERR, + "fuse: warning: empty op list passed to fuse_session_new()\n"); + return NULL; + } + + if (version == NULL) { + fuse_log(FUSE_LOG_ERR, "fuse: warning: version not passed to fuse_session_new()\n"); + return NULL; + } + if (sizeof(struct fuse_lowlevel_ops) < op_size) { fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); op_size = sizeof(struct fuse_lowlevel_ops); } - if (args->argc == 0) { + if (args == NULL || args->argc == 0) { fuse_log(FUSE_LOG_ERR, "fuse: empty argv passed to fuse_session_new().\n"); return NULL; } @@ -3331,6 +4150,17 @@ fuse_session_new_versioned(struct fuse_args *args, se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; se->conn.max_readahead = UINT_MAX; + /* + * Allow overriding with env, mostly to avoid the need to modify + * all tests. I.e. to test with and without io-uring being enabled. + */ + se->uring.enable = getenv("FUSE_URING_ENABLE") ? + atoi(getenv("FUSE_URING_ENABLE")) : + SESSION_DEF_URING_ENABLE; + se->uring.q_depth = getenv("FUSE_URING_QUEUE_DEPTH") ? 
+ atoi(getenv("FUSE_URING_QUEUE_DEPTH")) : + SESSION_DEF_URING_Q_DEPTH; + /* Parse options */ if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) goto out2; @@ -3369,6 +4199,8 @@ fuse_session_new_versioned(struct fuse_args *args, list_init_nreq(&se->notify_list); se->notify_ctr = 1; pthread_mutex_init(&se->lock, NULL); + sem_init(&se->mt_finish, 0, 0); + pthread_mutex_init(&se->mt_lock, NULL); err = pthread_key_create(&se->pipe_key, fuse_ll_pipe_destructor); if (err) { @@ -3393,6 +4225,8 @@ fuse_session_new_versioned(struct fuse_args *args, return se; out5: + sem_destroy(&se->mt_finish); + pthread_mutex_destroy(&se->mt_lock); pthread_mutex_destroy(&se->lock); out4: fuse_opt_free_args(args); @@ -3413,9 +4247,22 @@ struct fuse_session *fuse_session_new_30(struct fuse_args *args, size_t op_size, void *userdata) { + struct fuse_lowlevel_ops null_ops = { 0 }; + /* unknown version */ struct libfuse_version version = { 0 }; + /* + * This function is the ABI interface function from fuse_session_new in + * compat.c. External libraries like "fuser" might call fuse_session_new() + * with NULL ops and then pass that session to fuse_session_mount(). + * The actual FUSE operations are handled in their own library. 
+ */ + if (op == NULL) { + op = &null_ops; + op_size = sizeof(null_ops); + } + return fuse_session_new_versioned(args, op, op_size, &version, userdata); } @@ -3623,18 +4470,22 @@ int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) __attribute__((no_sanitize_thread)) void fuse_session_exit(struct fuse_session *se) { - se->exited = 1; + atomic_store_explicit(&se->mt_exited, 1, memory_order_relaxed); + sem_post(&se->mt_finish); } __attribute__((no_sanitize_thread)) void fuse_session_reset(struct fuse_session *se) { - se->exited = 0; + se->mt_exited = false; se->error = 0; } __attribute__((no_sanitize_thread)) int fuse_session_exited(struct fuse_session *se) { - return se->exited; + bool exited = + atomic_load_explicit(&se->mt_exited, memory_order_relaxed); + + return exited ? 1 : 0; } diff --git a/lib/fuse_misc.h b/lib/fuse_misc.h index 855edc326..1452593c3 100644 --- a/lib/fuse_misc.h +++ b/lib/fuse_misc.h @@ -3,7 +3,7 @@ Copyright (C) 2001-2007 Miklos Szeredi This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB + See the file LGPL2.txt */ #include diff --git a/lib/fuse_opt.c b/lib/fuse_opt.c index 1d3b6a1a5..046e57718 100644 --- a/lib/fuse_opt.c +++ b/lib/fuse_opt.c @@ -6,7 +6,7 @@ fuse_args`). This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB + See the file LGPL2.txt */ #include "fuse_config.h" diff --git a/lib/fuse_signals.c b/lib/fuse_signals.c index b88278e42..03b0d8f11 100644 --- a/lib/fuse_signals.c +++ b/lib/fuse_signals.c @@ -5,7 +5,7 @@ Utility functions for setting signal handlers. This program can be distributed under the terms of the GNU LGPLv2. 
- See the file COPYING.LIB + See the file LGPL2.txt */ #include "fuse_config.h" @@ -22,7 +22,12 @@ #include #endif +/* + * Must not handle SIGCANCEL, as that is used to wake up threads from + * syscalls reading requests from /dev/fuse + */ static int teardown_sigs[] = { SIGHUP, SIGINT, SIGTERM }; + static int ignore_sigs[] = { SIGPIPE}; static int fail_sigs[] = { SIGILL, SIGTRAP, SIGABRT, SIGBUS, SIGFPE, SIGSEGV }; static struct fuse_session *fuse_instance; @@ -55,8 +60,14 @@ static void dump_stack(void) static void exit_handler(int sig) { - if (fuse_instance == NULL) + if (fuse_instance == NULL) { + fuse_log(FUSE_LOG_ERR, "fuse_instance is NULL\n"); return; + } + + if (fuse_instance->debug) + fuse_log(FUSE_LOG_ERR, "exit_handler called with sig %d\n", + sig); fuse_session_exit(fuse_instance); diff --git a/lib/fuse_uring.c b/lib/fuse_uring.c new file mode 100644 index 000000000..c3127b7e5 --- /dev/null +++ b/lib/fuse_uring.c @@ -0,0 +1,954 @@ +/* + * FUSE: Filesystem in Userspace + * Copyright (C) 2025 Bernd Schubert + * + * Implementation of (most of) FUSE-over-io-uring. + * + * This program can be distributed under the terms of the GNU LGPLv2. 
+ * See the file LGPL2.txt + */ + +#define _GNU_SOURCE + +#include "fuse_i.h" +#include "fuse_kernel.h" +#include "fuse_uring_i.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Size of command data area in SQE when IORING_SETUP_SQE128 is used */ +#define FUSE_URING_MAX_SQE128_CMD_DATA 80 + +struct fuse_ring_ent { + struct fuse_ring_queue *ring_queue; /* back pointer */ + struct fuse_req req; + + struct fuse_uring_req_header *req_header; + void *op_payload; + size_t req_payload_sz; + + /* commit id of a fuse request */ + uint64_t req_commit_id; + + enum fuse_uring_cmd last_cmd; + + /* header and payload */ + struct iovec iov[2]; +}; + +struct fuse_ring_queue { + /* back pointer */ + struct fuse_ring_pool *ring_pool; + int qid; + int numa_node; + pthread_t tid; + int eventfd; + size_t req_header_sz; + struct io_uring ring; + + pthread_mutex_t ring_lock; + bool cqe_processing; + + /* size depends on queue depth */ + struct fuse_ring_ent ent[]; +}; + +/** + * Main fuse_ring structure, holds all fuse-ring data + */ +struct fuse_ring_pool { + struct fuse_session *se; + + /* number of queues */ + size_t nr_queues; + + /* number of per queue entries */ + size_t queue_depth; + + /* max payload size for fuse requests*/ + size_t max_req_payload_sz; + + /* size of a single queue */ + size_t queue_mem_size; + + unsigned int started_threads; + unsigned int failed_threads; + + /* Avoid sending queue entries before FUSE_INIT reply*/ + sem_t init_sem; + + pthread_cond_t thread_start_cond; + pthread_mutex_t thread_start_mutex; + + /* pointer to the first queue */ + struct fuse_ring_queue *queues; +}; + +static size_t +fuse_ring_queue_size(const size_t q_depth) +{ + const size_t req_size = sizeof(struct fuse_ring_ent) * q_depth; + + return sizeof(struct fuse_ring_queue) + req_size; +} + +static struct fuse_ring_queue * +fuse_uring_get_queue(struct fuse_ring_pool *fuse_ring, int qid) 
+{
+	void *ptr =
+		((char *)fuse_ring->queues) + (qid * fuse_ring->queue_mem_size);
+
+	return ptr;
+}
+
+/**
+ * Return a pointer to the command data area of the SQE
+ * (80 bytes when IORING_SETUP_SQE128 is used)
+ */
+static void *fuse_uring_get_sqe_cmd(struct io_uring_sqe *sqe)
+{
+	return (void *)&sqe->cmd[0];
+}
+
+/* Fill the per-SQE command data: queue id and commit id, flags cleared */
+static void fuse_uring_sqe_set_req_data(struct fuse_uring_cmd_req *req,
+					const unsigned int qid,
+					const uint64_t commit_id)
+{
+	req->qid = qid;
+	req->commit_id = commit_id;
+	req->flags = 0;
+}
+
+/*
+ * Initialize @sqe as an IORING_OP_URING_CMD with sub-command @cmd_op and
+ * attach @req as its user data, so that the ring entry can be recovered
+ * from the completion.
+ */
+static void
+fuse_uring_sqe_prepare(struct io_uring_sqe *sqe, struct fuse_ring_ent *req,
+		       __u32 cmd_op)
+{
+	/* These fields should be written once, never change */
+	sqe->opcode = IORING_OP_URING_CMD;
+
+	/*
+	 * IOSQE_FIXED_FILE: fd is the index to the fd *array*
+	 * given to io_uring_register_files()
+	 */
+	sqe->flags = IOSQE_FIXED_FILE;
+	sqe->fd = 0;
+
+	sqe->rw_flags = 0;
+	sqe->ioprio = 0;
+	sqe->off = 0;
+
+	io_uring_sqe_set_data(sqe, req);
+
+	sqe->cmd_op = cmd_op;
+	sqe->__pad1 = 0;
+}
+
+/*
+ * Queue a FUSE_IO_URING_CMD_COMMIT_AND_FETCH SQE for @ring_ent.
+ *
+ * queue->ring_lock is taken when the caller is not the queue thread
+ * (queue->tid). The SQE is submitted right away unless the queue thread
+ * is currently processing CQEs (queue->cqe_processing).
+ *
+ * @return 0 on success, -EIO (with se->error set) if no SQE is available
+ */
+static int fuse_uring_commit_sqe(struct fuse_ring_pool *ring_pool,
+				 struct fuse_ring_queue *queue,
+				 struct fuse_ring_ent *ring_ent)
+{
+	bool locked = false;
+	struct fuse_session *se = ring_pool->se;
+	struct fuse_uring_req_header *rrh = ring_ent->req_header;
+	struct fuse_out_header *out = (struct fuse_out_header *)&rrh->in_out;
+	struct fuse_uring_ent_in_out *ent_in_out =
+		(struct fuse_uring_ent_in_out *)&rrh->ring_ent_in_out;
+	struct io_uring_sqe *sqe;
+
+	if (pthread_self() != queue->tid) {
+		pthread_mutex_lock(&queue->ring_lock);
+		locked = true;
+	}
+
+	sqe = io_uring_get_sqe(&queue->ring);
+
+	if (sqe == NULL) {
+		/* This is an impossible condition, unless there is a bug.
+		 * The kernel sent back an SQE, which is assigned to a request.
+		 * There is no way to run out of SQEs, as the number of
+		 * SQEs matches the number of requests.
+		 */
+
+		se->error = -EIO;
+		fuse_log(FUSE_LOG_ERR, "Failed to get a ring SQEs\n");
+
+		/* Do not leave ring_lock held on the error path */
+		if (locked)
+			pthread_mutex_unlock(&queue->ring_lock);
+
+		return -EIO;
+	}
+
+	ring_ent->last_cmd = FUSE_IO_URING_CMD_COMMIT_AND_FETCH;
+	fuse_uring_sqe_prepare(sqe, ring_ent, ring_ent->last_cmd);
+	fuse_uring_sqe_set_req_data(fuse_uring_get_sqe_cmd(sqe), queue->qid,
+				    ring_ent->req_commit_id);
+
+	if (se->debug) {
+		fuse_log(FUSE_LOG_DEBUG, " unique: %" PRIu64 ", result=%d\n",
+			 out->unique, ent_in_out->payload_sz);
+	}
+
+	if (!queue->cqe_processing)
+		io_uring_submit(&queue->ring);
+
+	if (locked)
+		pthread_mutex_unlock(&queue->ring_lock);
+
+	return 0;
+}
+
+int fuse_req_get_payload(fuse_req_t req, char **payload, size_t *payload_sz,
+			 void **mr)
+{
+	struct fuse_ring_ent *ring_ent;
+
+	/* Not possible without io-uring interface */
+	if (!req->flags.is_uring)
+		return -EINVAL;
+
+	ring_ent = container_of(req, struct fuse_ring_ent, req);
+
+	*payload = ring_ent->op_payload;
+	*payload_sz = ring_ent->req_payload_sz;
+
+	/*
+	 * For now unused, but will be used later when the application can
+	 * allocate the buffers itself and register them for rdma.
+ */ + if (mr) + *mr = NULL; + + return 0; +} + +int send_reply_uring(fuse_req_t req, int error, const void *arg, size_t argsize) +{ + int res; + struct fuse_ring_ent *ring_ent = + container_of(req, struct fuse_ring_ent, req); + struct fuse_uring_req_header *rrh = ring_ent->req_header; + struct fuse_out_header *out = (struct fuse_out_header *)&rrh->in_out; + struct fuse_uring_ent_in_out *ent_in_out = + (struct fuse_uring_ent_in_out *)&rrh->ring_ent_in_out; + + struct fuse_ring_queue *queue = ring_ent->ring_queue; + struct fuse_ring_pool *ring_pool = queue->ring_pool; + size_t max_payload_sz = ring_pool->max_req_payload_sz; + + if (argsize > max_payload_sz) { + fuse_log(FUSE_LOG_ERR, "argsize %zu exceeds buffer size %zu", + argsize, max_payload_sz); + error = -EINVAL; + } else if (argsize) { + if (arg != ring_ent->op_payload) + memcpy(ring_ent->op_payload, arg, argsize); + } + ent_in_out->payload_sz = argsize; + + out->error = error; + out->unique = req->unique; + + res = fuse_uring_commit_sqe(ring_pool, queue, ring_ent); + + fuse_free_req(req); + + return res; +} + +int fuse_reply_data_uring(fuse_req_t req, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags) +{ + struct fuse_ring_ent *ring_ent = + container_of(req, struct fuse_ring_ent, req); + + struct fuse_ring_queue *queue = ring_ent->ring_queue; + struct fuse_ring_pool *ring_pool = queue->ring_pool; + struct fuse_uring_req_header *rrh = ring_ent->req_header; + struct fuse_out_header *out = (struct fuse_out_header *)&rrh->in_out; + struct fuse_uring_ent_in_out *ent_in_out = + (struct fuse_uring_ent_in_out *)&rrh->ring_ent_in_out; + size_t max_payload_sz = ring_ent->req_payload_sz; + struct fuse_bufvec dest_vec = FUSE_BUFVEC_INIT(max_payload_sz); + int res; + + dest_vec.buf[0].mem = ring_ent->op_payload; + dest_vec.buf[0].size = max_payload_sz; + + res = fuse_buf_copy(&dest_vec, bufv, flags); + + out->error = res < 0 ? res : 0; + out->unique = req->unique; + + ent_in_out->payload_sz = res > 0 ? 
res : 0; + + res = fuse_uring_commit_sqe(ring_pool, queue, ring_ent); + + fuse_free_req(req); + + return res; +} + +/** + * Copy the iov into the ring buffer and submit and commit/fetch sqe + */ +int fuse_send_msg_uring(fuse_req_t req, struct iovec *iov, int count) +{ + struct fuse_ring_ent *ring_ent = + container_of(req, struct fuse_ring_ent, req); + + struct fuse_ring_queue *queue = ring_ent->ring_queue; + struct fuse_ring_pool *ring_pool = queue->ring_pool; + struct fuse_uring_req_header *rrh = ring_ent->req_header; + struct fuse_out_header *out = (struct fuse_out_header *)&rrh->in_out; + struct fuse_uring_ent_in_out *ent_in_out = + (struct fuse_uring_ent_in_out *)&rrh->ring_ent_in_out; + size_t max_buf = ring_pool->max_req_payload_sz; + size_t len = 0; + int res = 0; + + /* copy iov into the payload, idx=0 is the header section */ + for (int idx = 1; idx < count; idx++) { + struct iovec *cur = &iov[idx]; + + if (len + cur->iov_len > max_buf) { + fuse_log(FUSE_LOG_ERR, + "iov[%d] exceeds buffer size %zu", + idx, max_buf); + res = -EINVAL; /* Gracefully handle this? */ + break; + } + + memcpy(ring_ent->op_payload + len, cur->iov_base, cur->iov_len); + len += cur->iov_len; + } + + ent_in_out->payload_sz = len; + + out->error = res; + out->unique = req->unique; + out->len = len; + + return fuse_uring_commit_sqe(ring_pool, queue, ring_ent); +} + +static int fuse_queue_setup_io_uring(struct io_uring *ring, size_t qid, + size_t depth, int fd, int evfd) +{ + int rc; + struct io_uring_params params = {0}; + int files[2] = { fd, evfd }; + + depth += 1; /* for the eventfd poll SQE */ + + params.flags = IORING_SETUP_SQE128; + + /* Avoid cq overflow */ + params.flags |= IORING_SETUP_CQSIZE; + params.cq_entries = depth * 2; + + /* These flags should help to increase performance, but actually + * make it a bit slower - reason should get investigated. 
+	 */
+	if (0) {
+		/* Has the main slow down effect */
+		params.flags |= IORING_SETUP_SINGLE_ISSUER;
+
+		// params.flags |= IORING_SETUP_DEFER_TASKRUN;
+		params.flags |= IORING_SETUP_TASKRUN_FLAG;
+
+		/* Second main effect to make it slower */
+		params.flags |= IORING_SETUP_COOP_TASKRUN;
+	}
+
+	rc = io_uring_queue_init_params(depth, ring, &params);
+	if (rc != 0) {
+		fuse_log(FUSE_LOG_ERR, "Failed to setup qid %zu: %d (%s)\n",
+			 qid, rc, strerror(-rc));
+		return rc;
+	}
+
+	rc = io_uring_register_files(ring, files, 1);
+	if (rc != 0) {
+		/*
+		 * liburing hands back the negative error code in rc, errno
+		 * is not a reliable source for it.
+		 */
+		fuse_log(FUSE_LOG_ERR,
+			 "Failed to register files for ring idx %zu: %s",
+			 qid, strerror(-rc));
+		return rc;
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down the whole ring pool: for every queue wake its thread through
+ * the eventfd, cancel and join it, close the eventfd, exit the io_uring,
+ * release the per-entry header/payload buffers and destroy the ring lock.
+ * Finally the queue array and the pool itself are freed.
+ */
+static void fuse_session_destruct_uring(struct fuse_ring_pool *fuse_ring)
+{
+	for (size_t qid = 0; qid < fuse_ring->nr_queues; qid++) {
+		struct fuse_ring_queue *queue =
+			fuse_uring_get_queue(fuse_ring, qid);
+
+		if (queue->tid != 0) {
+			uint64_t value = 1ULL;
+			int rc;
+
+			rc = write(queue->eventfd, &value, sizeof(value));
+			if (rc != sizeof(value))
+				fprintf(stderr,
+					"Wrote to eventfd=%d err=%s: rc=%d\n",
+					queue->eventfd, strerror(errno), rc);
+			pthread_cancel(queue->tid);
+			pthread_join(queue->tid, NULL);
+			queue->tid = 0;
+		}
+
+		if (queue->eventfd >= 0) {
+			close(queue->eventfd);
+			queue->eventfd = -1;
+		}
+
+		if (queue->ring.ring_fd != -1)
+			io_uring_queue_exit(&queue->ring);
+
+		for (size_t idx = 0; idx < fuse_ring->queue_depth; idx++) {
+			struct fuse_ring_ent *ent = &queue->ent[idx];
+
+			numa_free(ent->op_payload, ent->req_payload_sz);
+			numa_free(ent->req_header, queue->req_header_sz);
+		}
+
+		pthread_mutex_destroy(&queue->ring_lock);
+	}
+
+	free(fuse_ring->queues);
+	pthread_cond_destroy(&fuse_ring->thread_start_cond);
+	pthread_mutex_destroy(&fuse_ring->thread_start_mutex);
+	free(fuse_ring);
+}
+
+static int fuse_uring_register_ent(struct fuse_ring_queue *queue,
+				   struct fuse_ring_ent *ent)
+{
+	struct io_uring_sqe *sqe;
+
+	sqe = io_uring_get_sqe(&queue->ring);
+	if
(sqe == NULL) { + /* + * All SQEs are idle here - no good reason this + * could fail + */ + fuse_log(FUSE_LOG_ERR, "Failed to get all ring SQEs"); + return -EIO; + } + + ent->last_cmd = FUSE_IO_URING_CMD_REGISTER; + fuse_uring_sqe_prepare(sqe, ent, ent->last_cmd); + + /* only needed for fetch */ + ent->iov[0].iov_base = ent->req_header; + ent->iov[0].iov_len = queue->req_header_sz; + + ent->iov[1].iov_base = ent->op_payload; + ent->iov[1].iov_len = ent->req_payload_sz; + + sqe->addr = (uint64_t)(ent->iov); + sqe->len = 2; + + /* this is a fetch, kernel does not read commit id */ + fuse_uring_sqe_set_req_data(fuse_uring_get_sqe_cmd(sqe), queue->qid, 0); + + return 0; + +} + +static int fuse_uring_register_queue(struct fuse_ring_queue *queue) +{ + struct fuse_ring_pool *ring_pool = queue->ring_pool; + unsigned int sq_ready; + struct io_uring_sqe *sqe; + int res; + + for (size_t idx = 0; idx < ring_pool->queue_depth; idx++) { + struct fuse_ring_ent *ent = &queue->ent[idx]; + + res = fuse_uring_register_ent(queue, ent); + if (res != 0) + return res; + } + + sq_ready = io_uring_sq_ready(&queue->ring); + if (sq_ready != ring_pool->queue_depth) { + fuse_log(FUSE_LOG_ERR, + "SQE ready mismatch, expected %zu got %u\n", + ring_pool->queue_depth, sq_ready); + return -EINVAL; + } + + /* Poll SQE for the eventfd to wake up on teardown */ + sqe = io_uring_get_sqe(&queue->ring); + if (sqe == NULL) { + fuse_log(FUSE_LOG_ERR, "Failed to get eventfd SQE"); + return -EIO; + } + + io_uring_prep_poll_add(sqe, queue->eventfd, POLLIN); + io_uring_sqe_set_data(sqe, (void *)(uintptr_t)queue->eventfd); + + /* Only preparation until here, no submission yet */ + + return 0; +} + +static struct fuse_ring_pool *fuse_create_ring(struct fuse_session *se) +{ + struct fuse_ring_pool *fuse_ring = NULL; + const size_t nr_queues = get_nprocs_conf(); + size_t payload_sz = se->bufsize - FUSE_BUFFER_HEADER_SIZE; + size_t queue_sz; + + if (se->debug) + fuse_log(FUSE_LOG_DEBUG, "starting io-uring 
q-depth=%d\n",
+			 se->uring.q_depth);
+
+	fuse_ring = calloc(1, sizeof(*fuse_ring));
+	if (fuse_ring == NULL) {
+		fuse_log(FUSE_LOG_ERR, "Allocating the ring failed\n");
+		goto err;
+	}
+
+	queue_sz = fuse_ring_queue_size(se->uring.q_depth);
+	/* calloc(n, size) checks the n * size multiplication for overflow */
+	fuse_ring->queues = calloc(nr_queues, queue_sz);
+	if (fuse_ring->queues == NULL) {
+		fuse_log(FUSE_LOG_ERR, "Allocating the queues failed\n");
+		goto err;
+	}
+
+	fuse_ring->se = se;
+	fuse_ring->nr_queues = nr_queues;
+	fuse_ring->queue_depth = se->uring.q_depth;
+	fuse_ring->max_req_payload_sz = payload_sz;
+	fuse_ring->queue_mem_size = queue_sz;
+
+	/*
+	 * very basic queue initialization, that cannot fail and will
+	 * allow easy cleanup if something (like mmap) fails in the middle
+	 * below
+	 */
+	for (size_t qid = 0; qid < nr_queues; qid++) {
+		struct fuse_ring_queue *queue =
+			fuse_uring_get_queue(fuse_ring, qid);
+
+		queue->ring.ring_fd = -1;
+		queue->numa_node = numa_node_of_cpu(qid);
+		queue->qid = qid;
+		queue->ring_pool = fuse_ring;
+		queue->eventfd = -1;
+		pthread_mutex_init(&queue->ring_lock, NULL);
+	}
+
+	pthread_cond_init(&fuse_ring->thread_start_cond, NULL);
+	pthread_mutex_init(&fuse_ring->thread_start_mutex, NULL);
+	sem_init(&fuse_ring->init_sem, 0, 0);
+
+	return fuse_ring;
+
+err:
+	/*
+	 * Both failure paths above are taken before any queue, mutex or
+	 * condition variable has been initialized, so the only cleanup
+	 * needed is to free the allocations. Nothing that was never
+	 * initialized gets destroyed.
+	 */
+	if (fuse_ring) {
+		free(fuse_ring->queues);
+		free(fuse_ring);
+	}
+
+	return NULL;
+}
+
+static void fuse_uring_resubmit(struct fuse_ring_queue *queue,
+				struct fuse_ring_ent *ent)
+{
+	struct io_uring_sqe *sqe;
+
+	sqe = io_uring_get_sqe(&queue->ring);
+	if (sqe == NULL) {
+		/* This is an impossible condition, unless there is a bug.
+		 * The kernel sent back an SQE, which is assigned to a request.
+		 * There is no way to run out of SQEs, as the number of
+		 * SQEs matches the number of requests.
+ */ + + queue->ring_pool->se->error = -EIO; + fuse_log(FUSE_LOG_ERR, "Failed to get a ring SQEs\n"); + + return; + } + + fuse_uring_sqe_prepare(sqe, ent, ent->last_cmd); + + switch (ent->last_cmd) { + case FUSE_IO_URING_CMD_REGISTER: + sqe->addr = (uint64_t)(ent->iov); + sqe->len = 2; + fuse_uring_sqe_set_req_data(fuse_uring_get_sqe_cmd(sqe), + queue->qid, 0); + break; + case FUSE_IO_URING_CMD_COMMIT_AND_FETCH: + fuse_uring_sqe_set_req_data(fuse_uring_get_sqe_cmd(sqe), + queue->qid, ent->req_commit_id); + break; + default: + fuse_log(FUSE_LOG_ERR, "Unknown command type: %d\n", + ent->last_cmd); + queue->ring_pool->se->error = -EINVAL; + break; + } + + /* caller submits */ +} + +static void fuse_uring_handle_cqe(struct fuse_ring_queue *queue, + struct io_uring_cqe *cqe) +{ + struct fuse_ring_ent *ent = io_uring_cqe_get_data(cqe); + + if (!ent) { + fuse_log(FUSE_LOG_ERR, + "cqe=%p io_uring_cqe_get_data returned NULL\n", cqe); + return; + } + + struct fuse_req *req = &ent->req; + struct fuse_ring_pool *fuse_ring = queue->ring_pool; + struct fuse_uring_req_header *rrh = ent->req_header; + + struct fuse_in_header *in = (struct fuse_in_header *)&rrh->in_out; + struct fuse_uring_ent_in_out *ent_in_out = &rrh->ring_ent_in_out; + + ent->req_commit_id = ent_in_out->commit_id; + if (unlikely(ent->req_commit_id == 0)) { + /* + * If this happens kernel will not find the response - it will + * be stuck forever - better to abort immediately. 
+ */ + fuse_log(FUSE_LOG_ERR, "Received invalid commit_id=0\n"); + abort(); + } + + memset(&req->flags, 0, sizeof(req->flags)); + memset(&req->u, 0, sizeof(req->u)); + req->flags.is_uring = 1; + req->ref_cnt++; + req->ch = NULL; /* not needed for uring */ + req->interrupted = 0; + list_init_req(req); + + fuse_session_process_uring_cqe(fuse_ring->se, req, in, &rrh->op_in, + ent->op_payload, ent_in_out->payload_sz); +} + +static int fuse_uring_queue_handle_cqes(struct fuse_ring_queue *queue) +{ + struct fuse_ring_pool *ring_pool = queue->ring_pool; + struct fuse_session *se = ring_pool->se; + size_t num_completed = 0; + struct io_uring_cqe *cqe; + unsigned int head; + struct fuse_ring_ent *ent; + int ret = 0; + + io_uring_for_each_cqe(&queue->ring, head, cqe) { + int err = 0; + + num_completed++; + + err = cqe->res; + if (unlikely(err != 0)) { + if (err > 0 && ((uintptr_t)io_uring_cqe_get_data(cqe) == + (unsigned int)queue->eventfd)) { + /* teardown from eventfd */ + return -ENOTCONN; + } + + + switch (err) { + case -EAGAIN: + fallthrough; + case -EINTR: + ent = io_uring_cqe_get_data(cqe); + fuse_uring_resubmit(queue, ent); + continue; + default: + break; + } + + /* -ENOTCONN is ok on umount */ + if (err != -ENOTCONN) { + se->error = cqe->res; + + /* return first error */ + if (ret == 0) + ret = err; + } + + } else { + fuse_uring_handle_cqe(queue, cqe); + } + } + + if (num_completed) + io_uring_cq_advance(&queue->ring, num_completed); + + return ret == 0 ? 
0 : num_completed;
+}
+
+/**
+ * In per-core-queue configuration there is one thread per core - bind
+ * the calling thread to the core that matches its queue id
+ */
+static void fuse_uring_set_thread_core(int qid)
+{
+	cpu_set_t mask;
+	int rc;
+
+	CPU_ZERO(&mask);
+	CPU_SET(qid, &mask);
+	rc = sched_setaffinity(0, sizeof(cpu_set_t), &mask);
+	if (rc != 0)
+		fuse_log(FUSE_LOG_ERR, "Failed to bind qid=%d to its core: %s\n",
+			 qid, strerror(errno));
+
+	/* Disabled on purpose, kept for experiments with thread priority */
+	if (0) {
+		const int policy = SCHED_IDLE;
+		const struct sched_param param = {
+			.sched_priority = sched_get_priority_min(policy),
+		};
+
+		/* Set the lowest possible priority, so that the application
+		 * submitting requests is not moved away from the current core.
+		 */
+		rc = sched_setscheduler(0, policy, &param);
+		if (rc != 0)
+			fuse_log(FUSE_LOG_ERR, "Failed to set scheduler: %s\n",
+				 strerror(errno));
+	}
+}
+
+/*
+ * @return negative error code or io-uring file descriptor
+ */
+static int fuse_uring_init_queue(struct fuse_ring_queue *queue)
+{
+	struct fuse_ring_pool *ring = queue->ring_pool;
+	struct fuse_session *se = ring->se;
+	int res;
+	size_t page_sz = sysconf(_SC_PAGESIZE);
+
+	queue->eventfd = eventfd(0, EFD_CLOEXEC);
+	if (queue->eventfd < 0) {
+		res = -errno;
+		fuse_log(FUSE_LOG_ERR,
+			 "Failed to create eventfd for qid %d: %s\n",
+			 queue->qid, strerror(errno));
+		return res;
+	}
+
+	res = fuse_queue_setup_io_uring(&queue->ring, queue->qid,
+					ring->queue_depth, se->fd,
+					queue->eventfd);
+	if (res != 0) {
+		fuse_log(FUSE_LOG_ERR, "qid=%d io_uring init failed\n",
+			 queue->qid);
+		return res;
+	}
+
+	/*
+	 * NOTE(review): sized from struct fuse_ring_ent although the buffer
+	 * is used as struct fuse_uring_req_header - confirm this is intended.
+	 */
+	queue->req_header_sz = ROUND_UP(sizeof(struct fuse_ring_ent),
+					page_sz);
+
+	for (size_t idx = 0; idx < ring->queue_depth; idx++) {
+		struct fuse_ring_ent *ring_ent = &queue->ent[idx];
+		struct fuse_req *req = &ring_ent->req;
+
+		ring_ent->ring_queue = queue;
+
+		/*
+		 * Also allocate the header to have it page aligned, which
+		 * is a requirement for page pinning
+		 */
+		ring_ent->req_header =
+			numa_alloc_local(queue->req_header_sz);
+		if
(!ring_ent->req_header) + return -ENOMEM; + ring_ent->req_payload_sz = ring->max_req_payload_sz; + + ring_ent->op_payload = + numa_alloc_local(ring_ent->req_payload_sz); + if (!ring_ent->op_payload) + return -ENOMEM; + + req->se = se; + pthread_mutex_init(&req->lock, NULL); + req->flags.is_uring = 1; + req->ref_cnt = 1; /* extra ref to avoid destruction */ + list_init_req(req); + } + + res = fuse_uring_register_queue(queue); + if (res != 0) { + fuse_log( + FUSE_LOG_ERR, + "Grave fuse-uring error on preparing SQEs, aborting\n"); + se->error = -EIO; + fuse_session_exit(se); + return res; + } + + return queue->ring.ring_fd; +} + +static void *fuse_uring_thread(void *arg) +{ + struct fuse_ring_queue *queue = arg; + struct fuse_ring_pool *ring_pool = queue->ring_pool; + struct fuse_session *se = ring_pool->se; + int err; + char thread_name[16] = { 0 }; + + snprintf(thread_name, 16, "fuse-ring-%d", queue->qid); + thread_name[15] = '\0'; + fuse_set_thread_name(thread_name); + + fuse_uring_set_thread_core(queue->qid); + + err = fuse_uring_init_queue(queue); + pthread_mutex_lock(&ring_pool->thread_start_mutex); + if (err < 0) + ring_pool->failed_threads++; + ring_pool->started_threads++; + pthread_cond_broadcast(&ring_pool->thread_start_cond); + pthread_mutex_unlock(&ring_pool->thread_start_mutex); + + if (err < 0) { + fuse_log(FUSE_LOG_ERR, "qid=%d queue setup failed\n", + queue->qid); + goto err_non_fatal; + } + + sem_wait(&ring_pool->init_sem); + + /* Not using fuse_session_exited(se), as that cannot be inlined */ + while (!atomic_load_explicit(&se->mt_exited, memory_order_relaxed)) { + io_uring_submit_and_wait(&queue->ring, 1); + + pthread_mutex_lock(&queue->ring_lock); + queue->cqe_processing = true; + err = fuse_uring_queue_handle_cqes(queue); + queue->cqe_processing = false; + pthread_mutex_unlock(&queue->ring_lock); + if (err < 0) + goto err; + } + + return NULL; + +err: + fuse_session_exit(se); +err_non_fatal: + return NULL; +} + +static int 
fuse_uring_start_ring_threads(struct fuse_ring_pool *ring)
+{
+	int rc = 0;
+
+	for (size_t qid = 0; qid < ring->nr_queues; qid++) {
+		struct fuse_ring_queue *queue = fuse_uring_get_queue(ring, qid);
+
+		rc = pthread_create(&queue->tid, NULL, fuse_uring_thread, queue);
+		if (rc != 0)
+			break;
+	}
+
+	return rc;
+}
+
+/*
+ * Validate the io-uring session settings before any resource is allocated.
+ *
+ * @return 0 if the settings are usable, -EINVAL for a queue depth of 0
+ */
+static int fuse_uring_sanity_check(struct fuse_session *se)
+{
+	if (se->uring.q_depth == 0) {
+		fuse_log(FUSE_LOG_ERR, "io-uring queue depth must be > 0\n");
+		return -EINVAL;
+	}
+
+	_Static_assert(sizeof(struct fuse_uring_cmd_req) <=
+		       FUSE_URING_MAX_SQE128_CMD_DATA,
+		       "SQE128_CMD_DATA has 80B cmd data");
+
+	return 0;
+}
+
+/*
+ * Create the ring pool, start one thread per queue and wait until every
+ * thread has reported that its queue setup succeeded or failed.
+ *
+ * @return 0 on success, an error code otherwise; on error the pool is
+ *         destroyed again and se->uring.pool is NULL
+ */
+int fuse_uring_start(struct fuse_session *se)
+{
+	int err = 0;
+	struct fuse_ring_pool *fuse_ring;
+
+	/* Refuse to start with unusable settings, nothing to clean up yet */
+	err = fuse_uring_sanity_check(se);
+	if (err)
+		return err;
+
+	fuse_ring = fuse_create_ring(se);
+	if (fuse_ring == NULL) {
+		err = -EADDRNOTAVAIL;
+		goto err;
+	}
+
+	se->uring.pool = fuse_ring;
+
+	/*
+	 * init_sem, thread_start_cond and thread_start_mutex were already
+	 * initialized by fuse_create_ring(); initializing them a second time
+	 * is undefined behavior, so it is not repeated here. The threads
+	 * block on init_sem until fuse_uring_wake_ring_threads() posts it,
+	 * which holds them off from sending fuse ring entries (SQEs).
+	 */
+
+	err = fuse_uring_start_ring_threads(fuse_ring);
+	if (err)
+		goto err;
+
+	/*
+	 * Wait for all threads to start or to fail
+	 */
+	pthread_mutex_lock(&fuse_ring->thread_start_mutex);
+	while (fuse_ring->started_threads < fuse_ring->nr_queues)
+		pthread_cond_wait(&fuse_ring->thread_start_cond,
+				  &fuse_ring->thread_start_mutex);
+
+	if (fuse_ring->failed_threads != 0)
+		err = -EADDRNOTAVAIL;
+	pthread_mutex_unlock(&fuse_ring->thread_start_mutex);
+
+err:
+	if (err) {
+		/* Note all threads need to have been started */
+		if (fuse_ring)
+			fuse_session_destruct_uring(fuse_ring);
+		se->uring.pool = NULL;
+	}
+	return err;
+}
+
+/*
+ * Stop all ring threads and release the ring pool of @se, if there is one.
+ *
+ * @return always 0
+ */
+int fuse_uring_stop(struct fuse_session *se)
+{
+	struct fuse_ring_pool *ring = se->uring.pool;
+
+	if (ring == NULL)
+		return 0;
+
+	fuse_session_destruct_uring(ring);
+
+	/* The pool is freed, do not leave a dangling pointer behind */
+	se->uring.pool = NULL;
+
+	return 0;
+}
+
+void
fuse_uring_wake_ring_threads(struct fuse_session *se) +{ + struct fuse_ring_pool *ring = se->uring.pool; + + /* Wake up the threads to let them send SQEs */ + for (size_t qid = 0; qid < ring->nr_queues; qid++) + sem_post(&ring->init_sem); +} diff --git a/lib/fuse_uring_i.h b/lib/fuse_uring_i.h new file mode 100644 index 000000000..c1da73ad7 --- /dev/null +++ b/lib/fuse_uring_i.h @@ -0,0 +1,86 @@ +/* + * FUSE: Filesystem in Userspace + * Copyright (C) 2025 Bernd Schubert + * This program can be distributed under the terms of the GNU LGPLv2. + * See the file LGPL2.txt + */ + +#ifndef FUSE_URING_I_H_ +#define FUSE_URING_I_H_ + +#include "fuse_config.h" +#include "fuse_lowlevel.h" +#include "fuse_kernel.h" + +#ifndef HAVE_URING +#include "util.h" +#endif + +#include // IWYU pragma: keep + +/* io-uring defaults */ +#define SESSION_DEF_URING_ENABLE (0) +#define SESSION_DEF_URING_Q_DEPTH (8) + +void fuse_session_process_uring_cqe(struct fuse_session *se, + struct fuse_req *req, + struct fuse_in_header *in, void *in_header, + void *in_payload, size_t payload_len); + +#ifdef HAVE_URING + +struct fuse_in_header; + +int fuse_uring_start(struct fuse_session *se); +void fuse_uring_wake_ring_threads(struct fuse_session *se); +int fuse_uring_stop(struct fuse_session *se); +int send_reply_uring(fuse_req_t req, int error, const void *arg, + size_t argsize); + +int fuse_reply_data_uring(fuse_req_t req, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags); +int fuse_send_msg_uring(fuse_req_t req, struct iovec *iov, int count); + +#else // HAVE_URING + +static inline int fuse_uring_start(struct fuse_session *se FUSE_VAR_UNUSED) +{ + return -ENOTSUP; +} + +static inline void +fuse_uring_wake_ring_threads(struct fuse_session *se FUSE_VAR_UNUSED) +{ +} + +static inline int fuse_uring_stop(struct fuse_session *se FUSE_VAR_UNUSED) +{ + return -ENOTSUP; +} + +static inline int send_reply_uring(fuse_req_t req FUSE_VAR_UNUSED, + int error FUSE_VAR_UNUSED, + const void *arg 
FUSE_VAR_UNUSED, + size_t argsize FUSE_VAR_UNUSED) +{ + return -ENOTSUP; +} + +static inline int +fuse_reply_data_uring(fuse_req_t req FUSE_VAR_UNUSED, + struct fuse_bufvec *bufv FUSE_VAR_UNUSED, + enum fuse_buf_copy_flags flags FUSE_VAR_UNUSED) +{ + return -ENOTSUP; +} + +static inline int fuse_send_msg_uring(fuse_req_t req FUSE_VAR_UNUSED, + struct iovec *iov FUSE_VAR_UNUSED, + int count FUSE_VAR_UNUSED) +{ + return -ENOTSUP; +} + +#endif // HAVE_URING + +#endif // FUSE_URING_I_H_ diff --git a/lib/fuse_versionscript b/lib/fuse_versionscript index a2653fcdd..b58360feb 100644 --- a/lib/fuse_versionscript +++ b/lib/fuse_versionscript @@ -208,10 +208,20 @@ FUSE_3.17.3 { fuse_unset_feature_flag; fuse_get_feature_flag; - # Not part of public API, for internal test use only + # Not part of public API, for internal testing only fuse_convert_to_conn_want_ext; } FUSE_3.17; +FUSE_3.18 { + global: + fuse_req_is_uring; + fuse_req_get_payload; + fuse_lowlevel_notify_increment_epoch; + + fuse_reply_statx; + fuse_fs_statx; +} FUSE_3.17; + # Local Variables: # indent-tabs-mode: t # End: diff --git a/lib/helper.c b/lib/helper.c index aceff9fd5..5c13b93a4 100644 --- a/lib/helper.c +++ b/lib/helper.c @@ -7,7 +7,7 @@ file system by implementing nothing but the request handlers. This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB. + See the file LGPL2.txt. 
*/ #include "fuse_config.h" diff --git a/lib/meson.build b/lib/meson.build index df5f8afbe..fcd95741c 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -19,6 +19,14 @@ if private_cfg.get('HAVE_ICONV') endif endif +if private_cfg.get('HAVE_URING', false) + libfuse_sources += [ 'fuse_uring.c' ] + deps += [ dependency('liburing') ] + deps += [ dependency('numa') ] +endif + + + libdl = cc.find_library('dl', required: false) if libdl.found() deps += [ libdl ] @@ -33,12 +41,13 @@ else endif fusermount_path = join_paths(get_option('prefix'), get_option('bindir')) -libfuse = library('fused', +libfuse = library('fuse3', libfuse_sources, - version: meson.project_version(), - soversion: '1', + version: base_version, + soversion: '4', include_directories: include_dirs, - dependencies: deps, install: true, + dependencies: deps, + install: true, link_depends: 'fuse_versionscript', c_args: [ '-DFUSE_USE_VERSION=317', '-DFUSERMOUNT_DIR="@0@"'.format(fusermount_path) ], @@ -49,9 +58,9 @@ pkg = import('pkgconfig') pkg.generate(libraries: [ libfuse, '-lpthread' ], libraries_private: '-ldl', version: meson.project_version(), - name: 'fused', + name: 'fuse3', description: 'Filesystem in Userspace', - subdirs: 'fused') + subdirs: 'fuse3') libfuse_dep = declare_dependency(include_directories: include_dirs, link_with: libfuse, dependencies: deps) diff --git a/lib/modules/iconv.c b/lib/modules/iconv.c index a0bf72be5..417c9043d 100644 --- a/lib/modules/iconv.c +++ b/lib/modules/iconv.c @@ -3,7 +3,7 @@ Copyright (C) 2007 Miklos Szeredi This program can be distributed under the terms of the GNU LGPLv2. 
- See the file COPYING.LIB + See the file LGPL2.txt */ #include @@ -568,6 +568,22 @@ static off_t iconv_lseek(const char *path, off_t off, int whence, return res; } +#ifdef HAVE_STATX +static int iconv_statx(const char *path, int flags, int mask, struct statx *stxbuf, + struct fuse_file_info *fi) +{ + struct iconv *ic = iconv_get(); + char *newpath; + int res = iconv_convpath(ic, path, &newpath, 0); + + if (!res) { + res = fuse_fs_statx(ic->next, newpath, flags, mask, stxbuf, fi); + free(newpath); + } + return res; +} +#endif + static void *iconv_init(struct fuse_conn_info *conn, struct fuse_config *cfg) { @@ -627,6 +643,9 @@ static const struct fuse_operations iconv_oper = { .flock = iconv_flock, .bmap = iconv_bmap, .lseek = iconv_lseek, +#ifdef HAVE_STATX + .statx = iconv_statx, +#endif }; static const struct fuse_opt iconv_opts[] = { diff --git a/lib/modules/subdir.c b/lib/modules/subdir.c index e92eb6284..67c469799 100644 --- a/lib/modules/subdir.c +++ b/lib/modules/subdir.c @@ -3,7 +3,7 @@ Copyright (C) 2007 Miklos Szeredi This program can be distributed under the terms of the GNU LGPLv2. 
- See the file COPYING.LIB + See the file LGPL2.txt */ #include @@ -553,6 +553,22 @@ static off_t subdir_lseek(const char *path, off_t off, int whence, return res; } +#ifdef HAVE_STATX +static int subdir_statx(const char *path, int flags, int mask, struct statx *stxbuf, + struct fuse_file_info *fi) +{ + struct subdir *ic = subdir_get(); + char *newpath; + int res = subdir_addpath(ic, path, &newpath); + + if (!res) { + res = fuse_fs_statx(ic->next, newpath, flags, mask, stxbuf, fi); + free(newpath); + } + return res; +} +#endif + static void *subdir_init(struct fuse_conn_info *conn, struct fuse_config *cfg) { @@ -608,6 +624,9 @@ static const struct fuse_operations subdir_oper = { .flock = subdir_flock, .bmap = subdir_bmap, .lseek = subdir_lseek, +#ifdef HAVE_STATX + .statx = subdir_statx, +#endif }; static const struct fuse_opt subdir_opts[] = { diff --git a/lib/mount.c b/lib/mount.c index be7600636..7a856c101 100644 --- a/lib/mount.c +++ b/lib/mount.c @@ -5,9 +5,12 @@ Architecture specific file system mounting (Linux). This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB. + See the file LGPL2.txt. 
*/ +/* For environ */ +#define _GNU_SOURCE + #include "fuse_config.h" #include "fuse_i.h" #include "fuse_misc.h" @@ -22,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -45,10 +49,7 @@ #define FUSERMOUNT_PROG "fusermount3" #define FUSE_COMMFD_ENV "_FUSE_COMMFD" #define FUSE_COMMFD2_ENV "_FUSE_COMMFD2" - -#ifndef HAVE_FORK -#define fork() vfork() -#endif +#define FUSE_KERN_DEVICE_ENV "FUSE_KERN_DEVICE" #ifndef MS_DIRSYNC #define MS_DIRSYNC 128 @@ -121,21 +122,50 @@ static const struct fuse_opt fuse_mount_opts[] = { FUSE_OPT_END }; -static void exec_fusermount(const char *argv[]) +/* + * Running fusermount by calling 'posix_spawn' + * + * @param out_pid might be NULL + */ +static int fusermount_posix_spawn(posix_spawn_file_actions_t *action, + char const * const argv[], pid_t *out_pid) { - execv(FUSERMOUNT_DIR "/" FUSERMOUNT_PROG, (char **) argv); - execvp(FUSERMOUNT_PROG, (char **) argv); + const char *full_path = FUSERMOUNT_DIR "/" FUSERMOUNT_PROG; + pid_t pid; + + /* See man 7 environ for the global environ pointer */ + + /* first try the install path */ + int status = posix_spawn(&pid, full_path, action, NULL, + (char * const *) argv, environ); + if (status != 0) { + /* if that fails, try a system install */ + status = posix_spawnp(&pid, FUSERMOUNT_PROG, action, NULL, + (char * const *) argv, environ); + } + + if (status != 0) { + fuse_log(FUSE_LOG_ERR, "Failed to call '%s': %s\n", + FUSERMOUNT_PROG, strerror(status)); + return -status; + } + + if (out_pid) + *out_pid = pid; + else + waitpid(pid, NULL, 0); /* FIXME: check exit code and return error if any */ + + return 0; } void fuse_mount_version(void) { - int pid = fork(); - if (!pid) { - const char *argv[] = { FUSERMOUNT_PROG, "--version", NULL }; - exec_fusermount(argv); - _exit(1); - } else if (pid != -1) - waitpid(pid, NULL, 0); + char const *const argv[] = {FUSERMOUNT_PROG, "--version", NULL}; + int status = fusermount_posix_spawn(NULL, argv, NULL); + + if(status != 0) + 
fuse_log(FUSE_LOG_ERR, "Running '%s --version' failed\n", + FUSERMOUNT_PROG); } struct mount_flags { @@ -255,7 +285,7 @@ static int receive_fd(int fd) while(((rv = recvmsg(fd, &msg, 0)) == -1) && errno == EINTR); if (rv == -1) { - perror("recvmsg"); + fuse_log(FUSE_LOG_ERR, "recvmsg failed: %s\n", strerror(errno)); return -1; } if(!rv) { @@ -275,7 +305,6 @@ static int receive_fd(int fd) void fuse_kern_unmount(const char *mountpoint, int fd) { int res; - int pid; if (fd != -1) { struct pollfd pfd; @@ -307,23 +336,21 @@ void fuse_kern_unmount(const char *mountpoint, int fd) if (res == 0) return; - pid = fork(); - if(pid == -1) + char const * const argv[] = + { FUSERMOUNT_PROG, "--unmount", "--quiet", "--lazy", + "--", mountpoint, NULL }; + int status = fusermount_posix_spawn(NULL, argv, NULL); + if(status != 0) { + fuse_log(FUSE_LOG_ERR, "Spawning %s to unmount failed: %s\n", + FUSERMOUNT_PROG, strerror(-status)); return; - - if(pid == 0) { - const char *argv[] = { FUSERMOUNT_PROG, "-u", "-q", "-z", - "--", mountpoint, NULL }; - - exec_fusermount(argv); - _exit(1); } - waitpid(pid, NULL, 0); } static int setup_auto_unmount(const char *mountpoint, int quiet) { - int fds[2], pid; + int fds[2]; + pid_t pid; int res; if (!mountpoint) { @@ -333,59 +360,71 @@ static int setup_auto_unmount(const char *mountpoint, int quiet) res = socketpair(PF_UNIX, SOCK_STREAM, 0, fds); if(res == -1) { - perror("fuse: socketpair() failed"); + fuse_log(FUSE_LOG_ERR, "Setting up auto-unmount socketpair() failed: %s\n", + strerror(errno)); return -1; } - pid = fork(); - if(pid == -1) { - perror("fuse: fork() failed"); - close(fds[0]); - close(fds[1]); - return -1; + char arg_fd_entry[30]; + snprintf(arg_fd_entry, sizeof(arg_fd_entry), "%i", fds[0]); + setenv(FUSE_COMMFD_ENV, arg_fd_entry, 1); + /* + * This helps to identify the FD held by parent process. + * In auto-unmount case, parent process can close this FD explicitly to do unmount. + * The FD[1] can be got via getenv(FUSE_COMMFD2_ENV).
+ * One potential use case is to satisfy FD-Leak checks. + */ + snprintf(arg_fd_entry, sizeof(arg_fd_entry), "%i", fds[1]); + setenv(FUSE_COMMFD2_ENV, arg_fd_entry, 1); + + char const *const argv[] = { + FUSERMOUNT_PROG, + "--auto-unmount", + "--", + mountpoint, + NULL, + }; + + // TODO: add error handling for all manipulations of action. + posix_spawn_file_actions_t action; + posix_spawn_file_actions_init(&action); + + if (quiet) { + posix_spawn_file_actions_addopen(&action, STDOUT_FILENO, "/dev/null", O_WRONLY, 0); + posix_spawn_file_actions_addopen(&action, STDERR_FILENO, "/dev/null", O_WRONLY, 0); } + posix_spawn_file_actions_addclose(&action, fds[1]); - if(pid == 0) { - char env[10]; - const char *argv[32]; - int a = 0; - - if (quiet) { - int fd = open("/dev/null", O_RDONLY); - if (fd != -1) { - dup2(fd, 1); - dup2(fd, 2); - } - } + /* + * auto-unmount runs in the background - it is not waiting for the + * process + */ + int status = fusermount_posix_spawn(&action, argv, &pid); - argv[a++] = FUSERMOUNT_PROG; - argv[a++] = "--auto-unmount"; - argv[a++] = "--"; - argv[a++] = mountpoint; - argv[a++] = NULL; + posix_spawn_file_actions_destroy(&action); + if(status != 0) { + close(fds[0]); close(fds[1]); - fcntl(fds[0], F_SETFD, 0); - snprintf(env, sizeof(env), "%i", fds[0]); - setenv(FUSE_COMMFD_ENV, env, 1); - exec_fusermount(argv); - perror("fuse: failed to exec fusermount3"); - _exit(1); + fuse_log(FUSE_LOG_ERR, "fuse: Setting up auto-unmount failed (spawn): %s\n", + strerror(-status)); + return -1; } - + // passed to child now, so can close here. close(fds[0]); // Now fusermount3 will only exit when fds[1] closes automatically when our // process exits.
return 0; + // Note: fds[1] is leaked and doesn't get FD_CLOEXEC } static int fuse_mount_fusermount(const char *mountpoint, struct mount_opts *mo, const char *opts, int quiet) { - int fds[2], pid; + int fds[2]; + pid_t pid; int res; - int rv; if (!mountpoint) { fuse_log(FUSE_LOG_ERR, "fuse: missing mountpoint parameter\n"); @@ -394,51 +433,57 @@ static int fuse_mount_fusermount(const char *mountpoint, struct mount_opts *mo, res = socketpair(PF_UNIX, SOCK_STREAM, 0, fds); if(res == -1) { - perror("fuse: socketpair() failed"); + fuse_log(FUSE_LOG_ERR, "Running %s: socketpair() failed: %s\n", + FUSERMOUNT_PROG, strerror(errno)); return -1; } - pid = fork(); - if(pid == -1) { - perror("fuse: fork() failed"); - close(fds[0]); - close(fds[1]); - return -1; + char arg_fd_entry[30]; + snprintf(arg_fd_entry, sizeof(arg_fd_entry), "%i", fds[0]); + setenv(FUSE_COMMFD_ENV, arg_fd_entry, 1); + /* + * This helps to identify the FD held by parent process. + * In auto-unmount case, parent process can close this FD explicitly to do unmount. + * The FD[1] can be got via getenv(FUSE_COMMFD2_ENV). + * One potential use case is to satisfy FD-Leak checks. + */ + snprintf(arg_fd_entry, sizeof(arg_fd_entry), "%i", fds[1]); + setenv(FUSE_COMMFD2_ENV, arg_fd_entry, 1); + + char const *const argv[] = { + FUSERMOUNT_PROG, + "-o", opts ?
opts : "", + "--", + mountpoint, + NULL, + }; + + + posix_spawn_file_actions_t action; + posix_spawn_file_actions_init(&action); + + if (quiet) { + posix_spawn_file_actions_addopen(&action, STDOUT_FILENO, "/dev/null", O_WRONLY, 0); + posix_spawn_file_actions_addopen(&action, STDERR_FILENO, "/dev/null", O_WRONLY, 0); } + posix_spawn_file_actions_addclose(&action, fds[1]); - if(pid == 0) { - char env[10]; - const char *argv[32]; - int a = 0; + int status = fusermount_posix_spawn(&action, argv, &pid); - if (quiet) { - int fd = open("/dev/null", O_RDONLY); - if (fd != -1) { - dup2(fd, 1); - dup2(fd, 2); - } - } - - argv[a++] = FUSERMOUNT_PROG; - if (opts) { - argv[a++] = "-o"; - argv[a++] = opts; - } - argv[a++] = "--"; - argv[a++] = mountpoint; - argv[a++] = NULL; + posix_spawn_file_actions_destroy(&action); + if(status != 0) { + close(fds[0]); close(fds[1]); - fcntl(fds[0], F_SETFD, 0); - snprintf(env, sizeof(env), "%i", fds[0]); - setenv(FUSE_COMMFD_ENV, env, 1); - exec_fusermount(argv); - perror("fuse: failed to exec fusermount3"); - _exit(1); + fuse_log(FUSE_LOG_ERR, "posix_spawn(p)() for %s failed: %s\n", + FUSERMOUNT_PROG, strerror(-status)); + return -1; } + // passed to child now, so can close here.
close(fds[0]); - rv = receive_fd(fds[1]); + + int fd = receive_fd(fds[1]); if (!mo->auto_unmount) { /* with auto_unmount option fusermount3 will not exit until @@ -447,10 +492,10 @@ static int fuse_mount_fusermount(const char *mountpoint, struct mount_opts *mo, waitpid(pid, NULL, 0); /* bury zombie */ } - if (rv >= 0) - fcntl(rv, F_SETFD, FD_CLOEXEC); + if (fd >= 0) + fcntl(fd, F_SETFD, FD_CLOEXEC); - return rv; + return fd; } #ifndef O_CLOEXEC @@ -461,7 +506,7 @@ static int fuse_mount_sys(const char *mnt, struct mount_opts *mo, const char *mnt_opts) { char tmp[128]; - const char *devname = "/dev/fuse"; + const char *devname = getenv(FUSE_KERN_DEVICE_ENV) ?: "/dev/fuse"; char *source = NULL; char *type = NULL; struct stat stbuf; @@ -483,7 +528,9 @@ static int fuse_mount_sys(const char *mnt, struct mount_opts *mo, fd = open(devname, O_RDWR | O_CLOEXEC); if (fd == -1) { if (errno == ENODEV || errno == ENOENT) - fuse_log(FUSE_LOG_ERR, "fuse: device not found, try 'modprobe fuse' first\n"); + fuse_log(FUSE_LOG_ERR, + "fuse: device %s not found. Kernel module not loaded?\n", + devname); else fuse_log(FUSE_LOG_ERR, "fuse: failed to open %s: %s\n", devname, strerror(errno)); diff --git a/lib/mount_bsd.c b/lib/mount_bsd.c index bd95a76d3..c12ab322e 100644 --- a/lib/mount_bsd.c +++ b/lib/mount_bsd.c @@ -5,7 +5,7 @@ Architecture specific file system mounting (FreeBSD). This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB. + See the file LGPL2.txt. */ #include "fuse_config.h" @@ -187,7 +187,7 @@ static int fuse_mount_core(const char *mountpoint, const char *opts) if (pid == 0) { const char *argv[32]; int a = 0; - int ret = -1; + int ret = -1; if (! 
fdnam) { @@ -214,7 +214,10 @@ static int fuse_mount_core(const char *mountpoint, const char *opts) _exit(EXIT_FAILURE); } - _exit(EXIT_SUCCESS); + waitpid(pid, &status, 0); + if (!WIFEXITED(status)) + _exit(EXIT_FAILURE); + _exit(WEXITSTATUS(status)); } if (waitpid(cpid, &status, 0) == -1 || WEXITSTATUS(status) != 0) { diff --git a/lib/mount_util.c b/lib/mount_util.c index f19dfb4c8..8c0cdf72d 100644 --- a/lib/mount_util.c +++ b/lib/mount_util.c @@ -5,7 +5,7 @@ Architecture-independent mounting code. This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB. + See the file LGPL2.txt. */ #include "fuse_config.h" diff --git a/lib/mount_util.h b/lib/mount_util.h index 0ef0fbe81..9cb9077dd 100644 --- a/lib/mount_util.h +++ b/lib/mount_util.h @@ -3,7 +3,7 @@ Copyright (C) 2001-2007 Miklos Szeredi This program can be distributed under the terms of the GNU LGPLv2. - See the file COPYING.LIB. + See the file LGPL2.txt. */ #include diff --git a/lib/usdt.h b/lib/usdt.h new file mode 100644 index 000000000..6a0bc4258 --- /dev/null +++ b/lib/usdt.h @@ -0,0 +1,540 @@ +/* + * Copied from https://github.com/libbpf/usdt/ + */ + +// SPDX-License-Identifier: BSD-2-Clause +/* + * This single-header library defines a collection of variadic macros for + * defining and triggering USDTs (User Statically-Defined Tracepoints): + * + * - For USDTs without associated semaphore: + * USDT(group, name, args...) + * + * - For USDTs with implicit (transparent to the user) semaphore: + * USDT_WITH_SEMA(group, name, args...) + * USDT_IS_ACTIVE(group, name) + * + * - For USDTs with explicit (user-defined and provided) semaphore: + * USDT_WITH_EXPLICIT_SEMA(sema, group, name, args...) + * USDT_SEMA_IS_ACTIVE(sema) + * + * all of which emit a NOP instruction into the instruction stream, and so + * have *zero* overhead for the surrounding code. 
USDTs are identified by + * a combination of `group` and `name` identifiers, which is used by external + * tracing tooling (tracers) for identifying exact USDTs of interest. + * + * USDTs can have an associated (2-byte) activity counter (USDT semaphore), + * automatically maintained by Linux kernel whenever any correctly written + * BPF-based tracer is attached to the USDT. This USDT semaphore can be used + * to check whether there is a need to do any extra data collection and + * processing for a given USDT (if necessary), and otherwise avoid extra work + * for a common case of USDT not being traced (i.e., not "active"). + * + * See documentation for USDT_WITH_SEMA()/USDT_IS_ACTIVE() or + * USDT_WITH_EXPLICIT_SEMA()/USDT_SEMA_IS_ACTIVE() APIs below for details on + * working with USDTs with implicitly or explicitly associated + * USDT semaphores, respectively. + * + * There is also some additional data recorded into an auxiliary note + * section. The data in the note section describes the operands, in terms of + * size and location, used by tracing tooling to know where to find USDT + * arguments. Each location is encoded as an assembler operand string. + * Tracing tools (bpftrace and BPF-based tracers, systemtap, etc) insert + * breakpoints on top of the nop, and decode the location operand-strings, + * like an assembler, to find the values being passed. + * + * The operand strings are selected by the compiler for each operand. + * They are constrained by inline-assembler codes. The default is: + * + * #define USDT_ARG_CONSTRAINT nor + * + * This is a good default if the operands tend to be integral and + * moderate in number (smaller than number of registers). In other + * cases, the compiler may report "'asm' requires impossible reload" or + * similar.
In this case, consider simplifying the macro call (fewer + * and simpler operands), reduce optimization, or override the default + * constraints string via: + * + * #define USDT_ARG_CONSTRAINT g + * #include <usdt.h> + * + * For some historical description of USDT v3 format (the one used by this + * library and generally recognized and assumed by BPF-based tracing tools) + * see [0]. The more formal specification can be found at [1]. Additional + * argument constraints information can be found at [2]. + * + * Original SystemTap's sys/sdt.h implementation ([3]) was used as a base for + * this USDT library implementation. Current implementation differs *a lot* in + * terms of exposed user API and general usability, which was the main goal + * and focus of the reimplementation work. Nevertheless, underlying recorded + * USDT definitions are fully binary compatible and any USDT-based tooling + * should work equally well with USDTs defined by either SystemTap's or this + * library's USDT implementation. + * + * [0] https://ecos.sourceware.org/ml/systemtap/2010-q3/msg00145.html + * [1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + * [2] https://gcc.gnu.org/onlinedocs/gcc/Constraints.html + * [3] https://sourceware.org/git/?p=systemtap.git;a=blob;f=includes/sys/sdt.h + */ +#ifndef __USDT_H +#define __USDT_H + +/* + * Changelog: + * + * 0.1.0 + * ----- + * - Initial release + */ +#define USDT_MAJOR_VERSION 0 +#define USDT_MINOR_VERSION 1 +#define USDT_PATCH_VERSION 0 + +/* C++20 and C23 added __VA_OPT__ as a standard replacement for non-standard `##__VA_ARGS__` extension */ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) || (defined(__cplusplus) && __cplusplus > 201703L) +#define __usdt_va_opt 1 +#define __usdt_va_args(...) __VA_OPT__(,) __VA_ARGS__ +#else +#define __usdt_va_args(...) , ##__VA_ARGS__ +#endif + +/* + * Trigger USDT with `group`:`name` identifier and pass through `args` as its + * arguments. Zero arguments are acceptable as well.
No USDT semaphore is + * associated with this USDT. + * + * Such "semaphoreless" USDTs are commonly used when there is no extra data + * collection or processing needed to collect and prepare USDT arguments and + * they are just available in the surrounding code. USDT() macro will just + * record their locations in CPU registers or in memory for tracing tooling to + * be able to access them, if necessary. + */ +#ifdef __usdt_va_opt +#define USDT(group, name, ...) \ + __usdt_probe(group, name, __usdt_sema_none, 0 __VA_OPT__(,) __VA_ARGS__) +#else +#define USDT(group, name, ...) \ + __usdt_probe(group, name, __usdt_sema_none, 0, ##__VA_ARGS__) +#endif + +/* + * Trigger USDT with `group`:`name` identifier and pass through `args` as its + * arguments. Zero arguments are acceptable as well. USDT also get an + * implicitly-defined associated USDT semaphore, which will be "activated" by + * tracing tooling and can be used to check whether USDT is being actively + * observed. + * + * USDTs with semaphore are commonly used when there is a need to perform + * additional data collection and processing to prepare USDT arguments, which + * otherwise might not be necessary for the rest of application logic. In such + * case, USDT semaphore can be used to avoid unnecessary extra work. If USDT + * is not traced (which is presumed to be a common situation), the associated + * USDT semaphore is "inactive", and so there is no need to waste resources to + * prepare USDT arguments. Use USDT_IS_ACTIVE(group, name) to check whether + * USDT is "active". + * + * N.B. There is an inherent (albeit short) gap between checking whether USDT + * is active and triggering corresponding USDT, in which external tracer can + * be attached to an USDT and activate USDT semaphore after the activity check. + * If such a race occurs, tracers might miss one USDT execution. Tracers are + * expected to accommodate such possibility and this is expected to not be + * a problem for applications and tracers. 
+ * + * N.B. Implicit USDT semaphore defined by USDT_WITH_SEMA() is contained + * within a single executable or shared library and is not shared outside + * them. I.e., if you use USDT_WITH_SEMA() with the same USDT group and name + * identifier across executable and shared library, it will work and won't + * conflict, per se, but will define independent USDT semaphores, one for each + * shared library/executable in which USDT_WITH_SEMA(group, name) is used. + * That is, if you attach to this USDT in one shared library (or executable), + * then only USDT semaphore within that shared library (or executable) will be + * updated by the kernel, while other libraries (or executable) will not see + * activated USDT semaphore. In short, it's best to use unique USDT group:name + * identifiers across different shared libraries (and, equivalently, between + * executable and shared library). This is advanced consideration and is + * rarely (if ever) seen in practice, but just to avoid surprises this is + * called out here. (Static libraries become a part of final executable, once + * linked by linker, so the above considerations don't apply to them.) + */ +#ifdef __usdt_va_opt +#define USDT_WITH_SEMA(group, name, ...) \ + __usdt_probe(group, name, \ + __usdt_sema_implicit, __usdt_sema_name(group, name) \ + __VA_OPT__(,) __VA_ARGS__) +#else +#define USDT_WITH_SEMA(group, name, ...) \ + __usdt_probe(group, name, \ + __usdt_sema_implicit, __usdt_sema_name(group, name), \ + ##__VA_ARGS__) +#endif + +struct usdt_sema { volatile unsigned short active; }; + +/* + * Check if USDT with `group`:`name` identifier is "active" (i.e., whether it + * is attached to by external tracing tooling and is actively observed). + * + * This macro can be used to decide whether any additional and potentially + * expensive data collection or processing should be done to pass extra + * information into the given USDT. 
It is assumed that USDT is triggered with + * USDT_WITH_SEMA() macro which will implicitly define associated USDT + * semaphore. (If one needs more control over USDT semaphore, see + * USDT_DEFINE_SEMA() and USDT_WITH_EXPLICIT_SEMA() macros below.) + * + * N.B. Such checks are necessarily racy and speculative. Between checking + * whether USDT is active and triggering the USDT itself, tracer can be + * detached with no notification. This race should be extremely rare and worst + * case should result in one-time wasted extra data collection and processing. + */ +#define USDT_IS_ACTIVE(group, name) ({ \ + extern struct usdt_sema __usdt_sema_name(group, name) \ + __usdt_asm_name(__usdt_sema_name(group, name)); \ + __usdt_sema_implicit(__usdt_sema_name(group, name)); \ + __usdt_sema_name(group, name).active > 0; \ +}) + +/* + * APIs for working with user-defined explicit USDT semaphores. + * + * This is a less commonly used advanced API for use cases in which user needs + * an explicit control over (potentially shared across multiple USDTs) USDT + * semaphore instance. This can be used when there is a group of logically + * related USDTs that all need extra data collection and processing whenever + * any of a family of related USDTs are "activated" (i.e., traced). In such + * a case, all such related USDTs will be associated with the same shared USDT + * semaphore defined with USDT_DEFINE_SEMA() and the USDTs themselves will be + * triggered with USDT_WITH_EXPLICIT_SEMA() macros, taking an explicit extra + * USDT semaphore identifier as an extra parameter. + */ + +/** + * Underlying C global variable name for user-defined USDT semaphore with + * `sema` identifier. Could be useful for debugging, but normally shouldn't be + * used explicitly. + */ +#define USDT_SEMA(sema) __usdt_sema_##sema + +/* + * Define storage for user-defined USDT semaphore `sema`. 
+ * + * Should be used only once in non-header source file to let compiler allocate + * space for the semaphore variable. Just like with any other global variable. + * + * This macro can be used anywhere where global variable declaration is + * allowed. Just like with global variable definitions, there should be only + * one definition of user-defined USDT semaphore with given `sema` identifier, + * otherwise compiler or linker will complain about duplicate variable + * definition. + * + * For C++, it is allowed to use USDT_DEFINE_SEMA() both in global namespace + * and inside namespaces (including nested namespaces). Just make sure that + * USDT_DECLARE_SEMA() is placed within the namespace where this semaphore is + * referenced, or any of its parent namespaces, so the C++ language-level + * identifier is visible to the code that needs to reference the semaphore. + * At the lowest layer, USDT semaphores have global naming and visibility + * (they have a corresponding `__usdt_sema_` symbol, which can be linked + * against from C or C++ code, if necessary). To keep it simple, putting + * USDT_DECLARE_SEMA() declarations into global namespaces is the simplest + * no-brainer solution. All these aspects are irrelevant for plain C, because + * C doesn't have namespaces and everything is always in the global namespace. + * + * N.B. Due to USDT metadata being recorded in non-allocatable ELF note + * section, it has limitations when it comes to relocations, which, in + * practice, means that it's not possible to correctly share USDT semaphores + * between main executable and shared libraries, or even between multiple + * shared libraries. USDT semaphore has to be contained to individual shared + * library or executable to avoid unpleasant surprises with half-working USDT + * semaphores. We enforce this by marking semaphore ELF symbols as having + * a hidden visibility. 
This is quite an advanced use case and consideration + * and for most users this should have no consequences whatsoever. + */ +#define USDT_DEFINE_SEMA(sema) \ + struct usdt_sema __usdt_sema_sec USDT_SEMA(sema) \ + __usdt_asm_name(USDT_SEMA(sema)) \ + __attribute__((visibility("hidden"))) = { 0 } + +/* + * Declare extern reference to user-defined USDT semaphore `sema`. + * + * Refers to a variable defined in another compilation unit by + * USDT_DEFINE_SEMA() and allows to use the same USDT semaphore across + * multiple compilation units (i.e., .c and .cpp files). + * + * See USDT_DEFINE_SEMA() notes above for C++ language usage peculiarities. + */ +#define USDT_DECLARE_SEMA(sema) \ + extern struct usdt_sema USDT_SEMA(sema) __usdt_asm_name(USDT_SEMA(sema)) + +/* + * Check if user-defined USDT semaphore `sema` is "active" (i.e., whether it + * is attached to by external tracing tooling and is actively observed). + * + * This macro can be used to decide whether any additional and potentially + * expensive data collection or processing should be done to pass extra + * information into USDT(s) associated with USDT semaphore `sema`. + * + * N.B. Such checks are necessarily racy. Between checking the state of USDT + * semaphore and triggering associated USDT(s), the active tracer might attach + * or detach. This race should be extremely rare and worst case should result + * in one-time missed USDT event or wasted extra data collection and + * processing. USDT-using tracers should be written with this in mind and is + * not a concern of the application defining USDTs with associated semaphore. + */ +#define USDT_SEMA_IS_ACTIVE(sema) (USDT_SEMA(sema).active > 0) + +/* + * Invoke USDT specified by `group` and `name` identifiers and associate + * explicitly user-defined semaphore `sema` with it. Pass through `args` as + * USDT arguments. `args` are optional and zero arguments are acceptable. 
+ * + * Semaphore is defined with the help of USDT_DEFINE_SEMA() macro and can be + * checked whether active with USDT_SEMA_IS_ACTIVE(). + */ +#ifdef __usdt_va_opt +#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \ + __usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema) __VA_OPT__(,) __VA_ARGS__) +#else +#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \ + __usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema), ##__VA_ARGS__) +#endif + +/* + * Adjustable implementation aspects + */ +#ifndef USDT_ARG_CONSTRAINT +#if defined __powerpc__ +#define USDT_ARG_CONSTRAINT nZr +#elif defined __arm__ +#define USDT_ARG_CONSTRAINT g +#elif defined __loongarch__ +#define USDT_ARG_CONSTRAINT nmr +#else +#define USDT_ARG_CONSTRAINT nor +#endif +#endif /* USDT_ARG_CONSTRAINT */ + +#ifndef USDT_NOP +#if defined(__ia64__) || defined(__s390__) || defined(__s390x__) +#define USDT_NOP nop 0 +#else +#define USDT_NOP nop +#endif +#endif /* USDT_NOP */ + +/* + * Implementation details + */ +/* USDT name for implicitly-defined USDT semaphore, derived from group:name */ +#define __usdt_sema_name(group, name) __usdt_sema_##group##__##name +/* ELF section into which USDT semaphores are put */ +#define __usdt_sema_sec __attribute__((section(".probes"))) + +#define __usdt_concat(a, b) a ## b +#define __usdt_apply(fn, n) __usdt_concat(fn, n) + +#ifndef __usdt_nth +#define __usdt_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, N, ...) N +#endif + +#ifndef __usdt_narg +#ifdef __usdt_va_opt +#define __usdt_narg(...) __usdt_nth(_ __VA_OPT__(,) __VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#else +#define __usdt_narg(...)
__usdt_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#endif +#endif /* __usdt_narg */ + +#define __usdt_hash # +#define __usdt_str_(x) #x +#define __usdt_str(x) __usdt_str_(x) + +#ifndef __usdt_asm_name +#define __usdt_asm_name(name) __asm__(__usdt_str(name)) +#endif + +#define __usdt_asm1(a) __usdt_str(a) "\n" +#define __usdt_asm2(a,b) __usdt_str(a) "," __usdt_str(b) "\n" +#define __usdt_asm3(a,b,c) __usdt_str(a) "," __usdt_str(b) "," __usdt_str(c) "\n" +#define __usdt_asm5(a,b,c,d,e) __usdt_str(a) "," __usdt_str(b) "," __usdt_str(c) "," \ + __usdt_str(d) "," __usdt_str(e) "\n" + +#ifdef __LP64__ +#define __usdt_asm_addr .8byte +#else +#define __usdt_asm_addr .4byte +#endif + +#define __usdt_asm_strz_(x) __usdt_asm1(.asciz #x) +#define __usdt_asm_strz(x) __usdt_asm_strz_(x) +#define __usdt_asm_str_(x) __usdt_asm1(.ascii #x) +#define __usdt_asm_str(x) __usdt_asm_str_(x) + +/* "semaphoreless" USDT case */ +#ifndef __usdt_sema_none +#define __usdt_sema_none(sema) +#endif + +/* implicitly defined __usdt_sema__group__name semaphore (using weak symbols) */ +#ifndef __usdt_sema_implicit +#define __usdt_sema_implicit(sema) \ + __asm__ __volatile__ ( \ + __usdt_asm1(.ifndef sema) \ + __usdt_asm3( .pushsection .probes, "aw", "progbits") \ + __usdt_asm1( .weak sema) \ + __usdt_asm1( .hidden sema) \ + __usdt_asm1( .align 2) \ + __usdt_asm1(sema:) \ + __usdt_asm1( .zero 2) \ + __usdt_asm2( .type sema, @object) \ + __usdt_asm2( .size sema, 2) \ + __usdt_asm1( .popsection) \ + __usdt_asm1(.endif) \ + ); +#endif + +/* externally defined semaphore using USDT_DEFINE_SEMA() and passed explicitly by user */ +#ifndef __usdt_sema_explicit +#define __usdt_sema_explicit(sema) \ + __asm__ __volatile__ ("" :: "m" (sema)); +#endif + +/* main USDT definition (nop and .note.stapsdt metadata) */ +#define __usdt_probe(group, name, sema_def, sema, ...) 
do { \ + sema_def(sema) \ + __asm__ __volatile__ ( \ + __usdt_asm1(990: USDT_NOP) \ + __usdt_asm3( .pushsection .note.stapsdt, "", "note") \ + __usdt_asm1( .balign 4) \ + __usdt_asm3( .4byte 992f-991f,994f-993f,3) \ + __usdt_asm1(991: .asciz "stapsdt") \ + __usdt_asm1(992: .balign 4) \ + __usdt_asm1(993: __usdt_asm_addr 990b) \ + __usdt_asm1( __usdt_asm_addr _.stapsdt.base) \ + __usdt_asm1( __usdt_asm_addr sema) \ + __usdt_asm_strz(group) \ + __usdt_asm_strz(name) \ + __usdt_asm_args(__VA_ARGS__) \ + __usdt_asm1( .ascii "\0") \ + __usdt_asm1(994: .balign 4) \ + __usdt_asm1( .popsection) \ + __usdt_asm1(.ifndef _.stapsdt.base) \ + __usdt_asm5( .pushsection .stapsdt.base,"aG","progbits",.stapsdt.base,comdat)\ + __usdt_asm1( .weak _.stapsdt.base) \ + __usdt_asm1( .hidden _.stapsdt.base) \ + __usdt_asm1(_.stapsdt.base:) \ + __usdt_asm1( .space 1) \ + __usdt_asm2( .size _.stapsdt.base, 1) \ + __usdt_asm1( .popsection) \ + __usdt_asm1(.endif) \ + :: __usdt_asm_ops(__VA_ARGS__) \ + ); \ +} while (0) + +/* + * NB: gdb PR24541 highlighted an unspecified corner of the sdt.h + * operand note format. + * + * The named register may be a longer or shorter (!) alias for the + * storage where the value in question is found. For example, on + * i386, 64-bit value may be put in register pairs, and a register + * name stored would identify just one of them. Previously, gcc was + * asked to emit the %w[id] (16-bit alias of some registers holding + * operands), even when a wider 32-bit value was used. + * + * Bottom line: the byte-width given before the @ sign governs. If + * there is a mismatch between that width and that of the named + * register, then a sys/sdt.h note consumer may need to employ + * architecture-specific heuristics to figure out where the compiler + * has actually put the complete value. 
+ */ +#if defined(__powerpc__) || defined(__powerpc64__) +#define __usdt_argref(id) %I[id]%[id] +#elif defined(__i386__) +#define __usdt_argref(id) %k[id] /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */ +#else +#define __usdt_argref(id) %[id] +#endif + +#define __usdt_asm_arg(n) __usdt_asm_str(%c[__usdt_asz##n]) \ + __usdt_asm1(.ascii "@") \ + __usdt_asm_str(__usdt_argref(__usdt_aval##n)) + +#define __usdt_asm_args0 /* no arguments */ +#define __usdt_asm_args1 __usdt_asm_arg(1) +#define __usdt_asm_args2 __usdt_asm_args1 __usdt_asm1(.ascii " ") __usdt_asm_arg(2) +#define __usdt_asm_args3 __usdt_asm_args2 __usdt_asm1(.ascii " ") __usdt_asm_arg(3) +#define __usdt_asm_args4 __usdt_asm_args3 __usdt_asm1(.ascii " ") __usdt_asm_arg(4) +#define __usdt_asm_args5 __usdt_asm_args4 __usdt_asm1(.ascii " ") __usdt_asm_arg(5) +#define __usdt_asm_args6 __usdt_asm_args5 __usdt_asm1(.ascii " ") __usdt_asm_arg(6) +#define __usdt_asm_args7 __usdt_asm_args6 __usdt_asm1(.ascii " ") __usdt_asm_arg(7) +#define __usdt_asm_args8 __usdt_asm_args7 __usdt_asm1(.ascii " ") __usdt_asm_arg(8) +#define __usdt_asm_args9 __usdt_asm_args8 __usdt_asm1(.ascii " ") __usdt_asm_arg(9) +#define __usdt_asm_args10 __usdt_asm_args9 __usdt_asm1(.ascii " ") __usdt_asm_arg(10) +#define __usdt_asm_args11 __usdt_asm_args10 __usdt_asm1(.ascii " ") __usdt_asm_arg(11) +#define __usdt_asm_args12 __usdt_asm_args11 __usdt_asm1(.ascii " ") __usdt_asm_arg(12) +#define __usdt_asm_args(...) __usdt_apply(__usdt_asm_args, __usdt_narg(__VA_ARGS__)) + +#define __usdt_is_arr(x) (__builtin_classify_type(x) == 14 || __builtin_classify_type(x) == 5) +#define __usdt_arg_size(x) (__usdt_is_arr(x) ? sizeof(void *) : sizeof(x)) + +/* + * We can't use __builtin_choose_expr() in C++, so fall back to table-based + * signedness determination for known types, utilizing templates magic. 
+ */ +#ifdef __cplusplus + +#define __usdt_is_signed(x) (!__usdt_is_arr(x) && __usdt_t<__typeof(x)>::is_signed) + +#include + +template struct __usdt_t { static const bool is_signed = false; }; +template struct __usdt_t : public __usdt_t {}; +template struct __usdt_t : public __usdt_t {}; + +#define __usdt_def_signed(T) \ +template<> struct __usdt_t { static const bool is_signed = true; }; \ +template<> struct __usdt_t { static const bool is_signed = true; }; \ +template<> struct __usdt_t { static const bool is_signed = true; }; \ +template<> struct __usdt_t { static const bool is_signed = true; } +#define __usdt_maybe_signed(T) \ +template<> struct __usdt_t { static const bool is_signed = (T)-1 < (T)1; }; \ +template<> struct __usdt_t { static const bool is_signed = (T)-1 < (T)1; }; \ +template<> struct __usdt_t { static const bool is_signed = (T)-1 < (T)1; }; \ +template<> struct __usdt_t { static const bool is_signed = (T)-1 < (T)1; } + +__usdt_def_signed(signed char); +__usdt_def_signed(short); +__usdt_def_signed(int); +__usdt_def_signed(long); +__usdt_def_signed(long long); +__usdt_maybe_signed(char); +__usdt_maybe_signed(wchar_t); + +#else /* !__cplusplus */ + +#define __usdt_is_inttype(x) (__builtin_classify_type(x) >= 1 && __builtin_classify_type(x) <= 4) +#define __usdt_inttype(x) __typeof(__builtin_choose_expr(__usdt_is_inttype(x), (x), 0U)) +#define __usdt_is_signed(x) ((__usdt_inttype(x))-1 < (__usdt_inttype(x))1) + +#endif /* __cplusplus */ + +#define __usdt_asm_op(n, x) \ + [__usdt_asz##n] "n" ((__usdt_is_signed(x) ? 
(int)-1 : 1) * (int)__usdt_arg_size(x)), \ + [__usdt_aval##n] __usdt_str(USDT_ARG_CONSTRAINT)(x) + +#define __usdt_asm_ops0() [__usdt_dummy] "g" (0) +#define __usdt_asm_ops1(x) __usdt_asm_op(1, x) +#define __usdt_asm_ops2(a,x) __usdt_asm_ops1(a), __usdt_asm_op(2, x) +#define __usdt_asm_ops3(a,b,x) __usdt_asm_ops2(a,b), __usdt_asm_op(3, x) +#define __usdt_asm_ops4(a,b,c,x) __usdt_asm_ops3(a,b,c), __usdt_asm_op(4, x) +#define __usdt_asm_ops5(a,b,c,d,x) __usdt_asm_ops4(a,b,c,d), __usdt_asm_op(5, x) +#define __usdt_asm_ops6(a,b,c,d,e,x) __usdt_asm_ops5(a,b,c,d,e), __usdt_asm_op(6, x) +#define __usdt_asm_ops7(a,b,c,d,e,f,x) __usdt_asm_ops6(a,b,c,d,e,f), __usdt_asm_op(7, x) +#define __usdt_asm_ops8(a,b,c,d,e,f,g,x) __usdt_asm_ops7(a,b,c,d,e,f,g), __usdt_asm_op(8, x) +#define __usdt_asm_ops9(a,b,c,d,e,f,g,h,x) __usdt_asm_ops8(a,b,c,d,e,f,g,h), __usdt_asm_op(9, x) +#define __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,x) __usdt_asm_ops9(a,b,c,d,e,f,g,h,i), __usdt_asm_op(10, x) +#define __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,x) __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,j), __usdt_asm_op(11, x) +#define __usdt_asm_ops12(a,b,c,d,e,f,g,h,i,j,k,x) __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,k), __usdt_asm_op(12, x) +#define __usdt_asm_ops(...) 
__usdt_apply(__usdt_asm_ops, __usdt_narg(__VA_ARGS__))(__VA_ARGS__) + +#endif /* __USDT_H */ diff --git a/lib/util.c b/lib/util.c index 956c3d2e9..2914f1cfc 100644 --- a/lib/util.c +++ b/lib/util.c @@ -1,3 +1,12 @@ + +#include "fuse_config.h" + +#ifdef HAVE_PTHREAD_SETNAME_NP +#define _GNU_SOURCE +#include +#endif + +#include #include #include @@ -33,3 +42,12 @@ int libfuse_strtol(const char *str, long *res) return 0; } +void fuse_set_thread_name(const char *name) +{ +#ifdef HAVE_PTHREAD_SETNAME_NP + pthread_setname_np(pthread_self(), name); +#else + (void)name; +#endif +} + diff --git a/lib/util.h b/lib/util.h index f24401a29..107a2bfdd 100644 --- a/lib/util.h +++ b/lib/util.h @@ -12,6 +12,7 @@ struct fuse_conn_info; int libfuse_strtol(const char *str, long *res); +void fuse_set_thread_name(const char *name); /** * Return the low bits of a number @@ -30,7 +31,7 @@ static inline uint64_t fuse_higher_32_bits(uint64_t nr) } #ifndef FUSE_VAR_UNUSED -#define FUSE_VAR_UNUSED(var) (__attribute__((unused)) var) +#define FUSE_VAR_UNUSED __attribute__((__unused__)) #endif #define container_of(ptr, type, member) \ @@ -39,4 +40,10 @@ static inline uint64_t fuse_higher_32_bits(uint64_t nr) ((type *)(__mptr - offsetof(type, member))); \ }) +#if __has_attribute(__fallthrough__) +#define fallthrough __attribute__((__fallthrough__)) +#else +#define fallthrough do {} while (0) #endif + +#endif /* FUSE_UTIL_H_ */ diff --git a/make_release_tarball.sh b/make_release_tarball.sh index ce2fe16c5..45438a5cd 100755 --- a/make_release_tarball.sh +++ b/make_release_tarball.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/sh -x # # Create tarball from Git tag, removing and adding # some files. 
diff --git a/meson.build b/meson.build index 32795fc8a..f0570dc2c 100644 --- a/meson.build +++ b/meson.build @@ -1,11 +1,10 @@ -project('libfused', - ['c'], - version: '1.1.0', # synced with fuse-3.17.4 +project('libfuse3', ['c'], + version: '3.18.2', meson_version: '>= 0.60.0', default_options: [ 'buildtype=debugoptimized', 'c_std=gnu11', - 'cpp_std=c++17', + 'cpp_std=c++20', 'warning_level=2', ]) @@ -74,7 +73,7 @@ private_cfg.set_quoted('PACKAGE_VERSION', meson.project_version()) # Test for presence of some functions test_funcs = [ 'fork', 'fstatat', 'openat', 'readlinkat', 'pipe2', 'splice', 'vmsplice', 'posix_fallocate', 'fdatasync', - 'utimensat', 'copy_file_range', 'fallocate' ] + 'utimensat', 'copy_file_range', 'fallocate', 'fspacectl' ] foreach func : test_funcs private_cfg.set('HAVE_' + func.to_upper(), cc.has_function(func, prefix: include_default, args: args_default)) @@ -93,11 +92,32 @@ special_funcs = { 'close_range': ''' #include #include + #ifdef linux #include + #endif int main(void) { unsigned int flags = CLOSE_RANGE_UNSHARE; return close_range(3, ~0U, flags); } + ''', + 'listmount': ''' + #include + #include + #include + #include + + int main(int argc, char *argv[]) { + struct mnt_id_req req = { + .size = sizeof(struct mnt_id_req), + .mnt_id = LSMT_ROOT, + }; + uint64_t mnt_ids[1]; + + int n = syscall(SYS_listmount, &req, &mnt_ids, 1, 0); + if (n == -1) { + return -1; + } + } ''' } @@ -107,6 +127,13 @@ foreach name, code : special_funcs name: name + ' check')) endforeach +# Headers checks +test_headers = [ 'sys/xattr.h', 'linux/limits.h' ] +foreach header : test_headers + private_cfg.set('HAVE_' + header.underscorify().to_upper(), + cc.has_header(header)) +endforeach + # Regular function checks private_cfg.set('HAVE_SETXATTR', cc.has_function('setxattr', prefix: '#include ')) @@ -114,6 +141,8 @@ private_cfg.set('HAVE_ICONV', cc.has_function('iconv', prefix: '#include ')) private_cfg.set('HAVE_BACKTRACE', cc.has_function('backtrace', prefix: 
'#include ')) +private_cfg.set('HAVE_STATX', + cc.has_function('statx', prefix : '#define _GNU_SOURCE\n#include ')) # Struct member checks private_cfg.set('HAVE_STRUCT_STAT_ST_ATIM', @@ -125,6 +154,32 @@ private_cfg.set('HAVE_STRUCT_STAT_ST_ATIMESPEC', prefix: include_default + '#include ', args: args_default)) +private_cfg.set('USDT_ENABLED', get_option('enable-usdt')) + +# Check for liburing with SQE128 support +code = ''' +#include +#include +int main(void) { + struct io_uring ring; + int ret = io_uring_queue_init(1, &ring, 0); +#ifndef IORING_SETUP_SQE128 +#error "No SQE128 support" +#endif + return ret; +}''' + +liburing = dependency('liburing', required: false) +libnuma = dependency('numa', required: false) + +if get_option('enable-io-uring') and liburing.found() and libnuma.found() + if cc.links(code, + name: 'liburing linking and SQE128 support', + dependencies: [liburing]) + private_cfg.set('HAVE_URING', true) + endif +endif + # # Compiler configuration # @@ -247,7 +302,7 @@ configure_file(output: 'fuse_config.h', configuration : private_cfg) # symbol (define) conflicts configure_file(output: 'libfuse_config.h', configuration : public_cfg, - install: true, install_dir: join_paths(get_option('includedir'), 'fused')) + install: true, install_dir: join_paths(get_option('includedir'), 'fuse3')) # '.' 
will refer to current build directory, which contains config.h include_dirs = include_directories('include', 'lib', '.') diff --git a/meson_options.txt b/meson_options.txt index fa4749c72..c1f8fe694 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -22,3 +22,8 @@ option('tests', type : 'boolean', value : true, option('disable-libc-symbol-version', type : 'boolean', value : false, description: 'Disable versioned symbols through libc') +option('enable-usdt', type : 'boolean', value : false, + description: 'Enable user statically defined tracepoints for extra observability') + +option('enable-io-uring', type: 'boolean', value: true, + description: 'Enable fuse-over-io-uring support') diff --git a/signify/fuse-3.19.pub b/signify/fuse-3.19.pub new file mode 100644 index 000000000..317f8f78f --- /dev/null +++ b/signify/fuse-3.19.pub @@ -0,0 +1,2 @@ +untrusted comment: signify public key +RWR4cEcMGJhD3Dnd3NOeJck3WiuVt9A7mrkq+nQYwrwwmMdDDAan/YiU diff --git a/test/ci-build.sh b/test/ci-build.sh index 40bb79e8d..2bced379c 100755 --- a/test/ci-build.sh +++ b/test/ci-build.sh @@ -118,6 +118,13 @@ sanitized_build() sudo rm -fr ${PREFIX_DIR} ) +# Sanitized with io-uring +export CC=clang +export CXX=clang++ +export FUSE_URING_ENABLE=1 +sanitized_build +unset FUSE_URING_ENABLE + # 32-bit sanitized build export CC=clang export CXX=clang++ @@ -146,6 +153,12 @@ export CC=clang export CXX=clang++ sanitized_build "-Ddisable-libc-symbol-version=true" +# Sanitized build without fuse-io-uring +export CC=clang +export CXX=clang++ +sanitized_build "-Denable-io-uring=false" + +# Build without any sanitizer non_sanitized_build # Documentation. 
diff --git a/test/conftest.py b/test/conftest.py index f52818928..291c9199b 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -77,8 +77,11 @@ def _check(self): cp = re.compile(pattern, re.IGNORECASE | re.MULTILINE) hit = cp.search(buf) if hit: - raise AssertionError('Suspicious output to stderr (matched "%s")' - % hit.group(0)) + # Skip FUSE error messages in the format "unique: X, error: -Y (...), outsize: Z" + # These are no errors, but just fuse debug messages with the return code + if re.search(r'unique: \d+, error: -\d+ \(.*\), outsize: \d+', hit.group(0)): + continue + raise AssertionError(f'Suspicious output to stderr (matched "{hit.group(0)}")') @pytest.fixture() def output_checker(request): diff --git a/test/hello.c b/test/hello.c index a07df0e48..cf576689b 100644 --- a/test/hello.c +++ b/test/hello.c @@ -3,7 +3,7 @@ * Copyright (C) 2001-2007 Miklos Szeredi * * This program can be distributed under the terms of the GNU GPLv2. - * See the file COPYING. + * See the file GPL2.txt. */ /** @file diff --git a/test/meson.build b/test/meson.build index a9e339fc1..9f6f409cc 100644 --- a/test/meson.build +++ b/test/meson.build @@ -19,6 +19,9 @@ td += executable('release_unlink_race', 'release_unlink_race.c', td += executable('test_want_conversion', 'test_want_conversion.c', dependencies: [ libfuse_dep ], install: false) +td += executable('test_signals', 'test_signals.c', + dependencies: [ libfuse_dep, thread_dep ], + install: false) td += executable('test_abi', 'test_abi.c', dependencies: [ libfuse_dep ], install: false) diff --git a/test/release_unlink_race.c b/test/release_unlink_race.c index 2edb20044..f7b7b81be 100644 --- a/test/release_unlink_race.c +++ b/test/release_unlink_race.c @@ -1,6 +1,6 @@ /* This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. 
*/ #define FUSE_USE_VERSION 31 diff --git a/test/test_ctests.py b/test/test_ctests.py index 36b5ff347..b3863e058 100644 --- a/test/test_ctests.py +++ b/test/test_ctests.py @@ -148,3 +148,14 @@ def test_notify_file_size(tmpdir, notify, output_checker): logger.error(f"Failure in unmount: '{' '.join(cmdline)}'") cleanup(mount_process, mnt_dir) logger.debug("Unmount completed") + +def test_signals(output_checker): + """Test for proper signal handling (issue #1182)""" + logger = logging.getLogger(__name__) + logger.debug("Testing signal handling") + cmdline = [ pjoin(basename, 'test', 'test_signals') ] + logger.debug(f"Command line: {' '.join(cmdline)}") + subprocess.run(cmdline, stdout=output_checker.fd, \ + stderr=output_checker.fd, timeout=10, check=True) + logger.debug("Signal handling test completed successfully") + diff --git a/test/test_examples.py b/test/test_examples.py index 9c8b77eec..05efa0993 100755 --- a/test/test_examples.py +++ b/test/test_examples.py @@ -27,6 +27,11 @@ fuse_proto, fuse_caps, powerset, parse_kernel_version) from os.path import join as pjoin import logging +from enum import Enum + +class InodeCheck(Enum): + EXACT = 1 + NONZERO = 2 pytestmark = fuse_test_marker() @@ -138,7 +143,7 @@ def test_hello(tmpdir, name, options, cmdline_builder, output_checker): @pytest.mark.parametrize("writeback", (False, True)) @pytest.mark.parametrize("name", ('passthrough', 'passthrough_plus', - 'passthrough_fh', 'passthrough_ll')) + 'passthrough_fh', 'passthrough_ll', 'passthrough_zero_ino')) @pytest.mark.parametrize("debug", (False, True)) def test_passthrough(short_tmpdir, name, debug, output_checker, writeback): # Avoid false positives from libfuse debug messages @@ -153,14 +158,30 @@ def test_passthrough(short_tmpdir, name, debug, output_checker, writeback): mnt_dir = str(short_tmpdir.mkdir('mnt')) src_dir = str(short_tmpdir.mkdir('src')) + inode_check = InodeCheck.EXACT if name == 'passthrough_plus': cmdline = base_cmdline + \ [ pjoin(basename, 
'example', 'passthrough'), '--plus', '-f', mnt_dir ] - else: + elif name == 'passthrough_zero_ino': + cmdline = base_cmdline + \ + [ pjoin(basename, 'example', 'passthrough'), + '--plus', '--readdir-zero-inodes', '-f', mnt_dir ] + inode_check = InodeCheck.NONZERO + elif name == 'passthrough_ll': + cmdline = base_cmdline + \ + [ pjoin(basename, 'example', name), + '-f', mnt_dir, '-o', 'timeout=0' ] + else: # passthrough and passthrough_fh cmdline = base_cmdline + \ [ pjoin(basename, 'example', name), '-f', mnt_dir ] + + # Set all timeouts to 0 for everything except passthrough_ll + # (this includes passthrough, passthrough_plus, and passthrough_fh) + if name != 'passthrough_ll': + cmdline.extend(['-o', 'entry_timeout=0,negative_timeout=0,attr_timeout=0,ac_attr_timeout=0']) + if debug: cmdline.append('-d') @@ -169,7 +190,9 @@ def test_passthrough(short_tmpdir, name, debug, output_checker, writeback): pytest.skip('example does not support writeback caching') cmdline.append('-o') cmdline.append('writeback') - + + print(f"\nDebug: Command line: {' '.join(cmdline)}") + mount_process = subprocess.Popen(cmdline, stdout=output_checker.fd, stderr=output_checker.fd) try: @@ -177,12 +200,12 @@ def test_passthrough(short_tmpdir, name, debug, output_checker, writeback): work_dir = mnt_dir + src_dir tst_statvfs(work_dir) - tst_readdir(src_dir, work_dir) - tst_readdir_big(src_dir, work_dir) + tst_readdir(src_dir, work_dir, inode_check) + tst_readdir_big(src_dir, work_dir, inode_check) tst_open_read(src_dir, work_dir) tst_open_write(src_dir, work_dir) tst_create(work_dir) - tst_passthrough(src_dir, work_dir) + tst_passthrough(src_dir, work_dir, inode_check) tst_append(src_dir, work_dir) tst_seek(src_dir, work_dir) tst_mkdir(work_dir) @@ -195,7 +218,8 @@ def test_passthrough(short_tmpdir, name, debug, output_checker, writeback): # Underlying fs may not have full nanosecond resolution tst_utimens(work_dir, ns_tol=1000) - tst_link(work_dir) + if inode_check == InodeCheck.EXACT: + 
tst_link(work_dir) tst_truncate_path(work_dir) tst_truncate_fd(work_dir) tst_open_unlink(work_dir) @@ -364,7 +388,8 @@ def test_fn(name): @pytest.mark.skipif(fuse_proto < (7,12), reason='not supported by running kernel') -@pytest.mark.parametrize("only_expire", ("invalidate_entries", "expire_entries")) +@pytest.mark.parametrize("only_expire", ("invalidate_entries", + "expire_entries", "inc_epoch")) @pytest.mark.parametrize("notify", (True, False)) def test_notify_inval_entry(tmpdir, only_expire, notify, output_checker): mnt_dir = str(tmpdir) @@ -378,6 +403,10 @@ def test_notify_inval_entry(tmpdir, only_expire, notify, output_checker): cmdline.append('--only-expire') if "FUSE_CAP_EXPIRE_ONLY" not in fuse_caps: pytest.skip('only-expire not supported by running kernel') + elif only_expire == "inc_epoch": + cmdline.append('--inc-epoch') + if fuse_proto < (7,44): + pytest.skip('inc-epoch not supported by running kernel') mount_process = subprocess.Popen(cmdline, stdout=output_checker.fd, stderr=output_checker.fd) try: @@ -740,7 +769,17 @@ def tst_link(mnt_dir): os.unlink(name1) -def tst_readdir(src_dir, mnt_dir): +def tst_inodes_nonzero(lines): + inode_nums = [int(line.split()[0]) for line in lines] + assert all(i != 0 for i in inode_nums), inode_nums + +def tst_inode(inode_check, actual, expected): + if inode_check == InodeCheck.EXACT: + assert expected == actual + elif inode_check == InodeCheck.NONZERO: + assert actual != 0 + +def tst_readdir(src_dir, mnt_dir, inode_check=InodeCheck.EXACT): newdir = name_generator() src_newdir = pjoin(src_dir, newdir) @@ -761,16 +800,18 @@ def tst_readdir(src_dir, mnt_dir): assert listdir_is == listdir_should inodes_is = readdir_inode(mnt_newdir) - inodes_should = readdir_inode(src_newdir) - assert inodes_is == inodes_should + if inode_check == InodeCheck.EXACT: + inodes_should = readdir_inode(src_newdir) + assert inodes_is == inodes_should + elif inode_check == InodeCheck.NONZERO: + tst_inodes_nonzero(inodes_is) os.unlink(file_) 
os.unlink(subfile) os.rmdir(subdir) os.rmdir(src_newdir) -def tst_readdir_big(src_dir, mnt_dir): - +def tst_readdir_big(src_dir, mnt_dir, inode_check=InodeCheck.EXACT): # Add enough entries so that readdir needs to be called # multiple times. fnames = [] @@ -786,13 +827,17 @@ def tst_readdir_big(src_dir, mnt_dir): assert listdir_is == listdir_should inodes_is = readdir_inode(mnt_dir) - inodes_should = readdir_inode(src_dir) - assert inodes_is == inodes_should + if inode_check == InodeCheck.EXACT: + inodes_should = readdir_inode(src_dir) + assert inodes_is == inodes_should + elif inode_check == InodeCheck.NONZERO: + tst_inodes_nonzero(inodes_is) for fname in fnames: + # A comment just to get a diff stat_src = os.stat(pjoin(src_dir, fname)) stat_mnt = os.stat(pjoin(mnt_dir, fname)) - assert stat_src.st_ino == stat_mnt.st_ino + tst_inode(inode_check, stat_mnt.st_ino, stat_src.st_ino) assert stat_src.st_mtime == stat_mnt.st_mtime assert stat_src.st_ctime == stat_mnt.st_ctime assert stat_src.st_size == stat_mnt.st_size @@ -868,28 +913,61 @@ def tst_utimens(mnt_dir, ns_tol=0): assert abs(fstat.st_atime_ns - atime_ns) <= ns_tol assert abs(fstat.st_mtime_ns - mtime_ns) <= ns_tol -def tst_passthrough(src_dir, mnt_dir): +def tst_passthrough(src_dir, mnt_dir, inode_check=InodeCheck.EXACT): name = name_generator() src_name = pjoin(src_dir, name) - mnt_name = pjoin(src_dir, name) + mnt_name = pjoin(mnt_dir, name) + + print(f"\nDebug: Creating file {name}") + print(f"Debug: src_name={src_name}") + print(f"Debug: mnt_name={mnt_name}") + + # First test: write to source directory assert name not in os.listdir(src_dir) assert name not in os.listdir(mnt_dir) with open(src_name, 'w') as fh: fh.write('Hello, world') + + print(f"Debug: File written to src_name") + + start_time = time.time() + while time.time() - start_time < 10: # 10 second timeout + if name in os.listdir(mnt_dir): + break + print(f"Debug: Waiting for file to appear... 
({time.time() - start_time:.1f}s)") + time.sleep(0.1) + else: + pytest.fail("File did not appear in mount directory within 10 seconds") + assert name in os.listdir(src_dir) assert name in os.listdir(mnt_dir) - assert os.stat(src_name) == os.stat(mnt_name) + # Compare relevant stat attributes + src_stat = os.stat(src_name) + mnt_stat = os.stat(mnt_name) + assert src_stat.st_mode == mnt_stat.st_mode + tst_inode(inode_check, mnt_stat.st_ino, src_stat.st_ino) + assert src_stat.st_size == mnt_stat.st_size + assert src_stat.st_mtime == mnt_stat.st_mtime + + # Second test: write to mount directory name = name_generator() src_name = pjoin(src_dir, name) - mnt_name = pjoin(src_dir, name) + mnt_name = pjoin(mnt_dir, name) assert name not in os.listdir(src_dir) assert name not in os.listdir(mnt_dir) with open(mnt_name, 'w') as fh: fh.write('Hello, world') assert name in os.listdir(src_dir) assert name in os.listdir(mnt_dir) - assert os.stat(src_name) == os.stat(mnt_name) + + # Compare relevant stat attributes + src_stat = os.stat(src_name) + mnt_stat = os.stat(mnt_name) + assert src_stat.st_mode == mnt_stat.st_mode + tst_inode(inode_check, mnt_stat.st_ino, src_stat.st_ino) + assert src_stat.st_size == mnt_stat.st_size + assert abs(src_stat.st_mtime - mnt_stat.st_mtime) < 0.01 def tst_xattr(path): diff --git a/test/test_setattr.c b/test/test_setattr.c index ac552644c..ed0e93dc5 100644 --- a/test/test_setattr.c +++ b/test/test_setattr.c @@ -3,7 +3,7 @@ Copyright (C) 2016 Nikolaus Rath This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ diff --git a/test/test_signals.c b/test/test_signals.c new file mode 100644 index 000000000..3ce7fbd32 --- /dev/null +++ b/test/test_signals.c @@ -0,0 +1,202 @@ +/* + * FUSE: Filesystem in Userspace + * Copyright (C) 2025 Bernd Schubert + * + * Test for signal handling in libfuse. + * + * This program can be distributed under the terms of the GNU LGPLv2. 
+ * See the file GPL2.txt + */ + +#define FUSE_USE_VERSION FUSE_MAKE_VERSION(3, 17) + +#include "fuse_config.h" +#include "fuse_lowlevel.h" +#include "fuse_i.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void test_ll_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) +{ + (void)parent; + (void)name; + /* Simulate slow lookup to test signal interruption */ + sleep(2); + fuse_reply_err(req, ENOENT); +} + +static void test_ll_getattr(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + (void)ino; + (void)fi; + /* Simulate slow getattr to test signal interruption */ + sleep(2); + fuse_reply_err(req, ENOENT); +} + +static const struct fuse_lowlevel_ops test_ll_ops = { + .lookup = test_ll_lookup, + .getattr = test_ll_getattr, +}; + +static void *signal_sender_thread(void *arg) +{ + (void)arg; + + usleep(2 * 1000 * 1000); + + /* Send SIGTERM to the process */ + kill(getpid(), SIGTERM); + return NULL; +} + +static void fork_child(void) +{ + struct fuse_args args = FUSE_ARGS_INIT(0, NULL); + struct fuse_session *se; + struct fuse_loop_config *loop_config; + pthread_t sig_thread; + char *mountpoint = NULL; + int ret = -1; + + /* Add the program name to arg[0] */ + if (fuse_opt_add_arg(&args, "test_signals")) { + fprintf(stderr, "Failed to add argument\n"); + goto out_free_mountpoint; + } + + /* Add debug flag to see more output */ + fuse_opt_add_arg(&args, "-d"); + + /* Create temporary mount point */ + mountpoint = strdup("/tmp/fuse_test_XXXXXX"); + if (!mountpoint || !mkdtemp(mountpoint)) { + fprintf(stderr, "Failed to create temp dir\n"); + goto out_free_args; + } + + /* Create session */ + se = fuse_session_new(&args, &test_ll_ops, sizeof(test_ll_ops), NULL); + if (!se) { + fprintf(stderr, "Failed to create FUSE session\n"); + goto out_free_mountpoint; + } + + /* Mount filesystem */ + if (fuse_session_mount(se, mountpoint)) { + fprintf(stderr, "Failed to mount FUSE filesystem\n"); 
+ goto out_destroy_session; + } + + /* Create loop config */ + loop_config = fuse_loop_cfg_create(); + if (!loop_config) { + fprintf(stderr, "Failed to create loop config\n"); + goto out_unmount; + } + fuse_loop_cfg_set_clone_fd(loop_config, 0); + fuse_loop_cfg_set_max_threads(loop_config, 2); + + /* Set up signal handlers */ + if (fuse_set_signal_handlers(se)) { + fprintf(stderr, "Failed to set up signal handlers\n"); + goto out_destroy_config; + } + + /* Create thread that will send signals */ + if (pthread_create(&sig_thread, NULL, signal_sender_thread, NULL)) { + fprintf(stderr, "Failed to create signal sender thread\n"); + goto out_remove_handlers; + } + + /* Enter FUSE loop */ + ret = fuse_session_loop_mt_312(se, loop_config); + + printf("Debug: fuse_session_loop_mt_312 returned %d\n", ret); + printf("Debug: session exited state: %d\n", fuse_session_exited(se)); + printf("Debug: session status: %d\n", se->error); + + /* Check exit status before cleanup */ + int clean_exit = (fuse_session_exited(se) && se->error == SIGTERM); + + /* Clean up */ + pthread_join(sig_thread, NULL); + fuse_remove_signal_handlers(se); + fuse_session_unmount(se); + fuse_session_destroy(se); + fuse_loop_cfg_destroy(loop_config); + rmdir(mountpoint); + free(mountpoint); + fuse_opt_free_args(&args); + + /* Use saved exit status */ + if (clean_exit) { + printf("Debug: Clean shutdown via SIGTERM\n"); + exit(0); + } + printf("Debug: Exiting with status %d\n", ret != 0); + exit(ret != 0); + +out_remove_handlers: + fuse_remove_signal_handlers(se); +out_destroy_config: + fuse_loop_cfg_destroy(loop_config); +out_unmount: + fuse_session_unmount(se); +out_destroy_session: + fuse_session_destroy(se); +out_free_mountpoint: + rmdir(mountpoint); + free(mountpoint); +out_free_args: + fuse_opt_free_args(&args); + exit(1); +} + +static void run_test_in_child(void) +{ + pid_t child; + int status; + + child = fork(); + if (child == -1) { + perror("fork"); + exit(1); + } + + if (child == 0) + fork_child(); 
+ + /* In parent process */ + if (waitpid(child, &status, 0) == -1) { + perror("waitpid"); + exit(1); + } + + /* Check if child exited due to SIGTERM - this is expected */ + if (WIFSIGNALED(status) && WTERMSIG(status) == SIGTERM) { + printf("Child process terminated by SIGTERM as expected\n"); + exit(0); + } + + /* For any other type of exit, maintain existing behavior */ + exit(WIFEXITED(status) ? WEXITSTATUS(status) : 1); +} + +int main(void) +{ + printf("Testing SIGTERM handling in libfuse\n"); + run_test_in_child(); + printf("SIGTERM handling test passed\n"); + return 0; +} diff --git a/test/test_syscalls.c b/test/test_syscalls.c index 4bbe97340..61ee953b9 100644 --- a/test/test_syscalls.c +++ b/test/test_syscalls.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -921,6 +922,53 @@ static int test_copy_file_range(void) } #endif +#ifdef HAVE_STATX +static int test_statx(void) +{ + struct statx sb; + char msg[] = "hi"; + size_t msg_size = sizeof(msg); + struct timespec tp; + int res; + + memset(&sb, 0, sizeof(sb)); + unlink(testfile); + + start_test("statx"); + + res = create_testfile(testfile, msg, msg_size); + if (res == -1) + return -1; + + res = statx(-1, testfile, AT_EMPTY_PATH, + STATX_BASIC_STATS | STATX_BTIME, &sb); + if (res == -1) + return -1; + + if (sb.stx_size != msg_size) + return -1; + + clock_gettime(CLOCK_REALTIME, &tp); + + if (sb.stx_btime.tv_sec > tp.tv_sec) + return -1; + + if (sb.stx_btime.tv_sec == tp.tv_sec && + sb.stx_btime.tv_nsec >= tp.tv_nsec) + return -1; + + unlink(testfile); + + success(); + return 0; +} +#else +static int test_statx(void) +{ + return 0; +} +#endif + static int test_utime(void) { struct utimbuf utm; @@ -2179,6 +2227,7 @@ int main(int argc, char *argv[]) err += test_create_ro_dir(O_CREAT | O_WRONLY); err += test_create_ro_dir(O_CREAT | O_TRUNC); err += test_copy_file_range(); + err += test_statx(); #ifndef __FreeBSD__ err += test_create_tmpfile(); err += 
test_create_and_link_tmpfile(); diff --git a/test/test_write_cache.c b/test/test_write_cache.c index 9f21f02fb..00db5a65b 100644 --- a/test/test_write_cache.c +++ b/test/test_write_cache.c @@ -3,10 +3,9 @@ Copyright (C) 2016 Nikolaus Rath This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ - #define FUSE_USE_VERSION 30 /* Not really needed - just to test build with FUSE_USE_VERSION == 30 */ @@ -37,24 +36,22 @@ /* Command line parsing */ struct options { - int writeback; - int data_size; - int delay_ms; + int writeback; + int data_size; + int delay_ms; } options = { - .writeback = 0, - .data_size = 2048, - .delay_ms = 0, + .writeback = 0, + .data_size = 2048, + .delay_ms = 0, }; #define WRITE_SYSCALLS 64 -#define OPTION(t, p) \ - { t, offsetof(struct options, p), 1 } +#define OPTION(t, p) { t, offsetof(struct options, p), 1 } static const struct fuse_opt option_spec[] = { - OPTION("writeback_cache", writeback), - OPTION("--data-size=%d", data_size), - OPTION("--delay_ms=%d", delay_ms), - FUSE_OPT_END + OPTION("writeback_cache", writeback), + OPTION("--data-size=%d", data_size), OPTION("--delay_ms=%d", delay_ms), + FUSE_OPT_END }; static int got_write; static atomic_int write_cnt; @@ -63,229 +60,241 @@ pthread_cond_t cond = PTHREAD_COND_INITIALIZER; pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; static int write_start, write_done; -static void tfs_init (void *userdata, struct fuse_conn_info *conn) +static void tfs_init(void *userdata, struct fuse_conn_info *conn) { - (void) userdata; + (void)userdata; - if(options.writeback) { - assert(fuse_get_feature_flag(conn, FUSE_CAP_WRITEBACK_CACHE)); - fuse_set_feature_flag(conn, FUSE_CAP_WRITEBACK_CACHE); - } + if (options.writeback) { + assert(fuse_get_feature_flag(conn, FUSE_CAP_WRITEBACK_CACHE)); + fuse_set_feature_flag(conn, FUSE_CAP_WRITEBACK_CACHE); + } } -static int tfs_stat(fuse_ino_t ino, struct stat *stbuf) { - stbuf->st_ino = ino; - if (ino == 
FUSE_ROOT_ID) { - stbuf->st_mode = S_IFDIR | 0755; - stbuf->st_nlink = 1; - } - - else if (ino == FILE_INO) { - stbuf->st_mode = S_IFREG | 0222; - stbuf->st_nlink = 1; - stbuf->st_size = 0; - } - - else - return -1; - - return 0; +static int tfs_stat(fuse_ino_t ino, struct stat *stbuf) +{ + stbuf->st_ino = ino; + if (ino == FUSE_ROOT_ID) { + stbuf->st_mode = S_IFDIR | 0755; + stbuf->st_nlink = 1; + } + + else if (ino == FILE_INO) { + stbuf->st_mode = S_IFREG | 0222; + stbuf->st_nlink = 1; + stbuf->st_size = 0; + } + + else + return -1; + + return 0; } -static void tfs_lookup(fuse_req_t req, fuse_ino_t parent, - const char *name) { - struct fuse_entry_param e; - memset(&e, 0, sizeof(e)); +static void tfs_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) +{ + struct fuse_entry_param e; + + memset(&e, 0, sizeof(e)); - if (parent != FUSE_ROOT_ID) - goto err_out; - else if (strcmp(name, FILE_NAME) == 0) - e.ino = FILE_INO; - else - goto err_out; + if (parent != FUSE_ROOT_ID) + goto err_out; + else if (strcmp(name, FILE_NAME) == 0) + e.ino = FILE_INO; + else + goto err_out; - if (tfs_stat(e.ino, &e.attr) != 0) - goto err_out; - fuse_reply_entry(req, &e); - return; + if (tfs_stat(e.ino, &e.attr) != 0) + goto err_out; + fuse_reply_entry(req, &e); + return; err_out: - fuse_reply_err(req, ENOENT); + fuse_reply_err(req, ENOENT); } static void tfs_getattr(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) { - struct stat stbuf; + struct fuse_file_info *fi) +{ + struct stat stbuf; - (void) fi; + (void)fi; - memset(&stbuf, 0, sizeof(stbuf)); - if (tfs_stat(ino, &stbuf) != 0) - fuse_reply_err(req, ENOENT); - else - fuse_reply_attr(req, &stbuf, 5); + memset(&stbuf, 0, sizeof(stbuf)); + if (tfs_stat(ino, &stbuf) != 0) + fuse_reply_err(req, ENOENT); + else + fuse_reply_attr(req, &stbuf, 5); } -static void tfs_open(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) { - if (ino == FUSE_ROOT_ID) - fuse_reply_err(req, EISDIR); - else { - assert(ino == 
FILE_INO); - /* Test close(rofd) does not block waiting for pending writes */ - fi->noflush = !options.writeback && options.delay_ms && - (fi->flags & O_ACCMODE) == O_RDONLY; - fuse_reply_open(req, fi); - } +static void tfs_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) +{ + if (ino == FUSE_ROOT_ID) + fuse_reply_err(req, EISDIR); + else { + assert(ino == FILE_INO); + /* Test close(rofd) does not block waiting for pending writes */ + fi->noflush = !options.writeback && options.delay_ms && + (fi->flags & O_ACCMODE) == O_RDONLY; + fuse_reply_open(req, fi); + } } static void tfs_write(fuse_req_t req, fuse_ino_t ino, const char *buf, - size_t size, off_t off, struct fuse_file_info *fi) { - (void) fi; (void) buf; (void) off; - size_t expected; - - assert(ino == FILE_INO); - expected = options.data_size; - if(options.writeback) - expected *= 2; - - write_cnt++; - - if(size != expected && !options.writeback) - fprintf(stderr, "ERROR: Expected %zd bytes, got %zd\n!", - expected, size); - else - got_write = 1; - - /* Simulate waiting for pending writes */ - if (options.delay_ms) { - pthread_mutex_lock(&lock); - write_start = 1; - pthread_cond_signal(&cond); - pthread_mutex_unlock(&lock); - - usleep(options.delay_ms * 1000); - - pthread_mutex_lock(&lock); - write_done = 1; - pthread_cond_signal(&cond); - pthread_mutex_unlock(&lock); - } - - fuse_reply_write(req, size); + size_t size, off_t off, struct fuse_file_info *fi) +{ + (void)fi; + (void)buf; + (void)off; + size_t expected; + + assert(ino == FILE_INO); + expected = options.data_size; + if (options.writeback) + expected *= 2; + + write_cnt++; + + if (size != expected && !options.writeback) + fprintf(stderr, "ERROR: Expected %zd bytes, got %zd\n!", + expected, size); + else + got_write = 1; + + /* Simulate waiting for pending writes */ + if (options.delay_ms) { + pthread_mutex_lock(&lock); + write_start = 1; + pthread_cond_signal(&cond); + pthread_mutex_unlock(&lock); + + usleep(options.delay_ms * 1000); 
+ + pthread_mutex_lock(&lock); + write_done = 1; + pthread_cond_signal(&cond); + pthread_mutex_unlock(&lock); + } + + fuse_reply_write(req, size); } static struct fuse_lowlevel_ops tfs_oper = { - .init = tfs_init, - .lookup = tfs_lookup, - .getattr = tfs_getattr, - .open = tfs_open, - .write = tfs_write, + .init = tfs_init, + .lookup = tfs_lookup, + .getattr = tfs_getattr, + .open = tfs_open, + .write = tfs_write, }; -static void* close_rofd(void *data) { - int rofd = (int)(long) data; +static void *close_rofd(void *data) +{ + int rofd = (int)(long)data; - /* Wait for first write to start */ - pthread_mutex_lock(&lock); - while (!write_start && !write_done) - pthread_cond_wait(&cond, &lock); - pthread_mutex_unlock(&lock); + /* Wait for first write to start */ + pthread_mutex_lock(&lock); + while (!write_start && !write_done) + pthread_cond_wait(&cond, &lock); + pthread_mutex_unlock(&lock); - close(rofd); - printf("rofd closed. write_start: %d write_done: %d\n", write_start, write_done); + close(rofd); + printf("rofd closed. 
write_start: %d write_done: %d\n", write_start, + write_done); - /* First write should not have been completed */ - if (write_done) - fprintf(stderr, "ERROR: close(rofd) blocked on write!\n"); + /* First write should not have been completed */ + if (write_done) + fprintf(stderr, "ERROR: close(rofd) blocked on write!\n"); - return NULL; + return NULL; } -static void* run_fs(void *data) { - struct fuse_session *se = (struct fuse_session*) data; - assert(fuse_session_loop(se) == 0); - return NULL; -} +static void *run_fs(void *data) +{ + struct fuse_session *se = (struct fuse_session *)data; -static void test_fs(char *mountpoint) { - char fname[PATH_MAX]; - char *buf; - const size_t iosize = options.data_size; - const size_t dsize = options.data_size * WRITE_SYSCALLS; - int fd, rofd; - pthread_t rofd_thread; - off_t off = 0; - - buf = malloc(dsize); - assert(buf != NULL); - assert((fd = open("/dev/urandom", O_RDONLY)) != -1); - assert(read(fd, buf, dsize) == dsize); - close(fd); - - assert(snprintf(fname, PATH_MAX, "%s/" FILE_NAME, - mountpoint) > 0); - fd = open(fname, O_WRONLY); - if (fd == -1) { - perror(fname); - assert(0); - } - - if (options.delay_ms) { - /* Verify that close(rofd) does not block waiting for pending writes */ - rofd = open(fname, O_RDONLY); - assert(pthread_create(&rofd_thread, NULL, close_rofd, (void *)(long)rofd) == 0); - /* Give close_rofd time to start */ - usleep(options.delay_ms * 1000); - } - - for (int cnt = 0; cnt < WRITE_SYSCALLS; cnt++) { - assert(pwrite(fd, buf + off, iosize, off) == iosize); - off += iosize; - assert(off <= dsize); - } - free(buf); - close(fd); - - if (options.delay_ms) { - printf("rwfd closed. 
write_start: %d write_done: %d\n", write_start, write_done); - assert(pthread_join(rofd_thread, NULL) == 0); - } + assert(fuse_session_loop(se) == 0); + return NULL; } -int main(int argc, char *argv[]) { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); - struct fuse_session *se; - struct fuse_cmdline_opts fuse_opts; - pthread_t fs_thread; +static void test_fs(char *mountpoint) +{ + char fname[PATH_MAX]; + char *buf; + const size_t iosize = options.data_size; + const size_t dsize = options.data_size * WRITE_SYSCALLS; + int fd, rofd; + pthread_t rofd_thread; + off_t off = 0; + + buf = malloc(dsize); + assert(buf != NULL); + assert((fd = open("/dev/urandom", O_RDONLY)) != -1); + assert(read(fd, buf, dsize) == dsize); + close(fd); + + assert(snprintf(fname, PATH_MAX, "%s/" FILE_NAME, mountpoint) > 0); + fd = open(fname, O_WRONLY); + if (fd == -1) { + perror(fname); + assert(0); + } + + if (options.delay_ms) { + /* Verify that close(rofd) does not block waiting for pending writes */ + rofd = open(fname, O_RDONLY); + assert(pthread_create(&rofd_thread, NULL, close_rofd, + (void *)(long)rofd) == 0); + /* Give close_rofd time to start */ + usleep(options.delay_ms * 1000); + } + + for (int cnt = 0; cnt < WRITE_SYSCALLS; cnt++) { + assert(pwrite(fd, buf + off, iosize, off) == iosize); + off += iosize; + assert(off <= dsize); + } + free(buf); + close(fd); + + if (options.delay_ms) { + printf("rwfd closed. 
write_start: %d write_done: %d\n", + write_start, write_done); + assert(pthread_join(rofd_thread, NULL) == 0); + } +} - assert(fuse_opt_parse(&args, &options, option_spec, NULL) == 0); - assert(fuse_parse_cmdline(&args, &fuse_opts) == 0); -#ifndef __FreeBSD__ - assert(fuse_opt_add_arg(&args, "-oauto_unmount") == 0); +int main(int argc, char *argv[]) +{ + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); + struct fuse_session *se; + struct fuse_cmdline_opts fuse_opts; + pthread_t fs_thread; + + assert(fuse_opt_parse(&args, &options, option_spec, NULL) == 0); + assert(fuse_parse_cmdline(&args, &fuse_opts) == 0); +#ifndef __FreeBSD__ + assert(fuse_opt_add_arg(&args, "-oauto_unmount") == 0); #endif - se = fuse_session_new(&args, &tfs_oper, - sizeof(tfs_oper), NULL); - fuse_opt_free_args(&args); - assert (se != NULL); - assert(fuse_set_signal_handlers(se) == 0); - assert(fuse_session_mount(se, fuse_opts.mountpoint) == 0); + se = fuse_session_new(&args, &tfs_oper, sizeof(tfs_oper), NULL); + fuse_opt_free_args(&args); + assert(se != NULL); + assert(fuse_set_signal_handlers(se) == 0); + assert(fuse_session_mount(se, fuse_opts.mountpoint) == 0); - /* Start file-system thread */ - assert(pthread_create(&fs_thread, NULL, run_fs, (void *)se) == 0); + /* Start file-system thread */ + assert(pthread_create(&fs_thread, NULL, run_fs, (void *)se) == 0); - /* Write test data */ - test_fs(fuse_opts.mountpoint); - free(fuse_opts.mountpoint); + /* Write test data */ + test_fs(fuse_opts.mountpoint); + free(fuse_opts.mountpoint); - /* Stop file system */ - fuse_session_exit(se); - fuse_session_unmount(se); - assert(pthread_join(fs_thread, NULL) == 0); + /* Stop file system */ + fuse_session_exit(se); + fuse_session_unmount(se); + assert(pthread_join(fs_thread, NULL) == 0); - assert(got_write == 1); + assert(got_write == 1); - /* + /* * when writeback cache is enabled, kernel side can merge requests, but * memory pressure, system 'sync' might trigger data flushes before - flush * might 
happen in between write syscalls - merging subpage writes into @@ -293,19 +302,18 @@ int main(int argc, char *argv[]) { * Though we can expect that that at least some (but maybe all) write * system calls can be merged. */ - if (options.writeback) - assert(write_cnt < WRITE_SYSCALLS); - else - assert(write_cnt == WRITE_SYSCALLS); + if (options.writeback) + assert(write_cnt < WRITE_SYSCALLS); + else + assert(write_cnt == WRITE_SYSCALLS); - fuse_remove_signal_handlers(se); - fuse_session_destroy(se); + fuse_remove_signal_handlers(se); + fuse_session_destroy(se); - printf("Test completed successfully.\n"); - return 0; + printf("Test completed successfully.\n"); + return 0; } - /** * Local Variables: * mode: c diff --git a/test/util.py b/test/util.py index a421e7213..125fd50fe 100644 --- a/test/util.py +++ b/test/util.py @@ -50,6 +50,8 @@ def wait_for_mount(mount_process, mnt_dir, if test_fn(mnt_dir): return True if mount_process.poll() is not None: + if test_fn(mnt_dir): + return True pytest.fail('file system process terminated prematurely') time.sleep(0.1) elapsed += 0.1 diff --git a/util/fusermount.c b/util/fusermount.c index dd241a050..f17b44f51 100644 --- a/util/fusermount.c +++ b/util/fusermount.c @@ -3,7 +3,7 @@ Copyright (C) 2001-2007 Miklos Szeredi This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. 
*/ /* This program does the mounting and unmounting of FUSE filesystems */ @@ -36,11 +36,18 @@ #include #include -#ifdef HAVE_CLOSE_RANGE +#if defined HAVE_CLOSE_RANGE && defined linux #include #endif +#if defined HAVE_LISTMOUNT +#include +#include +#include +#endif + #define FUSE_COMMFD_ENV "_FUSE_COMMFD" +#define FUSE_KERN_DEVICE_ENV "FUSE_KERN_DEVICE" #define FUSE_DEV "/dev/fuse" @@ -565,7 +572,7 @@ static int unmount_fuse(const char *mnt, int quiet, int lazy) return res; } -static int count_fuse_fs(void) +static int count_fuse_fs_mtab(void) { struct mntent *entp; int count = 0; @@ -585,6 +592,72 @@ static int count_fuse_fs(void) return count; } +#ifdef HAVE_LISTMOUNT +static int count_fuse_fs_ls_mnt(void) +{ + #define SMBUF_SIZE 1024 + #define MNT_ID_LEN 128 + + int fuse_count = 0; + int n_mounts = 0; + int ret = 0; + uint64_t mnt_ids[MNT_ID_LEN]; + unsigned char smbuf[SMBUF_SIZE]; + struct mnt_id_req req = { + .size = sizeof(struct mnt_id_req), + }; + struct statmount *sm; + + for (;;) { + req.mnt_id = LSMT_ROOT; + + n_mounts = syscall(SYS_listmount, &req, &mnt_ids, MNT_ID_LEN, 0); + if (n_mounts == -1) { + if (errno != ENOSYS) { + fprintf(stderr, "%s: failed to list mounts: %s\n", progname, + strerror(errno)); + } + return -1; + } + + for (int i = 0; i < n_mounts; i++) { + req.mnt_id = mnt_ids[i]; + req.param = STATMOUNT_FS_TYPE; + ret = syscall(SYS_statmount, &req, &smbuf, SMBUF_SIZE, 0); + if (ret) { + if (errno == ENOENT) + continue; + + fprintf(stderr, "%s: failed to stat mount %lld: %s\n", progname, + req.mnt_id, strerror(errno)); + return -1; + } + + sm = (struct statmount *)smbuf; + if (sm->mask & STATMOUNT_FS_TYPE && + strcmp(&sm->str[sm->fs_type], "fuse") == 0) + fuse_count++; + } + + if (n_mounts < MNT_ID_LEN) + break; + req.param = mnt_ids[MNT_ID_LEN - 1]; + } + return fuse_count; +} + +static int count_fuse_fs(void) +{ + int count = count_fuse_fs_ls_mnt(); + + return count >= 0 ? 
count : count_fuse_fs_mtab(); +} +#else +static int count_fuse_fs(void) +{ + return count_fuse_fs_mtab(); +} +#endif #else /* IGNORE_MTAB */ static int count_fuse_fs(void) @@ -1146,6 +1219,7 @@ static int check_perm(const char **mntp, struct stat *stbuf, int *mountpoint_fd) 0x73717368 /* SQUASHFS_MAGIC */, 0x01021994 /* TMPFS_MAGIC */, 0x24051905 /* UBIFS_SUPER_MAGIC */, + 0x18031977 /* WEKAFS_SUPER_MAGIC */, #if __SIZEOF_LONG__ > 4 0x736675005346544e /* UFSD */, #endif @@ -1163,56 +1237,30 @@ static int check_perm(const char **mntp, struct stat *stbuf, int *mountpoint_fd) return -1; } -static int try_open(const char *dev, char **devp, int silent) -{ - int fd = open(dev, O_RDWR); - if (fd != -1) { - *devp = strdup(dev); - if (*devp == NULL) { - fprintf(stderr, "%s: failed to allocate memory\n", - progname); - close(fd); - fd = -1; - } - } else if (errno == ENODEV || - errno == ENOENT)/* check for ENOENT too, for the udev case */ - return -2; - else if (!silent) { - fprintf(stderr, "%s: failed to open %s: %s\n", progname, dev, - strerror(errno)); - } - return fd; -} - -static int try_open_fuse_device(char **devp) +static int open_fuse_device(const char *dev) { int fd; drop_privs(); - fd = try_open(FUSE_DEV, devp, 0); + fd = open(dev, O_RDWR); + if (fd == -1) { + if (errno == ENODEV || errno == ENOENT)/* check for ENOENT too, for the udev case */ + fprintf(stderr, + "%s: fuse device %s not found. 
Kernel module not loaded?\n", + progname, dev); + else + fprintf(stderr, + "%s: failed to open %s: %s\n", progname, dev, strerror(errno)); + } restore_privs(); return fd; } -static int open_fuse_device(char **devp) -{ - int fd = try_open_fuse_device(devp); - if (fd >= -1) - return fd; - - fprintf(stderr, - "%s: fuse device not found, try 'modprobe fuse' first\n", - progname); - - return -1; -} - - static int mount_fuse(const char *mnt, const char *opts, const char **type) { int res; int fd; - char *dev; + const char *dev = getenv(FUSE_KERN_DEVICE_ENV) ?: FUSE_DEV; struct stat stbuf; char *source = NULL; char *mnt_opts = NULL; @@ -1221,7 +1269,7 @@ static int mount_fuse(const char *mnt, const char *opts, const char **type) char *do_mount_opts = NULL; char *x_opts = NULL; - fd = open_fuse_device(&dev); + fd = open_fuse_device(dev); if (fd == -1) return -1; @@ -1292,7 +1340,6 @@ static int mount_fuse(const char *mnt, const char *opts, const char **type) out_free: free(source); free(mnt_opts); - free(dev); free(x_opts); free(do_mount_opts); @@ -1534,14 +1581,15 @@ int main(int argc, char *argv[]) static const struct option long_opts[] = { {"unmount", no_argument, NULL, 'u'}, - // Note: auto-unmount deliberately does not have a short version. - // It's meant for internal use by mount.c's setup_auto_unmount. - {"auto-unmount", no_argument, NULL, 'U'}, {"lazy", no_argument, NULL, 'z'}, {"quiet", no_argument, NULL, 'q'}, {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'V'}, {"options", required_argument, NULL, 'o'}, + // Note: auto-unmount and comm-fd don't have short versions. + // They're meant for internal use by mount.c + {"auto-unmount", no_argument, NULL, 'U'}, + {"comm-fd", required_argument, NULL, 'c'}, {0, 0, 0, 0}}; progname = strdup(argc > 0 ? 
argv[0] : "fusermount"); @@ -1573,6 +1621,9 @@ int main(int argc, char *argv[]) auto_unmount = 1; setup_auto_unmount_only = 1; break; + case 'c': + commfd = optarg; + break; case 'z': lazy = 1; break; @@ -1619,7 +1670,8 @@ int main(int argc, char *argv[]) if (!setup_auto_unmount_only && unmount) goto do_unmount; - commfd = getenv(FUSE_COMMFD_ENV); + if(commfd == NULL) + commfd = getenv(FUSE_COMMFD_ENV); if (commfd == NULL) { fprintf(stderr, "%s: old style mounting not supported\n", progname); diff --git a/util/mount.fuse.c b/util/mount.fuse.c index b98fb2a65..f1a90fe8a 100644 --- a/util/mount.fuse.c +++ b/util/mount.fuse.c @@ -3,7 +3,7 @@ Copyright (C) 2001-2007 Miklos Szeredi This program can be distributed under the terms of the GNU GPLv2. - See the file COPYING. + See the file GPL2.txt. */ #include "fuse_config.h"