From 21cba06befe98abe99c07b5827ca364836bcd17d Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Mon, 21 Oct 2024 13:50:13 +1100 Subject: [PATCH 01/33] config: fix dequeue_signal check for kernels <4.20 Before 4.20, kernel_siginfo_t was just called siginfo_t. This was causing the kthread_dequeue_signal_3arg_task check, which uses kernel_siginfo_t, to fail on older kernels. In d6b8c17f1, we started checking for the "new" three-arg dequeue_signal() by testing for the "old" version. Because that test is explicitly using kernel_siginfo_t, it would fail, leading to the build trying to use the new three-arg version, which would then not compile. This commit fixes that by avoiding checking for the old 3-arg dequeue_signal entirely. Instead, we check for the new one, as well as the 4-arg form, and we use the old form as a fallback. This way, we never have to test for it explicitly, and once we're building HAVE_SIGINFO will make sure we get the right kernel_siginfo_t for it, so everything works out nice. Original-patch-by: Finix Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Closes #16666 --- config/kernel-kthread.m4 | 49 ++++++++++++++++++-------------- module/os/linux/spl/spl-thread.c | 6 ++-- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/config/kernel-kthread.m4 b/config/kernel-kthread.m4 index 4d580efead6b..607953146323 100644 --- a/config/kernel-kthread.m4 +++ b/config/kernel-kthread.m4 @@ -17,14 +17,21 @@ AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_COMPLETE_AND_EXIT], [ AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL], [ dnl # - dnl # 5.17 API: enum pid_type * as new 4th dequeue_signal() argument, - dnl # 5768d8906bc23d512b1a736c1e198aa833a6daa4 ("signal: Requeue signals in the appropriate queue") + dnl # prehistory: + dnl # int dequeue_signal(struct task_struct *task, sigset_t *mask, + dnl # siginfo_t *info) dnl # - dnl # int dequeue_signal(struct task_struct *task, sigset_t *mask, kernel_siginfo_t *info); - dnl # int dequeue_signal(struct task_struct *task, sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type); + dnl # 4.20: kernel_siginfo_t introduced, replaces siginfo_t + dnl # int dequeue_signal(struct task_struct *task, sigset_t *mask, + dnl kernel_siginfo_t *info) dnl # - dnl # 6.12 API: first arg struct_task* removed - dnl # int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type); + dnl # 5.17: enum pid_type introduced as 4th arg + dnl # int dequeue_signal(struct task_struct *task, sigset_t *mask, + dnl # kernel_siginfo_t *info, enum pid_type *type) + dnl # + dnl # 6.12: first arg struct_task* removed + dnl # int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, + dnl # enum pid_type *type) dnl # AC_MSG_CHECKING([whether dequeue_signal() takes 4 arguments]) ZFS_LINUX_TEST_RESULT([kthread_dequeue_signal_4arg], [ @@ -33,11 +40,11 @@ AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL], [ [dequeue_signal() takes 4 arguments]) ], [ AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether dequeue_signal() a task argument]) - ZFS_LINUX_TEST_RESULT([kthread_dequeue_signal_3arg_task], [ + AC_MSG_CHECKING([whether 3-arg dequeue_signal() takes a type argument]) + ZFS_LINUX_TEST_RESULT([kthread_dequeue_signal_3arg_type], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DEQUEUE_SIGNAL_3ARG_TASK, 1, - [dequeue_signal() takes a task argument]) + AC_DEFINE(HAVE_DEQUEUE_SIGNAL_3ARG_TYPE, 1, + [3-arg dequeue_signal() takes a type argument]) ], [ AC_MSG_RESULT(no) ]) @@ -56,17 +63,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_COMPLETE_AND_EXIT], [ ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL], [ - ZFS_LINUX_TEST_SRC([kthread_dequeue_signal_3arg_task], [ - #include - ], [ - struct task_struct *task = NULL; - sigset_t *mask = NULL; - kernel_siginfo_t *info = NULL; - int error __attribute__ ((unused)); - - error = dequeue_signal(task, mask, info); - ]) - ZFS_LINUX_TEST_SRC([kthread_dequeue_signal_4arg], [ #include ], [ @@ -78,6 +74,17 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL], [ error = dequeue_signal(task, mask, info, type); ]) + + ZFS_LINUX_TEST_SRC([kthread_dequeue_signal_3arg_type], [ + #include + ], [ + sigset_t *mask = NULL; + kernel_siginfo_t *info = NULL; + enum pid_type *type = NULL; + int error __attribute__ ((unused)); + + error = dequeue_signal(mask, info, type); + ]) ]) AC_DEFUN([ZFS_AC_KERNEL_KTHREAD], [ diff --git a/module/os/linux/spl/spl-thread.c b/module/os/linux/spl/spl-thread.c index 7f74d44f91ff..7b0ce30c7884 100644 --- a/module/os/linux/spl/spl-thread.c +++ b/module/os/linux/spl/spl-thread.c @@ -171,11 +171,11 @@ issig(void) #if defined(HAVE_DEQUEUE_SIGNAL_4ARG) enum pid_type __type; if (dequeue_signal(current, &set, &__info, &__type) != 0) { -#elif defined(HAVE_DEQUEUE_SIGNAL_3ARG_TASK) - if (dequeue_signal(current, &set, &__info) != 0) { -#else +#elif defined(HAVE_DEQUEUE_SIGNAL_3ARG_TYPE) enum pid_type __type; if (dequeue_signal(&set, &__info, &__type) != 0) { +#else + if (dequeue_signal(current, &set, &__info) != 0) { #endif spin_unlock_irq(¤t->sighand->siglock); kernel_signal_stop(); From 96f382d1132c42203b4307a3039edeebe918ef70 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Tue, 22 Oct 2024 05:38:30 +1100 Subject: [PATCH 02/33] spl/thread: explicitly define thread_func_t as noreturn All of our thread entry functions have this signature: void (*)(void*) __attribute__((noreturn)) The low-level `__thread_create()` function accepts a `thread_func_t` as the entry point, which is defined more simply as: void (*)(void *) And then the `thread_create()` and `thread_create_named()` macros cast the passed-in function point down to `thread_func_t`, that is, casting away the `noreturn` attribute. Clang considers casting between these two types to be invalid because both the caller and the callee may have elided parts of the stack frame save and restore, knowing that they won't be needed. Recent Linux appears to be setting `-Wcast-function-type-strict`, which causes this invalid cast to emit a warning, which with `-Werror` is converted to an error, breaking the build. This commit fixes this in the simplest possible way: adding `noreturn` to the `thread_func_t` attribute. Since all our thread entry functions already have this attribute, it's arguably a just a consistency fix anyway. I considered removing the casts in the macros, which silences the warnings, but it turns out that Clang has a bug that won't emit this error for implicit conversions, only explicit casts. So leaving them there seems like a reasonable belt-and-suspenders approach. Also, frankly, this whole mechanism seems a little undercooked inside LLVM, so I'm content go with my intuition about the smallest, least invaisve change. **NOTE**: `__thread_create` is exported by `spl.ko` and has a `thread_func_t` arg, so this is an ABI break. Whether that matters in practice, I have no idea. Further reading: - https://github.com/llvm/llvm-project/commit/1aad641c793090b4d036c03e737df2ebe2c32c57 - https://github.com/llvm/llvm-project/issues/7325 - https://github.com/llvm/llvm-project/issues/41465 Sponsored-by: https://despairlabs.com/sponsor/ Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Closes #16672 Closes #16673 --- include/os/linux/spl/sys/thread.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/os/linux/spl/sys/thread.h b/include/os/linux/spl/sys/thread.h index 4f7f659e528d..c7ef7efa0a25 100644 --- a/include/os/linux/spl/sys/thread.h +++ b/include/os/linux/spl/sys/thread.h @@ -42,7 +42,7 @@ #define TS_ZOMB EXIT_ZOMBIE #define TS_STOPPED TASK_STOPPED -typedef void (*thread_func_t)(void *); +typedef void (*thread_func_t)(void *) __attribute__((noreturn)); #define thread_create_named(name, stk, stksize, func, arg, len, \ pp, state, pri) \ From 152ae5c9bc2beb69f1b55481f7b3940aeb266c55 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 21 Oct 2024 14:46:20 -0400 Subject: [PATCH 03/33] ZTS: Add additional exceptions The following tests have been observed to occasionally fail when running under the CI. Updated our exceptions list to track them. Signed-off-by: Brian Behlendorf Closes #16670 --- tests/test-runner/bin/zts-report.py.in | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index 2562836213af..07ec2c4b601b 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -213,6 +213,7 @@ maybe = { 'cli_root/zfs_unshare/zfs_unshare_006_pos': ['SKIP', na_reason], 'cli_root/zpool_add/zpool_add_004_pos': ['FAIL', known_reason], 'cli_root/zpool_destroy/zpool_destroy_001_pos': ['SKIP', 6145], + 'cli_root/zpool_import/import_devices_missing': ['FAIL', 16669], 'cli_root/zpool_import/zpool_import_missing_003_pos': ['SKIP', 6839], 'cli_root/zpool_initialize/zpool_initialize_import_export': ['FAIL', 11948], @@ -275,7 +276,8 @@ if sys.platform.startswith('freebsd'): 'pool_checkpoint/checkpoint_big_rewind': ['FAIL', 12622], 'pool_checkpoint/checkpoint_indirect': ['FAIL', 12623], 'resilver/resilver_restart_001': ['FAIL', known_reason], - 'snapshot/snapshot_002_pos': ['FAIL', '14831'], + 'snapshot/snapshot_002_pos': ['FAIL', 14831], + 'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', 16668], 'bclone/bclone_crossfs_corner_cases': ['SKIP', cfr_cross_reason], 'bclone/bclone_crossfs_corner_cases_limited': ['SKIP', cfr_cross_reason], From aefc2da8a594d7a8059c862eab464d5f798393b3 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 23 Oct 2024 13:19:46 -0400 Subject: [PATCH 04/33] Workaround issue of Linux vdev_disk.c, (#16678) in some cases not linearizing buffers with disk sector crossing a page boundary. It is fine for hardware, but somehow required by LUKS. It is not typical for ZFS to produce such buffers, but it may happen if 6KB block is compressed to 4KB, while still having 2KB alignment. Banning the 6KB buffers helps vdevs with ashifh=12. Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Reviewed-by: Tony Hutter Reviewed-by: Tino Reichardt --- module/zfs/zio.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/module/zfs/zio.c b/module/zfs/zio.c index a5daf73d59ba..3c7305e0e724 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -187,6 +187,20 @@ zio_init(void) continue; #endif +#if defined(__linux__) && defined(_KERNEL) + /* + * Workaround issue of Linux vdev_disk.c, in some cases not + * linearizing buffers with disk sector crossing a page + * boundary. It is fine for hardware, but somehow required by + * LUKS. It is not typical for ZFS to produce such buffers, but + * it may happen if 6KB block is compressed to 4KB, while still + * having 2KB alignment. Banning the 6KB buffers helps vdevs + * with ashifh=12. + */ + if (size > PAGESIZE && !IS_P2ALIGNED(size, PAGESIZE)) + continue; +#endif + if (IS_P2ALIGNED(size, PAGESIZE)) align = PAGESIZE; else From 2e4e0928220f1a67d0412f8cbcc4b426ddfb58f2 Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Thu, 24 Oct 2024 23:05:45 +0200 Subject: [PATCH 05/33] Fix dependency install on Debian 11 (#16683) Adding cryptsetup breaks some dialog things on Debian 11. Apply some workaround for it. Signed-off-by: Tino Reichardt Reviewed-by: George Melikov Reviewed-by: Tony Hutter --- .github/workflows/scripts/qemu-3-deps.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/scripts/qemu-3-deps.sh b/.github/workflows/scripts/qemu-3-deps.sh index a2fb5e38249a..4c6227b88ed0 100755 --- a/.github/workflows/scripts/qemu-3-deps.sh +++ b/.github/workflows/scripts/qemu-3-deps.sh @@ -111,6 +111,7 @@ case "$1" in archlinux ;; debian*) + echo 'debconf debconf/frontend select Noninteractive' | sudo debconf-set-selections debian echo "##[group]Install Debian specific" sudo apt-get install -yq linux-perf dh-sequence-dkms From 94a03dd1e49a1bfa464944b624cd718076a8c0ee Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Fri, 25 Oct 2024 12:03:37 -0400 Subject: [PATCH 06/33] Pack dmu_buf_impl_t by 16 bytes On 64bit FreeBSD this reduces one from 296 to 280 bytes. On small block workloads dbufs may consume gigabytes of ARC, and this saves 5% of it. Reviewed-by: Tino Reichardt Reviewed-by: Brian Atkinson Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #16684 --- include/sys/dbuf.h | 53 +++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index 56741cd2a58b..c9bfb9a8026c 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -264,6 +264,27 @@ typedef struct dmu_buf_impl { */ uint8_t db_level; + /* This block was freed while a read or write was active. */ + uint8_t db_freed_in_flight; + + /* + * Evict user data as soon as the dirty and reference counts are equal. + */ + uint8_t db_user_immediate_evict; + + /* + * dnode_evict_dbufs() or dnode_evict_bonus() tried to evict this dbuf, + * but couldn't due to outstanding references. Evict once the refcount + * drops to 0. + */ + uint8_t db_pending_evict; + + /* Number of TXGs in which this buffer is dirty. */ + uint8_t db_dirtycnt; + + /* The buffer was partially read. More reads may follow. */ + uint8_t db_partial_read; + /* * Protects db_buf's contents if they contain an indirect block or data * block of the meta-dnode. We use this lock to protect the structure of @@ -288,6 +309,9 @@ typedef struct dmu_buf_impl { */ dbuf_states_t db_state; + /* In which dbuf cache this dbuf is, if any. */ + dbuf_cached_state_t db_caching_status; + /* * Refcount accessed by dmu_buf_{hold,rele}. * If nonzero, the buffer can't be destroyed. @@ -304,39 +328,10 @@ typedef struct dmu_buf_impl { /* Link in dbuf_cache or dbuf_metadata_cache */ multilist_node_t db_cache_link; - /* Tells us which dbuf cache this dbuf is in, if any */ - dbuf_cached_state_t db_caching_status; - uint64_t db_hash; - /* Data which is unique to data (leaf) blocks: */ - /* User callback information. */ dmu_buf_user_t *db_user; - - /* - * Evict user data as soon as the dirty and reference - * counts are equal. - */ - uint8_t db_user_immediate_evict; - - /* - * This block was freed while a read or write was - * active. - */ - uint8_t db_freed_in_flight; - - /* - * dnode_evict_dbufs() or dnode_evict_bonus() tried to - * evict this dbuf, but couldn't due to outstanding - * references. Evict once the refcount drops to 0. - */ - uint8_t db_pending_evict; - - uint8_t db_dirtycnt; - - /* The buffer was partially read. More reads may follow. */ - uint8_t db_partial_read; } dmu_buf_impl_t; #define DBUF_HASH_MUTEX(h, idx) \ From e5d1f6816758f3ab1939608a9a45c18ec36bef41 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Fri, 25 Oct 2024 09:07:44 -0700 Subject: [PATCH 07/33] ZTS: Add LUKS sanity test Add a LUKS sanity test to trigger: #16631 Reviewed-by: Tino Reichardt Reviewed-by: Rob Norris Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Tony Hutter Closes #16681 --- .github/workflows/scripts/qemu-3-deps.sh | 35 ++++---- tests/runfiles/linux.run | 6 ++ tests/zfs-tests/include/commands.cfg | 1 + tests/zfs-tests/tests/Makefile.am | 3 +- .../tests/functional/luks/luks_sanity.ksh | 90 +++++++++++++++++++ 5 files changed, 117 insertions(+), 18 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/luks/luks_sanity.ksh diff --git a/.github/workflows/scripts/qemu-3-deps.sh b/.github/workflows/scripts/qemu-3-deps.sh index 4c6227b88ed0..bf06c1892ceb 100755 --- a/.github/workflows/scripts/qemu-3-deps.sh +++ b/.github/workflows/scripts/qemu-3-deps.sh @@ -13,10 +13,10 @@ function archlinux() { echo "##[endgroup]" echo "##[group]Install Development Tools" - sudo pacman -Sy --noconfirm base-devel bc cpio dhclient dkms fakeroot \ - fio gdb inetutils jq less linux linux-headers lsscsi nfs-utils parted \ - pax perf python-packaging python-setuptools qemu-guest-agent ksh samba \ - sysstat rng-tools rsync wget xxhash + sudo pacman -Sy --noconfirm base-devel bc cpio cryptsetup dhclient dkms \ + fakeroot fio gdb inetutils jq less linux linux-headers lsscsi nfs-utils \ + parted pax perf python-packaging python-setuptools qemu-guest-agent ksh \ + samba sysstat rng-tools rsync wget xxhash echo "##[endgroup]" } @@ -30,11 +30,11 @@ function debian() { echo "##[group]Install Development Tools" sudo apt-get install -y \ - acl alien attr autoconf bc cpio curl dbench dh-python dkms fakeroot \ - fio gdb gdebi git ksh lcov isc-dhcp-client jq libacl1-dev libaio-dev \ - libattr1-dev libblkid-dev libcurl4-openssl-dev libdevmapper-dev libelf-dev \ - libffi-dev libmount-dev libpam0g-dev libselinux-dev libssl-dev libtool \ - libtool-bin libudev-dev libunwind-dev linux-headers-$(uname -r) \ + acl alien attr autoconf bc cpio cryptsetup curl dbench dh-python dkms \ + fakeroot fio gdb gdebi git ksh lcov isc-dhcp-client jq libacl1-dev \ + libaio-dev libattr1-dev libblkid-dev libcurl4-openssl-dev libdevmapper-dev \ + libelf-dev libffi-dev libmount-dev libpam0g-dev libselinux-dev libssl-dev \ + libtool libtool-bin libudev-dev libunwind-dev linux-headers-$(uname -r) \ lsscsi nfs-kernel-server pamtester parted python3 python3-all-dev \ python3-cffi python3-dev python3-distlib python3-packaging \ python3-setuptools python3-sphinx qemu-guest-agent rng-tools rpm2cpio \ @@ -68,14 +68,15 @@ function rhel() { echo "##[group]Install Development Tools" sudo dnf group install -y "Development Tools" sudo dnf install -y \ - acl attr bc bzip2 curl dbench dkms elfutils-libelf-devel fio gdb git \ - jq kernel-rpm-macros ksh libacl-devel libaio-devel libargon2-devel \ - libattr-devel libblkid-devel libcurl-devel libffi-devel ncompress \ - libselinux-devel libtirpc-devel libtool libudev-devel libuuid-devel \ - lsscsi mdadm nfs-utils openssl-devel pam-devel pamtester parted perf \ - python3 python3-cffi python3-devel python3-packaging kernel-devel \ - python3-setuptools qemu-guest-agent rng-tools rpcgen rpm-build rsync \ - samba sysstat systemd watchdog wget xfsprogs-devel xxhash zlib-devel + acl attr bc bzip2 cryptsetup curl dbench dkms elfutils-libelf-devel fio \ + gdb git jq kernel-rpm-macros ksh libacl-devel libaio-devel \ + libargon2-devel libattr-devel libblkid-devel libcurl-devel libffi-devel \ + ncompress libselinux-devel libtirpc-devel libtool libudev-devel \ + libuuid-devel lsscsi mdadm nfs-utils openssl-devel pam-devel pamtester \ + parted perf python3 python3-cffi python3-devel python3-packaging \ + kernel-devel python3-setuptools qemu-guest-agent rng-tools rpcgen \ + rpm-build rsync samba sysstat systemd watchdog wget xfsprogs-devel xxhash \ + zlib-devel echo "##[endgroup]" } diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 5534cd27f637..76d07a6cc9c1 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -147,6 +147,12 @@ tags = ['functional', 'largest_pool'] tests = ['longname_001_pos', 'longname_002_pos', 'longname_003_pos'] tags = ['functional', 'longname'] +[tests/functional/luks:Linux] +pre = +post = +tests = ['luks_sanity'] +tags = ['functional', 'luks'] + [tests/functional/mmap:Linux] tests = ['mmap_libaio_001_pos', 'mmap_sync_001_pos'] tags = ['functional', 'mmap'] diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index be41ce5210e8..5985b5fe1526 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -129,6 +129,7 @@ export SYSTEM_FILES_LINUX='attr blkdiscard blockdev chattr + cryptsetup exportfs fallocate flock diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index bc767b9f624f..7d1551a63f0d 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -80,7 +80,8 @@ if BUILD_LINUX nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/simd/simd_supported.ksh \ functional/tmpfile/cleanup.ksh \ - functional/tmpfile/setup.ksh + functional/tmpfile/setup.ksh \ + functional/luks/luks_sanity.ksh endif nobase_dist_datadir_zfs_tests_tests_DATA += \ diff --git a/tests/zfs-tests/tests/functional/luks/luks_sanity.ksh b/tests/zfs-tests/tests/functional/luks/luks_sanity.ksh new file mode 100755 index 000000000000..9cee26503de7 --- /dev/null +++ b/tests/zfs-tests/tests/functional/luks/luks_sanity.ksh @@ -0,0 +1,90 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2024 by Lawrence Livermore National Security, LLC. +# Use is subject to license terms. +# + +# DESCRIPTION: +# Verify ZFS works on a LUKS-backed pool +# +# STRATEGY: +# 1. Create a LUKS device +# 2. Make a pool with it +# 3. Write files to the pool +# 4. Verify no errors + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +VDEV=$(mktemp --suffix=luks_sanity) +TESTPOOL=testpool + +function cleanup +{ + log_must zpool destroy $TESTPOOL + + log_must cryptsetup luksClose /dev/mapper/luksdev + log_must rm -f $VDEV +} + +log_assert "Verify ZFS on LUKS works" +log_onexit cleanup + +PASS="fdsjfosdijfsdkjsldfjdlk" + +# Make a small LUKS device since LUKS formatting takes time and we want to +# make this test run as quickly as possible. +truncate -s 100M $VDEV + +log_must cryptsetup luksFormat --type luks2 $VDEV <<< $PASS +log_must cryptsetup luksOpen $VDEV luksdev <<< $PASS + +log_must zpool create $TESTPOOL /dev/mapper/luksdev + +CPUS=$(get_num_cpus) + +# Use these specific size and offset ranges as they often cause errors with +# https://github.com/openzfs/zfs/issues/16631 +# and we want to try to test for that. +for SIZE in {70..100} ; do + for OFF in {70..100} ; do + for i in {1..$CPUS} ; do + dd if=/dev/urandom of=/$TESTPOOL/file$i-bs$SIZE-off$OFF \ + seek=$OFF bs=$SIZE count=1 &>/dev/null & + done + wait + done + sync_pool $TESTPOOL + rm -f /$TESTPOOL/file* +done + +# Verify no read/write/checksum errors. Don't use JSON here so that we could +# could potentially backport this test case to the 2.2.x branch. +if zpool status -e | grep -q "luksdev" ; then + log_note "$(zpool status -v)" + log_fail "Saw errors writing to LUKS device" +fi + +log_pass "Verified ZFS on LUKS works" From c480e06d88e07fe5b8aaa920cc966498bb321a8d Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 29 Oct 2024 19:49:54 +0100 Subject: [PATCH 08/33] Fix gcc unused value warning in FreeBSD simd.h The macros `simd_stat_init()` and `simd_stat_fini()` in FreeBSD's `simd.h` are defined as zero, but they are actually only used as statements. Replace the definitions with `do {} while (0)` instead, to avoid gcc `-Wunused-value` warnings. Reviewed-by: Tino Reichardt Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Reviewed-by: Toomas Soome Signed-off-by: Dimitry Andric Closes #16693 --- include/os/freebsd/spl/sys/simd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/os/freebsd/spl/sys/simd.h b/include/os/freebsd/spl/sys/simd.h index 6bc46755c4e3..d16e1db5e826 100644 --- a/include/os/freebsd/spl/sys/simd.h +++ b/include/os/freebsd/spl/sys/simd.h @@ -50,7 +50,7 @@ #define kfpu_fini() do {} while (0) #endif -#define simd_stat_init() 0 -#define simd_stat_fini() 0 +#define simd_stat_init() do {} while (0) +#define simd_stat_fini() do {} while (0) #endif From 2bf15202110767b6adbacc02d7e9dae1c305d944 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 29 Oct 2024 20:05:02 +0100 Subject: [PATCH 09/33] Fix gcc uninitialized warning in FreeBSD zio_crypt.c In FreeBSD's `zio_do_crypt_data()`, ensure that two `struct uio` variables are cleared before copying data out of them. This avoids accessing garbage data, and fixes gcc `-Wuninitialized` warnings. Reviewed-by: Brian Behlendorf Reviewed-by: Toomas Soome Reviewed-by: Alexander Motin Signed-off-by: Dimitry Andric Closes #16688 --- module/os/freebsd/zfs/zio_crypt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/module/os/freebsd/zfs/zio_crypt.c b/module/os/freebsd/zfs/zio_crypt.c index 2b62abcccb78..feaca93fb933 100644 --- a/module/os/freebsd/zfs/zio_crypt.c +++ b/module/os/freebsd/zfs/zio_crypt.c @@ -1686,11 +1686,10 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, freebsd_crypt_session_t *tmpl = NULL; uint8_t *authbuf = NULL; - + memset(&puio_s, 0, sizeof (puio_s)); + memset(&cuio_s, 0, sizeof (cuio_s)); zfs_uio_init(&puio, &puio_s); zfs_uio_init(&cuio, &cuio_s); - memset(GET_UIO_STRUCT(&puio), 0, sizeof (struct uio)); - memset(GET_UIO_STRUCT(&cuio), 0, sizeof (struct uio)); #ifdef FCRYPTO_DEBUG printf("%s(%s, %p, %p, %d, %p, %p, %u, %s, %p, %p, %p)\n", From 6187b194349c5a728c9df8c6842f1866d5c9782a Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 29 Oct 2024 15:23:24 -0400 Subject: [PATCH 10/33] On the first vdev open ignore impossible ashift hints If on the first open device's logical ashift is bigger than set by pool's ashift property, ignore the last as unusable instead of creating vdev that will fail most of I/Os due to misalignment. Reviewed-by: Rob Norris Reviewed-by: Brian Behlendorf Reviewed-by: Ameer Hamza Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #16690 --- module/zfs/vdev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index bcab46c63bfa..983f444d79b0 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -2205,10 +2205,11 @@ vdev_open(vdev_t *vd) vd->vdev_max_asize = max_asize; /* - * If the vdev_ashift was not overridden at creation time, + * If the vdev_ashift was not overridden at creation time + * (0) or the override value is impossible for the device, * then set it the logical ashift and optimize the ashift. */ - if (vd->vdev_ashift == 0) { + if (vd->vdev_ashift < vd->vdev_logical_ashift) { vd->vdev_ashift = vd->vdev_logical_ashift; if (vd->vdev_logical_ashift > ASHIFT_MAX) { From ae93aeb84941760d3fcc14d55fb246e57b97fee2 Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Wed, 30 Oct 2024 17:11:40 -0700 Subject: [PATCH 11/33] Add warning for external consumers of dmu_tx_callback_register While reading some code @grwilson came across the above function that seemingly had no consumers besides a ztest callback that ensures that the tx_callback infrastructure works correctly. It turns out that Lustre is the main (and potentially the only) consumer of this. Refer to `osd_trans_commit_cb` of `lustre/osd-zfs/osd_handler.c` in the Lustre repo for more info. Let's add a comment highlighting this before someone removes it by mistake. Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Reviewed-by: Alexander Motin Signed-off-by: Serapheim Dimitropoulos Closes #16698 --- module/zfs/dmu_tx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index 3fdcebdff918..6aee7afb6954 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -1377,6 +1377,13 @@ dmu_tx_pool(dmu_tx_t *tx) return (tx->tx_pool); } +/* + * Register a callback to be executed at the end of a TXG. + * + * Note: This currently exists for outside consumers, specifically the ZFS OSD + * for Lustre. Please do not remove before checking that project. For examples + * on how to use this see `ztest_commit_callback`. + */ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data) { From 63bafe60ec741c269d29e26b192a8a5c4f6acf92 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Fri, 25 Oct 2024 16:14:37 +1100 Subject: [PATCH 12/33] vdev_disk: try harder to ensure IO alignment rules It seems out our notion of "properly" aligned IO was incomplete. In particular, dm-crypt does its own splitting, and assumes that a logical block will never cross an order-0 page boundary (ie, the physical page size, not compound size). This effectively means that it needs to be possible to split a BIO at any page or block size boundary and have it work correctly. This updates the alignment check function to enforce these rules (to the extent possible). Our response to misaligned data is to make some new allocation that is properly aligned, and copy the data into it. It turns out that linearising (via abd_borrow_buf()) is not enough, because we allocate eg 4K blocks from a general purpose slab, and so may receive (or already have) a 4K block that crosses pages. So instead, we allocate a new ABD, which is guaranteed to be aligned properly to block sizes, and then copy everything into it, and back out on the way back. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Reviewed-by: Tony Hutter Signed-off-by: Rob Norris Closes #16687 #16631 #15646 #15533 #14533 --- module/os/linux/zfs/vdev_disk.c | 122 ++++---- .../functional/vdev_disk/page_alignment.c | 282 +++++++++++------- 2 files changed, 240 insertions(+), 164 deletions(-) diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index a6271d3a7df1..7a4944410906 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -805,14 +805,11 @@ vbio_completion(struct bio *bio) * to the ADB, with changes if appropriate. */ if (vbio->vbio_abd != NULL) { - void *buf = abd_to_buf(vbio->vbio_abd); + if (zio->io_type == ZIO_TYPE_READ) + abd_copy(zio->io_abd, vbio->vbio_abd, zio->io_size); + abd_free(vbio->vbio_abd); vbio->vbio_abd = NULL; - - if (zio->io_type == ZIO_TYPE_READ) - abd_return_buf_copy(zio->io_abd, buf, zio->io_size); - else - abd_return_buf(zio->io_abd, buf, zio->io_size); } /* Final cleanup */ @@ -834,38 +831,61 @@ vbio_completion(struct bio *bio) * NOTE: if you change this function, change the copy in * tests/zfs-tests/tests/functional/vdev_disk/page_alignment.c, and add test * data there to validate the change you're making. - * */ typedef struct { - uint_t bmask; - uint_t npages; - uint_t end; -} vdev_disk_check_pages_t; + size_t blocksize; + int seen_first; + int seen_last; +} vdev_disk_check_alignment_t; static int -vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv) +vdev_disk_check_alignment_cb(struct page *page, size_t off, size_t len, + void *priv) { (void) page; - vdev_disk_check_pages_t *s = priv; + vdev_disk_check_alignment_t *s = priv; /* - * If we didn't finish on a block size boundary last time, then there - * would be a gap if we tried to use this ABD as-is, so abort. + * The cardinal rule: a single on-disk block must never cross an + * physical (order-0) page boundary, as the kernel expects to be able + * to split at both LBS and page boundaries. + * + * This implies various alignment rules for the blocks in this + * (possibly compound) page, which we can check for. */ - if (s->end != 0) + + /* + * If the previous page did not end on a page boundary, then we + * can't proceed without creating a hole. + */ + if (s->seen_last) + return (1); + + /* This page must contain only whole LBS-sized blocks. */ + if (!IS_P2ALIGNED(len, s->blocksize)) return (1); /* - * Note if we're taking less than a full block, so we can check it - * above on the next call. + * If this is not the first page in the ABD, then the data must start + * on a page-aligned boundary (so the kernel can split on page + * boundaries without having to deal with a hole). If it is, then + * it can start on LBS-alignment. */ - s->end = (off+len) & s->bmask; + if (s->seen_first) { + if (!IS_P2ALIGNED(off, PAGESIZE)) + return (1); + } else { + if (!IS_P2ALIGNED(off, s->blocksize)) + return (1); + s->seen_first = 1; + } - /* All blocks after the first must start on a block size boundary. */ - if (s->npages != 0 && (off & s->bmask) != 0) - return (1); + /* + * If this data does not end on a page-aligned boundary, then this + * must be the last page in the ABD, for the same reason. + */ + s->seen_last = !IS_P2ALIGNED(off+len, PAGESIZE); - s->npages++; return (0); } @@ -874,15 +894,14 @@ vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv) * the number of pages, or 0 if it can't be submitted like this. */ static boolean_t -vdev_disk_check_pages(abd_t *abd, uint64_t size, struct block_device *bdev) +vdev_disk_check_alignment(abd_t *abd, uint64_t size, struct block_device *bdev) { - vdev_disk_check_pages_t s = { - .bmask = bdev_logical_block_size(bdev)-1, - .npages = 0, - .end = 0, + vdev_disk_check_alignment_t s = { + .blocksize = bdev_logical_block_size(bdev), }; - if (abd_iterate_page_func(abd, 0, size, vdev_disk_check_pages_cb, &s)) + if (abd_iterate_page_func(abd, 0, size, + vdev_disk_check_alignment_cb, &s)) return (B_FALSE); return (B_TRUE); @@ -916,37 +935,32 @@ vdev_disk_io_rw(zio_t *zio) /* * Check alignment of the incoming ABD. If any part of it would require - * submitting a page that is not aligned to the logical block size, - * then we take a copy into a linear buffer and submit that instead. - * This should be impossible on a 512b LBS, and fairly rare on 4K, - * usually requiring abnormally-small data blocks (eg gang blocks) - * mixed into the same ABD as larger ones (eg aggregated). + * submitting a page that is not aligned to both the logical block size + * and the page size, then we take a copy into a new memory region with + * correct alignment. This should be impossible on a 512b LBS. On + * larger blocks, this can happen at least when a small number of + * blocks (usually 1) are allocated from a shared slab, or when + * abnormally-small data regions (eg gang headers) are mixed into the + * same ABD as larger allocations (eg aggregations). */ abd_t *abd = zio->io_abd; - if (!vdev_disk_check_pages(abd, zio->io_size, bdev)) { - void *buf; - if (zio->io_type == ZIO_TYPE_READ) - buf = abd_borrow_buf(zio->io_abd, zio->io_size); - else - buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size); + if (!vdev_disk_check_alignment(abd, zio->io_size, bdev)) { + /* Allocate a new memory region with guaranteed alignment */ + abd = abd_alloc_for_io(zio->io_size, + zio->io_abd->abd_flags & ABD_FLAG_META); + + /* If we're writing copy our data into it */ + if (zio->io_type == ZIO_TYPE_WRITE) + abd_copy(abd, zio->io_abd, zio->io_size); /* - * Wrap the copy in an abd_t, so we can use the same iterators - * to count and fill the vbio later. - */ - abd = abd_get_from_buf(buf, zio->io_size); - - /* - * False here would mean the borrowed copy has an invalid - * alignment too, which would mean we've somehow been passed a - * linear ABD with an interior page that has a non-zero offset - * or a size not a multiple of PAGE_SIZE. This is not possible. - * It would mean either zio_buf_alloc() or its underlying - * allocators have done something extremely strange, or our - * math in vdev_disk_check_pages() is wrong. In either case, + * False here would mean the new allocation has an invalid + * alignment too, which would mean that abd_alloc() is not + * guaranteeing this, or our logic in + * vdev_disk_check_alignment() is wrong. In either case, * something in seriously wrong and its not safe to continue. */ - VERIFY(vdev_disk_check_pages(abd, zio->io_size, bdev)); + VERIFY(vdev_disk_check_alignment(abd, zio->io_size, bdev)); } /* Allocate vbio, with a pointer to the borrowed ABD if necessary */ diff --git a/tests/zfs-tests/tests/functional/vdev_disk/page_alignment.c b/tests/zfs-tests/tests/functional/vdev_disk/page_alignment.c index 5c6d28eb2c44..7b926da6c01c 100644 --- a/tests/zfs-tests/tests/functional/vdev_disk/page_alignment.c +++ b/tests/zfs-tests/tests/functional/vdev_disk/page_alignment.c @@ -30,7 +30,7 @@ /* * This tests the vdev_disk page alignment check callback - * vdev_disk_check_pages_cb(). For now, this test includes a copy of that + * vdev_disk_check_alignment_cb(). For now, this test includes a copy of that * function from module/os/linux/zfs/vdev_disk.c. If you change it here, * remember to change it there too, and add tests data here to validate the * change you're making. @@ -38,36 +38,69 @@ struct page; +/* + * This is spl_pagesize() in userspace, which requires linking libspl, but + * would also then use the platform page size, which isn't what we want for + * a test. To keep the check callback the same as the real one, we just + * redefine it. + */ +#undef PAGESIZE +#define PAGESIZE (4096) + typedef struct { - uint32_t bmask; - uint32_t npages; - uint32_t end; -} vdev_disk_check_pages_t; + size_t blocksize; + int seen_first; + int seen_last; +} vdev_disk_check_alignment_t; static int -vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv) +vdev_disk_check_alignment_cb(struct page *page, size_t off, size_t len, + void *priv) { (void) page; - vdev_disk_check_pages_t *s = priv; + vdev_disk_check_alignment_t *s = priv; /* - * If we didn't finish on a block size boundary last time, then there - * would be a gap if we tried to use this ABD as-is, so abort. + * The cardinal rule: a single on-disk block must never cross an + * physical (order-0) page boundary, as the kernel expects to be able + * to split at both LBS and page boundaries. + * + * This implies various alignment rules for the blocks in this + * (possibly compound) page, which we can check for. */ - if (s->end != 0) + + /* + * If the previous page did not end on a page boundary, then we + * can't proceed without creating a hole. + */ + if (s->seen_last) + return (1); + + /* This page must contain only whole LBS-sized blocks. */ + if (!IS_P2ALIGNED(len, s->blocksize)) return (1); /* - * Note if we're taking less than a full block, so we can check it - * above on the next call. + * If this is not the first page in the ABD, then the data must start + * on a page-aligned boundary (so the kernel can split on page + * boundaries without having to deal with a hole). If it is, then + * it can start on LBS-alignment. */ - s->end = (off+len) & s->bmask; + if (s->seen_first) { + if (!IS_P2ALIGNED(off, PAGESIZE)) + return (1); + } else { + if (!IS_P2ALIGNED(off, s->blocksize)) + return (1); + s->seen_first = 1; + } - /* All blocks after the first must start on a block size boundary. */ - if (s->npages != 0 && (off & s->bmask) != 0) - return (1); + /* + * If this data does not end on a page-aligned boundary, then this + * must be the last page in the ABD, for the same reason. + */ + s->seen_last = !IS_P2ALIGNED(off+len, PAGESIZE); - s->npages++; return (0); } @@ -75,8 +108,8 @@ typedef struct { /* test name */ const char *name; - /* blocks size mask */ - uint32_t mask; + /* stored block size */ + uint32_t blocksize; /* amount of data to take */ size_t size; @@ -89,39 +122,39 @@ static const page_test_t valid_tests[] = { /* 512B block tests */ { "512B blocks, 4K single page", - 0x1ff, 0x1000, { + 512, 0x1000, { { 0x0, 0x1000 }, }, }, { "512B blocks, 1K at start of page", - 0x1ff, 0x400, { + 512, 0x400, { { 0x0, 0x1000 }, }, }, { "512B blocks, 1K at end of page", - 0x1ff, 0x400, { + 512, 0x400, { { 0x0c00, 0x0400 }, }, }, { "512B blocks, 1K within page, 512B start offset", - 0x1ff, 0x400, { + 512, 0x400, { { 0x0200, 0x0e00 }, }, }, { "512B blocks, 8K across 2x4K pages", - 0x1ff, 0x2000, { + 512, 0x2000, { { 0x0, 0x1000 }, { 0x0, 0x1000 }, }, }, { "512B blocks, 4K across two pages, 2K start offset", - 0x1ff, 0x1000, { + 512, 0x1000, { { 0x0800, 0x0800 }, { 0x0, 0x0800 }, }, }, { "512B blocks, 16K across 5x4K pages, 512B start offset", - 0x1ff, 0x4000, { + 512, 0x4000, { { 0x0200, 0x0e00 }, { 0x0, 0x1000 }, { 0x0, 0x1000 }, @@ -130,7 +163,7 @@ static const page_test_t valid_tests[] = { }, }, { "512B blocks, 64K data, 8x8K compound pages", - 0x1ff, 0x10000, { + 512, 0x10000, { { 0x0, 0x2000 }, { 0x0, 0x2000 }, { 0x0, 0x2000 }, @@ -142,7 +175,7 @@ static const page_test_t valid_tests[] = { }, }, { "512B blocks, 64K data, 9x8K compound pages, 512B start offset", - 0x1ff, 0x10000, { + 512, 0x10000, { { 0x0200, 0x1e00 }, { 0x0, 0x2000 }, { 0x0, 0x2000 }, @@ -155,7 +188,7 @@ static const page_test_t valid_tests[] = { }, }, { "512B blocks, 64K data, 2x16K compound pages, 8x4K pages", - 0x1ff, 0x10000, { + 512, 0x10000, { { 0x0, 0x8000 }, { 0x0, 0x8000 }, { 0x0, 0x1000 }, @@ -169,7 +202,7 @@ static const page_test_t valid_tests[] = { }, }, { "512B blocks, 64K data, mixed 4K/8K/16K pages", - 0x1ff, 0x10000, { + 512, 0x10000, { { 0x0, 0x1000 }, { 0x0, 0x2000 }, { 0x0, 0x1000 }, @@ -183,7 +216,7 @@ static const page_test_t valid_tests[] = { }, }, { "512B blocks, 64K data, mixed 4K/8K/16K pages, 1K start offset", - 0x1ff, 0x10000, { + 512, 0x10000, { { 0x0400, 0x0c00 }, { 0x0, 0x1000 }, { 0x0, 0x1000 }, @@ -200,48 +233,18 @@ static const page_test_t valid_tests[] = { /* 4K block tests */ { "4K blocks, 4K single page", - 0xfff, 0x1000, { + 4096, 0x1000, { { 0x0, 0x1000 }, }, - }, { - "4K blocks, 1K at start of page", - 0xfff, 0x400, { - { 0x0, 0x1000 }, - }, - }, { - "4K blocks, 1K at end of page", - 0xfff, 0x400, { - { 0x0c00, 0x0400 }, - }, - }, { - "4K blocks, 1K within page, 512B start offset", - 0xfff, 0x400, { - { 0x0200, 0x0e00 }, - }, }, { "4K blocks, 8K across 2x4K pages", - 0xfff, 0x2000, { + 4096, 0x2000, { { 0x0, 0x1000 }, { 0x0, 0x1000 }, }, - }, { - "4K blocks, 4K across two pages, 2K start offset", - 0xfff, 0x1000, { - { 0x0800, 0x0800 }, - { 0x0, 0x0800 }, - }, - }, { - "4K blocks, 16K across 5x4K pages, 512B start offset", - 0xfff, 0x4000, { - { 0x0200, 0x0e00 }, - { 0x0, 0x1000 }, - { 0x0, 0x1000 }, - { 0x0, 0x1000 }, - { 0x0, 0x0200 }, - }, }, { "4K blocks, 64K data, 8x8K compound pages", - 0xfff, 0x10000, { + 4096, 0x10000, { { 0x0, 0x2000 }, { 0x0, 0x2000 }, { 0x0, 0x2000 }, @@ -251,22 +254,9 @@ static const page_test_t valid_tests[] = { { 0x0, 0x2000 }, { 0x0, 0x2000 }, }, - }, { - "4K blocks, 64K data, 9x8K compound pages, 512B start offset", - 0xfff, 0x10000, { - { 0x0200, 0x1e00 }, - { 0x0, 0x2000 }, - { 0x0, 0x2000 }, - { 0x0, 0x2000 }, - { 0x0, 0x2000 }, - { 0x0, 0x2000 }, - { 0x0, 0x2000 }, - { 0x0, 0x2000 }, - { 0x0, 0x0200 }, - }, }, { "4K blocks, 64K data, 2x16K compound pages, 8x4K pages", - 0xfff, 0x10000, { + 4096, 0x10000, { { 0x0, 0x8000 }, { 0x0, 0x8000 }, { 0x0, 0x1000 }, @@ -280,7 +270,7 @@ static const page_test_t valid_tests[] = { }, }, { "4K blocks, 64K data, mixed 4K/8K/16K pages", - 0xfff, 0x10000, { + 4096, 0x10000, { { 0x0, 0x1000 }, { 0x0, 0x2000 }, { 0x0, 0x1000 }, @@ -292,9 +282,78 @@ static const page_test_t valid_tests[] = { { 0x0, 0x1000 }, { 0x0, 0x2000 }, }, + }, + + { 0 }, +}; + +static const page_test_t invalid_tests[] = { + /* + * Gang tests. Composed of lots of smaller allocations, rarely properly + * aligned. + */ + { + "512B blocks, 16K data, 512 leader (gang block simulation)", + 512, 0x8000, { + { 0x0, 0x0200 }, + { 0x0, 0x1000 }, + { 0x0, 0x1000 }, + { 0x0, 0x1000 }, + { 0x0, 0x0c00 }, + }, + }, { + "4K blocks, 32K data, 2 incompatible spans " + "(gang abd simulation)", + 4096, 0x8000, { + { 0x0800, 0x0800 }, + { 0x0, 0x1000 }, + { 0x0, 0x1000 }, + { 0x0, 0x1000 }, + { 0x0, 0x0800 }, + { 0x0800, 0x0800 }, + { 0x0, 0x1000 }, + { 0x0, 0x1000 }, + { 0x0, 0x1000 }, + { 0x0, 0x0800 }, + }, + }, + + /* + * Blocks must not span multiple physical pages. These tests used to + * be considered valid, but were since found to be invalid and were + * moved here. + */ + { + "4K blocks, 4K across two pages, 2K start offset", + 4096, 0x1000, { + { 0x0800, 0x0800 }, + { 0x0, 0x0800 }, + }, + }, { + "4K blocks, 16K across 5x4K pages, 512B start offset", + 4096, 0x4000, { + { 0x0200, 0x0e00 }, + { 0x0, 0x1000 }, + { 0x0, 0x1000 }, + { 0x0, 0x1000 }, + { 0x0, 0x0200 }, + }, + }, { + "4K blocks, 64K data, 9x8K compound pages, 512B start offset", + 4096, 0x10000, { + { 0x0200, 0x1e00 }, + { 0x0, 0x2000 }, + { 0x0, 0x2000 }, + { 0x0, 0x2000 }, + { 0x0, 0x2000 }, + { 0x0, 0x2000 }, + { 0x0, 0x2000 }, + { 0x0, 0x2000 }, + { 0x0, 0x0200 }, + }, }, { "4K blocks, 64K data, mixed 4K/8K/16K pages, 1K start offset", - 0xfff, 0x10000, { + 4096, 0x10000, { { 0x0400, 0x0c00 }, { 0x0, 0x1000 }, { 0x0, 0x1000 }, @@ -308,35 +367,40 @@ static const page_test_t valid_tests[] = { }, }, - { 0 }, -}; - -static const page_test_t invalid_tests[] = { + /* + * This is the very typical case of a 4K block being allocated from + * the middle of a mixed-used slab backed by a higher-order compound + * page. + */ { - "512B blocks, 16K data, 512 leader (gang block simulation)", - 0x1ff, 0x8000, { - { 0x0, 0x0200 }, - { 0x0, 0x1000 }, - { 0x0, 0x1000 }, - { 0x0, 0x1000 }, - { 0x0, 0x0c00 }, + "4K blocks, 4K data from compound slab, 2K-align offset", + 4096, 0x1000, { + { 0x1800, 0x6800 } + } + }, + + /* + * Blocks smaller than LBS should never be possible, but used to be by + * accident (see GH#16990). We test for and reject them just to be + * sure. + */ + { + "4K blocks, 1K at end of page", + 4096, 0x400, { + { 0x0c00, 0x0400 }, }, }, { - "4K blocks, 32K data, 2 incompatible spans " - "(gang abd simulation)", - 0xfff, 0x8000, { - { 0x0800, 0x0800 }, - { 0x0, 0x1000 }, - { 0x0, 0x1000 }, - { 0x0, 0x1000 }, - { 0x0, 0x0800 }, - { 0x0800, 0x0800 }, - { 0x0, 0x1000 }, - { 0x0, 0x1000 }, - { 0x0, 0x1000 }, - { 0x0, 0x0800 }, + "4K blocks, 1K at start of page", + 4096, 0x400, { + { 0x0, 0x1000 }, + }, + }, { + "4K blocks, 1K within page, 512B start offset", + 4096, 0x400, { + { 0x0200, 0x0e00 }, }, }, + { 0 }, }; @@ -345,10 +409,8 @@ run_test(const page_test_t *test, bool verbose) { size_t rem = test->size; - vdev_disk_check_pages_t s = { - .bmask = 0xfff, - .npages = 0, - .end = 0, + vdev_disk_check_alignment_t s = { + .blocksize = test->blocksize, }; for (int i = 0; test->pages[i][1] > 0; i++) { @@ -362,7 +424,7 @@ run_test(const page_test_t *test, bool verbose) "rem %lx, take %lx\n", i, off, len, rem, take); - if (vdev_disk_check_pages_cb(NULL, off, take, &s)) { + if (vdev_disk_check_alignment_cb(NULL, off, take, &s)) { if (verbose) printf(" ABORT: misalignment detected, " "rem %lx\n", rem); @@ -389,7 +451,7 @@ run_test_set(const page_test_t *tests, bool want, int *ntests, int *npassed) for (const page_test_t *test = &tests[0]; test->name; test++) { bool pass = (run_test(test, false) == want); if (pass) { - printf("%s: PASS\n", test->name); + printf("%c %s: PASS\n", want ? '+' : '-', test->name); (*npassed)++; } else { printf("%s: FAIL [expected %s, got %s]\n", test->name, From e7425ae6248db21240049df859630d5044dcd58a Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Fri, 25 Oct 2024 17:28:20 +1100 Subject: [PATCH 13/33] vdev_disk: move abd return and free off the interrupt handler Freeing an ABD can take sleeping locks to update various stats. We aren't allowed to sleep on an interrupt handler. So, move the free off to the io_done callback. We should never have been freeing things in the interrupt handler, but we got away with it because we were usually freeing a linear ABD, which at most is returning two objects to a cache and never sleeping. Scatter ABDs can be used now, and those have more complex locking. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Reviewed-by: Tony Hutter Signed-off-by: Rob Norris Closes #16687 --- module/os/linux/zfs/vdev_disk.c | 40 ++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index 7a4944410906..6a66a72b91a9 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -801,21 +801,13 @@ vbio_completion(struct bio *bio) bio_put(bio); /* - * If we copied the ABD before issuing it, clean up and return the copy - * to the ADB, with changes if appropriate. + * We're likely in an interrupt context so we can't do ABD/memory work + * here; instead we stash vbio on the zio and take care of it in the + * done callback. */ - if (vbio->vbio_abd != NULL) { - if (zio->io_type == ZIO_TYPE_READ) - abd_copy(zio->io_abd, vbio->vbio_abd, zio->io_size); + ASSERT3P(zio->io_bio, ==, NULL); + zio->io_bio = vbio; - abd_free(vbio->vbio_abd); - vbio->vbio_abd = NULL; - } - - /* Final cleanup */ - kmem_free(vbio, sizeof (vbio_t)); - - /* All done, submit for processing */ zio_delay_interrupt(zio); } @@ -1451,6 +1443,28 @@ vdev_disk_io_start(zio_t *zio) static void vdev_disk_io_done(zio_t *zio) { + /* If this was a read or write, we need to clean up the vbio */ + if (zio->io_bio != NULL) { + vbio_t *vbio = zio->io_bio; + zio->io_bio = NULL; + + /* + * If we copied the ABD before issuing it, clean up and return + * the copy to the ADB, with changes if appropriate. + */ + if (vbio->vbio_abd != NULL) { + if (zio->io_type == ZIO_TYPE_READ) + abd_copy(zio->io_abd, vbio->vbio_abd, + zio->io_size); + + abd_free(vbio->vbio_abd); + vbio->vbio_abd = NULL; + } + + /* Final cleanup */ + kmem_free(vbio, sizeof (vbio_t)); + } + /* * If the device returned EIO, we revalidate the media. If it is * determined the media has changed this triggers the asynchronous From 3c650bec152eab64bc094b7314e866610104db29 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Thu, 24 Oct 2024 11:27:14 +1100 Subject: [PATCH 14/33] Revert "Workaround issue of Linux vdev_disk.c, (#16678)" Now that we can handle these different alignments, we don't this workaround. This reverts commit aefc2da8a594d7a8059c862eab464d5f798393b3. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Reviewed-by: Tony Hutter Signed-off-by: Rob Norris Closes #16687 --- module/zfs/zio.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 3c7305e0e724..a5daf73d59ba 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -187,20 +187,6 @@ zio_init(void) continue; #endif -#if defined(__linux__) && defined(_KERNEL) - /* - * Workaround issue of Linux vdev_disk.c, in some cases not - * linearizing buffers with disk sector crossing a page - * boundary. It is fine for hardware, but somehow required by - * LUKS. It is not typical for ZFS to produce such buffers, but - * it may happen if 6KB block is compressed to 4KB, while still - * having 2KB alignment. Banning the 6KB buffers helps vdevs - * with ashifh=12. - */ - if (size > PAGESIZE && !IS_P2ALIGNED(size, PAGESIZE)) - continue; -#endif - if (IS_P2ALIGNED(size, PAGESIZE)) align = PAGESIZE; else From acb6e71eda3c95ee4551bd3796defe7b57ff96f0 Mon Sep 17 00:00:00 2001 From: Rich Ercolani <214141+rincebrain@users.noreply.github.com> Date: Thu, 31 Oct 2024 22:32:31 -0400 Subject: [PATCH 15/33] Added output to `zpool online` and `offline` I was surprised to discover today that `zpool online` and `zpool offline` don't print any information about why they failed in many cases, they just return 1 with no information about why. Let's improve that where we can without changing the library function. Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Reviewed-by: Alexander Motin Reviewed-by: Allan Jude Signed-off-by: Rich Ercolani Closes #16244 --- cmd/zpool/zpool_main.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index ea180f6b705e..6a45a063d91a 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -7966,8 +7966,11 @@ zpool_do_online(int argc, char **argv) poolname = argv[0]; - if ((zhp = zpool_open(g_zfs, poolname)) == NULL) + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) { + (void) fprintf(stderr, gettext("failed to open pool " + "\"%s\""), poolname); return (1); + } for (i = 1; i < argc; i++) { vdev_state_t oldstate; @@ -7988,12 +7991,15 @@ zpool_do_online(int argc, char **argv) &l2cache, NULL); if (tgt == NULL) { ret = 1; + (void) fprintf(stderr, gettext("couldn't find device " + "\"%s\" in pool \"%s\"\n"), argv[i], poolname); continue; } uint_t vsc; oldstate = ((vdev_stat_t *)fnvlist_lookup_uint64_array(tgt, ZPOOL_CONFIG_VDEV_STATS, &vsc))->vs_state; - if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) { + if ((rc = zpool_vdev_online(zhp, argv[i], flags, + &newstate)) == 0) { if (newstate != VDEV_STATE_HEALTHY) { (void) printf(gettext("warning: device '%s' " "onlined, but remains in faulted state\n"), @@ -8019,6 +8025,9 @@ zpool_do_online(int argc, char **argv) } } } else { + (void) fprintf(stderr, gettext("Failed to online " + "\"%s\" in pool \"%s\": %d\n"), + argv[i], poolname, rc); ret = 1; } } @@ -8103,8 +8112,11 @@ zpool_do_offline(int argc, char **argv) poolname = argv[0]; - if ((zhp = zpool_open(g_zfs, poolname)) == NULL) + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) { + (void) fprintf(stderr, gettext("failed to open pool " + "\"%s\""), poolname); return (1); + } for (i = 1; i < argc; i++) { uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]); From 673efbbf5d160aea9a58f9eaac6d781d0975659a Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Sat, 2 Nov 2024 03:08:33 +1100 Subject: [PATCH 16/33] zdb: add extra -T flag to show histograms of BRT refcounts Sponsored-by: https://despairlabs.com/sponsor/ Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Rob Norris Closes #16692 --- cmd/zdb/zdb.c | 29 ++++++++++++++++++++++++----- man/man8/zdb.8 | 4 +++- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 42b82c08249f..46587671202a 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -2152,14 +2152,21 @@ dump_brt(spa_t *spa) if (dump_opt['T'] < 3) return; + /* -TTT shows a per-vdev histograms; -TTTT shows all entries */ + boolean_t do_histo = dump_opt['T'] == 3; + char dva[64]; - printf("\n%-16s %-10s\n", "DVA", "REFCNT"); + + if (!do_histo) + printf("\n%-16s %-10s\n", "DVA", "REFCNT"); for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) { brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid]; if (brtvd == NULL || !brtvd->bv_initiated) continue; + uint64_t counts[64] = {}; + zap_cursor_t zc; zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, brt->brt_mos, brtvd->bv_mos_entries); @@ -2172,14 +2179,26 @@ dump_brt(spa_t *spa) za->za_integer_length, za->za_num_integers, &refcnt)); - uint64_t offset = *(const uint64_t *)za->za_name; + if (do_histo) + counts[highbit64(refcnt)]++; + else { + uint64_t offset = + *(const uint64_t *)za->za_name; - snprintf(dva, sizeof (dva), "%" PRIu64 ":%llx", vdevid, - (u_longlong_t)offset); - printf("%-16s %-10llu\n", dva, (u_longlong_t)refcnt); + snprintf(dva, sizeof (dva), "%" PRIu64 ":%llx", + vdevid, (u_longlong_t)offset); + printf("%-16s %-10llu\n", dva, + (u_longlong_t)refcnt); + } } zap_cursor_fini(&zc); zap_attribute_free(za); + + if (do_histo) { + printf("\nBRT: vdev %" PRIu64 + ": DVAs with 2^n refcnts:\n", vdevid); + dump_histogram(counts, 64, 0); + } } } diff --git a/man/man8/zdb.8 b/man/man8/zdb.8 index 08f5a3f70040..ae35454ad083 100644 --- a/man/man8/zdb.8 +++ b/man/man8/zdb.8 @@ -14,7 +14,7 @@ .\" Copyright (c) 2017 Lawrence Livermore National Security, LLC. .\" Copyright (c) 2017 Intel Corporation. .\" -.Dd November 18, 2023 +.Dd October 27, 2024 .Dt ZDB 8 .Os . @@ -408,6 +408,8 @@ blocks cloned, the space saving as a result of cloning, and the saving ratio. .It Fl TT Display the per-vdev BRT statistics, including total references. .It Fl TTT +Display histograms of per-vdev BRT refcounts. +.It Fl TTTT Dump the contents of the block reference tables. .It Fl u , -uberblock Display the current uberblock. From db2354534db85dbc7c52f1365f07d82d6376d965 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Fri, 1 Nov 2024 10:02:03 -0700 Subject: [PATCH 17/33] ZTS: Add Fedora 41, remove Fedora 39 Fedora 41 was released 10/29/24, and Fedora 39 will be EOL on 11/12/24. Update Fedora runners in the test suite. Some minor tweaks also needed to support ksh 1.0.10. Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Signed-off-by: Tony Hutter Closes #16700 --- .github/workflows/scripts/qemu-2-start.sh | 12 ++++++------ .github/workflows/scripts/qemu-3-deps.sh | 8 +++++++- .github/workflows/scripts/qemu-4-build.sh | 2 +- .github/workflows/zfs-qemu.yml | 6 +++--- .../cli_root/zpool_add/zpool_add_dryrun_output.ksh | 6 +++--- .../zpool_create/zpool_create_dryrun_output.ksh | 4 ++-- .../zpool_split/zpool_split_dryrun_output.ksh | 6 +++--- 7 files changed, 25 insertions(+), 19 deletions(-) diff --git a/.github/workflows/scripts/qemu-2-start.sh b/.github/workflows/scripts/qemu-2-start.sh index 84e13832d10f..39ac92107b71 100755 --- a/.github/workflows/scripts/qemu-2-start.sh +++ b/.github/workflows/scripts/qemu-2-start.sh @@ -52,16 +52,16 @@ case "$OS" in OSNAME="Debian 12" URL="https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-generic-amd64.qcow2" ;; - fedora39) - OSNAME="Fedora 39" - OSv="fedora39" - URL="https://download.fedoraproject.org/pub/fedora/linux/releases/39/Cloud/x86_64/images/Fedora-Cloud-Base-39-1.5.x86_64.qcow2" - ;; fedora40) OSNAME="Fedora 40" - OSv="fedora39" + OSv="fedora-unknown" URL="https://download.fedoraproject.org/pub/fedora/linux/releases/40/Cloud/x86_64/images/Fedora-Cloud-Base-Generic.x86_64-40-1.14.qcow2" ;; + fedora41) + OSNAME="Fedora 41" + OSv="fedora-unknown" + URL="https://download.fedoraproject.org/pub/fedora/linux/releases/41/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-41-1.4.x86_64.qcow2" + ;; freebsd13-3r) OSNAME="FreeBSD 13.3-RELEASE" OSv="freebsd13.0" diff --git a/.github/workflows/scripts/qemu-3-deps.sh b/.github/workflows/scripts/qemu-3-deps.sh index bf06c1892ceb..96979cd02e09 100755 --- a/.github/workflows/scripts/qemu-3-deps.sh +++ b/.github/workflows/scripts/qemu-3-deps.sh @@ -66,7 +66,13 @@ function rhel() { echo "##[endgroup]" echo "##[group]Install Development Tools" - sudo dnf group install -y "Development Tools" + + # Alma wants "Development Tools", Fedora 41 wants "development-tools" + if ! sudo dnf group install -y "Development Tools" ; then + echo "Trying 'development-tools' instead of 'Development Tools'" + sudo dnf group install -y development-tools + fi + sudo dnf install -y \ acl attr bc bzip2 cryptsetup curl dbench dkms elfutils-libelf-devel fio \ gdb git jq kernel-rpm-macros ksh libacl-devel libaio-devel \ diff --git a/.github/workflows/scripts/qemu-4-build.sh b/.github/workflows/scripts/qemu-4-build.sh index 1051ee1f4deb..955f605f5bce 100755 --- a/.github/workflows/scripts/qemu-4-build.sh +++ b/.github/workflows/scripts/qemu-4-build.sh @@ -83,7 +83,7 @@ function rpm_build_and_install() { echo "##[endgroup]" echo "##[group]Install" - run sudo dnf -y --skip-broken localinstall $(ls *.rpm | grep -v src.rpm) + run sudo dnf -y --nobest install $(ls *.rpm | grep -v src.rpm) echo "##[endgroup]" } diff --git a/.github/workflows/zfs-qemu.yml b/.github/workflows/zfs-qemu.yml index f819e9938e31..e90030f4c02e 100644 --- a/.github/workflows/zfs-qemu.yml +++ b/.github/workflows/zfs-qemu.yml @@ -22,8 +22,8 @@ jobs: - name: Generate OS config and CI type id: os run: | - FULL_OS='["almalinux8", "almalinux9", "centos-stream9", "debian11", "debian12", "fedora39", "fedora40", "freebsd13-4r", "freebsd14-0r", "freebsd14-1s", "ubuntu20", "ubuntu22", "ubuntu24"]' - QUICK_OS='["almalinux8", "almalinux9", "debian12", "fedora40", "freebsd13-3r", "freebsd14-1r", "ubuntu24"]' + FULL_OS='["almalinux8", "almalinux9", "centos-stream9", "debian11", "debian12", "fedora40", "fedora41", "freebsd13-4r", "freebsd14-0r", "freebsd14-1s", "ubuntu20", "ubuntu22", "ubuntu24"]' + QUICK_OS='["almalinux8", "almalinux9", "debian12", "fedora41", "freebsd13-3r", "freebsd14-1r", "ubuntu24"]' # determine CI type when running on PR ci_type="full" if ${{ github.event_name == 'pull_request' }}; then @@ -46,7 +46,7 @@ jobs: strategy: fail-fast: false matrix: - # rhl: almalinux8, almalinux9, centos-stream9, fedora39, fedora40 + # rhl: almalinux8, almalinux9, centos-stream9, fedora40, fedora41 # debian: debian11, debian12, ubuntu20, ubuntu22, ubuntu24 # misc: archlinux, tumbleweed # FreeBSD Release: freebsd13-3r, freebsd13-4r, freebsd14-0r, freebsd14-1r diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_dryrun_output.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_dryrun_output.ksh index fa1ce9c64d33..3b06e8c35c77 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_dryrun_output.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_dryrun_output.ksh @@ -148,9 +148,9 @@ done # Foreach test create pool, add -n devices and check output. for (( i=0; i < ${#tests[@]}; i+=1 )); do - typeset tree="${tests[$i].tree}" - typeset add="${tests[$i].add}" - typeset want="${tests[$i].want}" + tree="${tests[$i].tree}" + add="${tests[$i].add}" + want="${tests[$i].want}" log_must eval zpool create "$TESTPOOL" $tree log_must poolexists "$TESTPOOL" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_dryrun_output.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_dryrun_output.ksh index 485891c945a3..0671ea618e05 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_dryrun_output.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_dryrun_output.ksh @@ -124,8 +124,8 @@ done # Foreach test create pool, add -n devices and check output. for (( i=0; i < ${#tests[@]}; i+=1 )); do - typeset tree="${tests[$i].tree}" - typeset want="${tests[$i].want}" + tree="${tests[$i].tree}" + want="${tests[$i].want}" typeset out="$(log_must eval "zpool create -n '$TESTPOOL' $tree" | \ sed /^SUCCESS/d)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_dryrun_output.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_dryrun_output.ksh index 410b1fe7a03e..def1d154387d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_dryrun_output.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_dryrun_output.ksh @@ -133,9 +133,9 @@ done # Foreach test create pool, add -n devices and check output. for (( i=0; i < ${#tests[@]}; i+=1 )); do - typeset tree="${tests[$i].tree}" - typeset devs="${tests[$i].devs}" - typeset want="${tests[$i].want}" + tree="${tests[$i].tree}" + devs="${tests[$i].devs}" + want="${tests[$i].want}" log_must eval zpool create "$TESTPOOL" $tree log_must poolexists "$TESTPOOL" From 1c7d4b4c943b6da0f77df25ec572d975f6631a98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Fri, 1 Nov 2024 20:12:13 +0100 Subject: [PATCH 18/33] module: unicode: remove unused uconv.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Ahelenia Ziemiańska Closes #16702 --- include/sys/u8_textprep.h | 22 - lib/libunicode/Makefile.am | 3 +- module/Kbuild.in | 3 +- module/Makefile.bsd | 3 +- module/unicode/uconv.c | 859 ------------------------------------- 5 files changed, 3 insertions(+), 887 deletions(-) delete mode 100644 module/unicode/uconv.c diff --git a/include/sys/u8_textprep.h b/include/sys/u8_textprep.h index e82037de4fe4..1ec25613a63a 100644 --- a/include/sys/u8_textprep.h +++ b/include/sys/u8_textprep.h @@ -36,28 +36,6 @@ extern "C" { #endif -/* - * Unicode encoding conversion functions and their macros. - */ -#define UCONV_IN_BIG_ENDIAN 0x0001 -#define UCONV_OUT_BIG_ENDIAN 0x0002 -#define UCONV_IN_SYSTEM_ENDIAN 0x0004 -#define UCONV_OUT_SYSTEM_ENDIAN 0x0008 -#define UCONV_IN_LITTLE_ENDIAN 0x0010 -#define UCONV_OUT_LITTLE_ENDIAN 0x0020 -#define UCONV_IGNORE_NULL 0x0040 -#define UCONV_IN_ACCEPT_BOM 0x0080 -#define UCONV_OUT_EMIT_BOM 0x0100 - -extern int uconv_u16tou32(const uint16_t *, size_t *, uint32_t *, size_t *, - int); -extern int uconv_u16tou8(const uint16_t *, size_t *, uchar_t *, size_t *, int); -extern int uconv_u32tou16(const uint32_t *, size_t *, uint16_t *, size_t *, - int); -extern int uconv_u32tou8(const uint32_t *, size_t *, uchar_t *, size_t *, int); -extern int uconv_u8tou16(const uchar_t *, size_t *, uint16_t *, size_t *, int); -extern int uconv_u8tou32(const uchar_t *, size_t *, uint32_t *, size_t *, int); - /* * UTF-8 text preparation functions and their macros. * diff --git a/lib/libunicode/Makefile.am b/lib/libunicode/Makefile.am index 906759471163..e1ac666a5e60 100644 --- a/lib/libunicode/Makefile.am +++ b/lib/libunicode/Makefile.am @@ -3,5 +3,4 @@ libunicode_la_CFLAGS = $(AM_CFLAGS) $(KERNEL_CFLAGS) $(LIBRARY_CFLAGS) noinst_LTLIBRARIES += libunicode.la nodist_libunicode_la_SOURCES = \ - module/unicode/u8_textprep.c \ - module/unicode/uconv.c + module/unicode/u8_textprep.c diff --git a/module/Kbuild.in b/module/Kbuild.in index dcbdbc912f6d..fc14d5cb535e 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -224,8 +224,7 @@ zfs-objs += $(addprefix nvpair/,$(NVPAIR_OBJS)) UNICODE_OBJS := \ - u8_textprep.o \ - uconv.o + u8_textprep.o zfs-objs += $(addprefix unicode/,$(UNICODE_OBJS)) diff --git a/module/Makefile.bsd b/module/Makefile.bsd index 61a664c5bf66..c605069d07d3 100644 --- a/module/Makefile.bsd +++ b/module/Makefile.bsd @@ -217,8 +217,7 @@ SRCS+= abd_os.c \ zvol_os.c #unicode -SRCS+= u8_textprep.c \ - uconv.c +SRCS+= u8_textprep.c #zcommon SRCS+= cityhash.c \ diff --git a/module/unicode/uconv.c b/module/unicode/uconv.c deleted file mode 100644 index 4bd19ebdd242..000000000000 --- a/module/unicode/uconv.c +++ /dev/null @@ -1,859 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or https://opensource.org/licenses/CDDL-1.0. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - - - -/* - * Unicode encoding conversion functions among UTF-8, UTF-16, and UTF-32. - * (PSARC/2005/446, PSARC/2007/038, PSARC/2007/517) - * Man pages: uconv_u16tou32(9F), uconv_u16tou8(9F), uconv_u32tou16(9F), - * uconv_u32tou8(9F), uconv_u8tou16(9F), and uconv_u8tou32(9F). See also - * the section 3C man pages. - * Interface stability: Committed - */ - -#include -#ifdef _KERNEL -#include -#include -#include -#include -#include -#else -#include -#endif /* _KERNEL */ -#include -#include - - -/* - * The max and min values of high and low surrogate pairs of UTF-16, - * UTF-16 bit shift value, bit mask, and starting value outside of BMP. - */ -#define UCONV_U16_HI_MIN (0xd800U) -#define UCONV_U16_HI_MAX (0xdbffU) -#define UCONV_U16_LO_MIN (0xdc00U) -#define UCONV_U16_LO_MAX (0xdfffU) -#define UCONV_U16_BIT_SHIFT (0x0400U) -#define UCONV_U16_BIT_MASK (0x0fffffU) -#define UCONV_U16_START (0x010000U) - -/* The maximum value of Unicode coding space and ASCII coding space. */ -#define UCONV_UNICODE_MAX (0x10ffffU) -#define UCONV_ASCII_MAX (0x7fU) - -/* The mask values for input and output endians. */ -#define UCONV_IN_ENDIAN_MASKS (UCONV_IN_BIG_ENDIAN | UCONV_IN_LITTLE_ENDIAN) -#define UCONV_OUT_ENDIAN_MASKS (UCONV_OUT_BIG_ENDIAN | UCONV_OUT_LITTLE_ENDIAN) - -/* Native and reversed endian macros. */ -#ifdef _ZFS_BIG_ENDIAN -#define UCONV_IN_NAT_ENDIAN UCONV_IN_BIG_ENDIAN -#define UCONV_IN_REV_ENDIAN UCONV_IN_LITTLE_ENDIAN -#define UCONV_OUT_NAT_ENDIAN UCONV_OUT_BIG_ENDIAN -#define UCONV_OUT_REV_ENDIAN UCONV_OUT_LITTLE_ENDIAN -#else -#define UCONV_IN_NAT_ENDIAN UCONV_IN_LITTLE_ENDIAN -#define UCONV_IN_REV_ENDIAN UCONV_IN_BIG_ENDIAN -#define UCONV_OUT_NAT_ENDIAN UCONV_OUT_LITTLE_ENDIAN -#define UCONV_OUT_REV_ENDIAN UCONV_OUT_BIG_ENDIAN -#endif /* _BIG_ENDIAN */ - -/* The Byte Order Mark (BOM) character in normal and reversed byte orderings. */ -#define UCONV_BOM_NORMAL (0xfeffU) -#define UCONV_BOM_SWAPPED (0xfffeU) -#define UCONV_BOM_SWAPPED_32 (0xfffe0000U) - -/* UTF-32 boundaries based on UTF-8 character byte lengths. */ -#define UCONV_U8_ONE_BYTE (0x7fU) -#define UCONV_U8_TWO_BYTES (0x7ffU) -#define UCONV_U8_THREE_BYTES (0xffffU) -#define UCONV_U8_FOUR_BYTES (0x10ffffU) - -/* The common minimum and maximum values at the UTF-8 character bytes. */ -#define UCONV_U8_BYTE_MIN (0x80U) -#define UCONV_U8_BYTE_MAX (0xbfU) - -/* - * The following "6" and "0x3f" came from "10xx xxxx" bit representation of - * UTF-8 character bytes. - */ -#define UCONV_U8_BIT_SHIFT 6 -#define UCONV_U8_BIT_MASK 0x3f - -/* - * The following vector shows remaining bytes in a UTF-8 character. - * Index will be the first byte of the character. - */ -static const uchar_t remaining_bytes_tbl[0x100] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -/* C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF */ - 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - -/* D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - -/* E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF */ - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - -/* F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF */ - 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* - * The following is a vector of bit-masks to get used bits in - * the first byte of a UTF-8 character. Index is remaining bytes at above of - * the character. - */ -static const uchar_t u8_masks_tbl[6] = { 0x00, 0x1f, 0x0f, 0x07, 0x03, 0x01 }; - -/* - * The following two vectors are to provide valid minimum and - * maximum values for the 2'nd byte of a multibyte UTF-8 character for - * better illegal sequence checking. The index value must be the value of - * the first byte of the UTF-8 character. - */ -static const uchar_t valid_min_2nd_byte[0x100] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - -/* C0 C1 C2 C3 C4 C5 C6 C7 */ - 0, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - -/* C8 C9 CA CB CC CD CE CF */ - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - -/* D0 D1 D2 D3 D4 D5 D6 D7 */ - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - -/* D8 D9 DA DB DC DD DE DF */ - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - -/* E0 E1 E2 E3 E4 E5 E6 E7 */ - 0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - -/* E8 E9 EA EB EC ED EE EF */ - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - -/* F0 F1 F2 F3 F4 F5 F6 F7 */ - 0x90, 0x80, 0x80, 0x80, 0x80, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static const uchar_t valid_max_2nd_byte[0x100] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - -/* C0 C1 C2 C3 C4 C5 C6 C7 */ - 0, 0, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, - -/* C8 C9 CA CB CC CD CE CF */ - 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, - -/* D0 D1 D2 D3 D4 D5 D6 D7 */ - 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, - -/* D8 D9 DA DB DC DD DE DF */ - 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, - -/* E0 E1 E2 E3 E4 E5 E6 E7 */ - 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, - -/* E8 E9 EA EB EC ED EE EF */ - 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf, - -/* F0 F1 F2 F3 F4 F5 F6 F7 */ - 0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0 -}; - - -static int -check_endian(int flag, int *in, int *out) -{ - *in = flag & UCONV_IN_ENDIAN_MASKS; - - /* You cannot have both. */ - if (*in == UCONV_IN_ENDIAN_MASKS) - return (EBADF); - - if (*in == 0) - *in = UCONV_IN_NAT_ENDIAN; - - *out = flag & UCONV_OUT_ENDIAN_MASKS; - - /* You cannot have both. */ - if (*out == UCONV_OUT_ENDIAN_MASKS) - return (EBADF); - - if (*out == 0) - *out = UCONV_OUT_NAT_ENDIAN; - - return (0); -} - -static boolean_t -check_bom16(const uint16_t *u16s, size_t u16l, int *in) -{ - if (u16l > 0) { - if (*u16s == UCONV_BOM_NORMAL) { - *in = UCONV_IN_NAT_ENDIAN; - return (B_TRUE); - } - if (*u16s == UCONV_BOM_SWAPPED) { - *in = UCONV_IN_REV_ENDIAN; - return (B_TRUE); - } - } - - return (B_FALSE); -} - -static boolean_t -check_bom32(const uint32_t *u32s, size_t u32l, int *in) -{ - if (u32l > 0) { - if (*u32s == UCONV_BOM_NORMAL) { - *in = UCONV_IN_NAT_ENDIAN; - return (B_TRUE); - } - if (*u32s == UCONV_BOM_SWAPPED_32) { - *in = UCONV_IN_REV_ENDIAN; - return (B_TRUE); - } - } - - return (B_FALSE); -} - -int -uconv_u16tou32(const uint16_t *u16s, size_t *utf16len, - uint32_t *u32s, size_t *utf32len, int flag) -{ - int inendian; - int outendian; - size_t u16l; - size_t u32l; - uint32_t hi; - uint32_t lo; - boolean_t do_not_ignore_null; - - /* - * Do preliminary validity checks on parameters and collect info on - * endians. - */ - if (u16s == NULL || utf16len == NULL) - return (EILSEQ); - - if (u32s == NULL || utf32len == NULL) - return (E2BIG); - - if (check_endian(flag, &inendian, &outendian) != 0) - return (EBADF); - - /* - * Initialize input and output parameter buffer indices and - * temporary variables. - */ - u16l = u32l = 0; - hi = 0; - do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); - - /* - * Check on the BOM at the beginning of the input buffer if required - * and if there is indeed one, process it. - */ - if ((flag & UCONV_IN_ACCEPT_BOM) && - check_bom16(u16s, *utf16len, &inendian)) - u16l++; - - /* - * Reset inendian and outendian so that after this point, those can be - * used as condition values. - */ - inendian &= UCONV_IN_NAT_ENDIAN; - outendian &= UCONV_OUT_NAT_ENDIAN; - - /* - * If there is something in the input buffer and if necessary and - * requested, save the BOM at the output buffer. - */ - if (*utf16len > 0 && *utf32len > 0 && (flag & UCONV_OUT_EMIT_BOM)) - u32s[u32l++] = (outendian) ? UCONV_BOM_NORMAL : - UCONV_BOM_SWAPPED_32; - - /* - * Do conversion; if encounter a surrogate pair, assemble high and - * low pair values to form a UTF-32 character. If a half of a pair - * exists alone, then, either it is an illegal (EILSEQ) or - * invalid (EINVAL) value. - */ - for (; u16l < *utf16len; u16l++) { - if (u16s[u16l] == 0 && do_not_ignore_null) - break; - - lo = (uint32_t)((inendian) ? u16s[u16l] : BSWAP_16(u16s[u16l])); - - if (lo >= UCONV_U16_HI_MIN && lo <= UCONV_U16_HI_MAX) { - if (hi) - return (EILSEQ); - hi = lo; - continue; - } else if (lo >= UCONV_U16_LO_MIN && lo <= UCONV_U16_LO_MAX) { - if (! hi) - return (EILSEQ); - lo = (((hi - UCONV_U16_HI_MIN) * UCONV_U16_BIT_SHIFT + - lo - UCONV_U16_LO_MIN) & UCONV_U16_BIT_MASK) - + UCONV_U16_START; - hi = 0; - } else if (hi) { - return (EILSEQ); - } - - if (u32l >= *utf32len) - return (E2BIG); - - u32s[u32l++] = (outendian) ? lo : BSWAP_32(lo); - } - - /* - * If high half didn't see low half, then, it's most likely the input - * parameter is incomplete. - */ - if (hi) - return (EINVAL); - - /* - * Save the number of consumed and saved characters. They do not - * include terminating NULL character (U+0000) at the end of - * the input buffer (even when UCONV_IGNORE_NULL isn't specified and - * the input buffer length is big enough to include the terminating - * NULL character). - */ - *utf16len = u16l; - *utf32len = u32l; - - return (0); -} - -int -uconv_u16tou8(const uint16_t *u16s, size_t *utf16len, - uchar_t *u8s, size_t *utf8len, int flag) -{ - int inendian; - int outendian; - size_t u16l; - size_t u8l; - uint32_t hi; - uint32_t lo; - boolean_t do_not_ignore_null; - - if (u16s == NULL || utf16len == NULL) - return (EILSEQ); - - if (u8s == NULL || utf8len == NULL) - return (E2BIG); - - if (check_endian(flag, &inendian, &outendian) != 0) - return (EBADF); - - u16l = u8l = 0; - hi = 0; - do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); - - if ((flag & UCONV_IN_ACCEPT_BOM) && - check_bom16(u16s, *utf16len, &inendian)) - u16l++; - - inendian &= UCONV_IN_NAT_ENDIAN; - - for (; u16l < *utf16len; u16l++) { - if (u16s[u16l] == 0 && do_not_ignore_null) - break; - - lo = (uint32_t)((inendian) ? u16s[u16l] : BSWAP_16(u16s[u16l])); - - if (lo >= UCONV_U16_HI_MIN && lo <= UCONV_U16_HI_MAX) { - if (hi) - return (EILSEQ); - hi = lo; - continue; - } else if (lo >= UCONV_U16_LO_MIN && lo <= UCONV_U16_LO_MAX) { - if (! hi) - return (EILSEQ); - lo = (((hi - UCONV_U16_HI_MIN) * UCONV_U16_BIT_SHIFT + - lo - UCONV_U16_LO_MIN) & UCONV_U16_BIT_MASK) - + UCONV_U16_START; - hi = 0; - } else if (hi) { - return (EILSEQ); - } - - /* - * Now we convert a UTF-32 character into a UTF-8 character. - * Unicode coding space is between U+0000 and U+10FFFF; - * anything bigger is an illegal character. - */ - if (lo <= UCONV_U8_ONE_BYTE) { - if (u8l >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)lo; - } else if (lo <= UCONV_U8_TWO_BYTES) { - if ((u8l + 1) >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)(0xc0 | ((lo & 0x07c0) >> 6)); - u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x003f)); - } else if (lo <= UCONV_U8_THREE_BYTES) { - if ((u8l + 2) >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)(0xe0 | ((lo & 0x0f000) >> 12)); - u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x00fc0) >> 6)); - u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x0003f)); - } else if (lo <= UCONV_U8_FOUR_BYTES) { - if ((u8l + 3) >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)(0xf0 | ((lo & 0x01c0000) >> 18)); - u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x003f000) >> 12)); - u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x0000fc0) >> 6)); - u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x000003f)); - } else { - return (EILSEQ); - } - } - - if (hi) - return (EINVAL); - - *utf16len = u16l; - *utf8len = u8l; - - return (0); -} - -int -uconv_u32tou16(const uint32_t *u32s, size_t *utf32len, - uint16_t *u16s, size_t *utf16len, int flag) -{ - int inendian; - int outendian; - size_t u16l; - size_t u32l; - uint32_t hi; - uint32_t lo; - boolean_t do_not_ignore_null; - - if (u32s == NULL || utf32len == NULL) - return (EILSEQ); - - if (u16s == NULL || utf16len == NULL) - return (E2BIG); - - if (check_endian(flag, &inendian, &outendian) != 0) - return (EBADF); - - u16l = u32l = 0; - do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); - - if ((flag & UCONV_IN_ACCEPT_BOM) && - check_bom32(u32s, *utf32len, &inendian)) - u32l++; - - inendian &= UCONV_IN_NAT_ENDIAN; - outendian &= UCONV_OUT_NAT_ENDIAN; - - if (*utf32len > 0 && *utf16len > 0 && (flag & UCONV_OUT_EMIT_BOM)) - u16s[u16l++] = (outendian) ? UCONV_BOM_NORMAL : - UCONV_BOM_SWAPPED; - - for (; u32l < *utf32len; u32l++) { - if (u32s[u32l] == 0 && do_not_ignore_null) - break; - - hi = (inendian) ? u32s[u32l] : BSWAP_32(u32s[u32l]); - - /* - * Anything bigger than the Unicode coding space, i.e., - * Unicode scalar value bigger than U+10FFFF, is an illegal - * character. - */ - if (hi > UCONV_UNICODE_MAX) - return (EILSEQ); - - /* - * Anything bigger than U+FFFF must be converted into - * a surrogate pair in UTF-16. - */ - if (hi >= UCONV_U16_START) { - lo = ((hi - UCONV_U16_START) % UCONV_U16_BIT_SHIFT) + - UCONV_U16_LO_MIN; - hi = ((hi - UCONV_U16_START) / UCONV_U16_BIT_SHIFT) + - UCONV_U16_HI_MIN; - - if ((u16l + 1) >= *utf16len) - return (E2BIG); - - if (outendian) { - u16s[u16l++] = (uint16_t)hi; - u16s[u16l++] = (uint16_t)lo; - } else { - u16s[u16l++] = BSWAP_16(((uint16_t)hi)); - u16s[u16l++] = BSWAP_16(((uint16_t)lo)); - } - } else { - if (u16l >= *utf16len) - return (E2BIG); - u16s[u16l++] = (outendian) ? (uint16_t)hi : - BSWAP_16(((uint16_t)hi)); - } - } - - *utf16len = u16l; - *utf32len = u32l; - - return (0); -} - -int -uconv_u32tou8(const uint32_t *u32s, size_t *utf32len, - uchar_t *u8s, size_t *utf8len, int flag) -{ - int inendian; - int outendian; - size_t u32l; - size_t u8l; - uint32_t lo; - boolean_t do_not_ignore_null; - - if (u32s == NULL || utf32len == NULL) - return (EILSEQ); - - if (u8s == NULL || utf8len == NULL) - return (E2BIG); - - if (check_endian(flag, &inendian, &outendian) != 0) - return (EBADF); - - u32l = u8l = 0; - do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); - - if ((flag & UCONV_IN_ACCEPT_BOM) && - check_bom32(u32s, *utf32len, &inendian)) - u32l++; - - inendian &= UCONV_IN_NAT_ENDIAN; - - for (; u32l < *utf32len; u32l++) { - if (u32s[u32l] == 0 && do_not_ignore_null) - break; - - lo = (inendian) ? u32s[u32l] : BSWAP_32(u32s[u32l]); - - if (lo <= UCONV_U8_ONE_BYTE) { - if (u8l >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)lo; - } else if (lo <= UCONV_U8_TWO_BYTES) { - if ((u8l + 1) >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)(0xc0 | ((lo & 0x07c0) >> 6)); - u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x003f)); - } else if (lo <= UCONV_U8_THREE_BYTES) { - if ((u8l + 2) >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)(0xe0 | ((lo & 0x0f000) >> 12)); - u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x00fc0) >> 6)); - u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x0003f)); - } else if (lo <= UCONV_U8_FOUR_BYTES) { - if ((u8l + 3) >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)(0xf0 | ((lo & 0x01c0000) >> 18)); - u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x003f000) >> 12)); - u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x0000fc0) >> 6)); - u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x000003f)); - } else { - return (EILSEQ); - } - } - - *utf32len = u32l; - *utf8len = u8l; - - return (0); -} - -int -uconv_u8tou16(const uchar_t *u8s, size_t *utf8len, - uint16_t *u16s, size_t *utf16len, int flag) -{ - int inendian; - int outendian; - size_t u16l; - size_t u8l; - uint32_t hi; - uint32_t lo; - int remaining_bytes; - int first_b; - boolean_t do_not_ignore_null; - - if (u8s == NULL || utf8len == NULL) - return (EILSEQ); - - if (u16s == NULL || utf16len == NULL) - return (E2BIG); - - if (check_endian(flag, &inendian, &outendian) != 0) - return (EBADF); - - u16l = u8l = 0; - do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); - - outendian &= UCONV_OUT_NAT_ENDIAN; - - if (*utf8len > 0 && *utf16len > 0 && (flag & UCONV_OUT_EMIT_BOM)) - u16s[u16l++] = (outendian) ? UCONV_BOM_NORMAL : - UCONV_BOM_SWAPPED; - - for (; u8l < *utf8len; ) { - if (u8s[u8l] == 0 && do_not_ignore_null) - break; - - /* - * Collect a UTF-8 character and convert it to a UTF-32 - * character. In doing so, we screen out illegally formed - * UTF-8 characters and treat such as illegal characters. - * The algorithm at below also screens out anything bigger - * than the U+10FFFF. - * - * See Unicode 3.1 UTF-8 Corrigendum and Unicode 3.2 for - * more details on the illegal values of UTF-8 character - * bytes. - */ - hi = (uint32_t)u8s[u8l++]; - - if (hi > UCONV_ASCII_MAX) { - if ((remaining_bytes = remaining_bytes_tbl[hi]) == 0) - return (EILSEQ); - - first_b = hi; - hi = hi & u8_masks_tbl[remaining_bytes]; - - for (; remaining_bytes > 0; remaining_bytes--) { - /* - * If we have no more bytes, the current - * UTF-8 character is incomplete. - */ - if (u8l >= *utf8len) - return (EINVAL); - - lo = (uint32_t)u8s[u8l++]; - - if (first_b) { - if (lo < valid_min_2nd_byte[first_b] || - lo > valid_max_2nd_byte[first_b]) - return (EILSEQ); - first_b = 0; - } else if (lo < UCONV_U8_BYTE_MIN || - lo > UCONV_U8_BYTE_MAX) { - return (EILSEQ); - } - hi = (hi << UCONV_U8_BIT_SHIFT) | - (lo & UCONV_U8_BIT_MASK); - } - } - - if (hi >= UCONV_U16_START) { - lo = ((hi - UCONV_U16_START) % UCONV_U16_BIT_SHIFT) + - UCONV_U16_LO_MIN; - hi = ((hi - UCONV_U16_START) / UCONV_U16_BIT_SHIFT) + - UCONV_U16_HI_MIN; - - if ((u16l + 1) >= *utf16len) - return (E2BIG); - - if (outendian) { - u16s[u16l++] = (uint16_t)hi; - u16s[u16l++] = (uint16_t)lo; - } else { - u16s[u16l++] = BSWAP_16(((uint16_t)hi)); - u16s[u16l++] = BSWAP_16(((uint16_t)lo)); - } - } else { - if (u16l >= *utf16len) - return (E2BIG); - - u16s[u16l++] = (outendian) ? (uint16_t)hi : - BSWAP_16(((uint16_t)hi)); - } - } - - *utf16len = u16l; - *utf8len = u8l; - - return (0); -} - -int -uconv_u8tou32(const uchar_t *u8s, size_t *utf8len, - uint32_t *u32s, size_t *utf32len, int flag) -{ - int inendian; - int outendian; - size_t u32l; - size_t u8l; - uint32_t hi; - uint32_t c; - int remaining_bytes; - int first_b; - boolean_t do_not_ignore_null; - - if (u8s == NULL || utf8len == NULL) - return (EILSEQ); - - if (u32s == NULL || utf32len == NULL) - return (E2BIG); - - if (check_endian(flag, &inendian, &outendian) != 0) - return (EBADF); - - u32l = u8l = 0; - do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); - - outendian &= UCONV_OUT_NAT_ENDIAN; - - if (*utf8len > 0 && *utf32len > 0 && (flag & UCONV_OUT_EMIT_BOM)) - u32s[u32l++] = (outendian) ? UCONV_BOM_NORMAL : - UCONV_BOM_SWAPPED_32; - - for (; u8l < *utf8len; ) { - if (u8s[u8l] == 0 && do_not_ignore_null) - break; - - hi = (uint32_t)u8s[u8l++]; - - if (hi > UCONV_ASCII_MAX) { - if ((remaining_bytes = remaining_bytes_tbl[hi]) == 0) - return (EILSEQ); - - first_b = hi; - hi = hi & u8_masks_tbl[remaining_bytes]; - - for (; remaining_bytes > 0; remaining_bytes--) { - if (u8l >= *utf8len) - return (EINVAL); - - c = (uint32_t)u8s[u8l++]; - - if (first_b) { - if (c < valid_min_2nd_byte[first_b] || - c > valid_max_2nd_byte[first_b]) - return (EILSEQ); - first_b = 0; - } else if (c < UCONV_U8_BYTE_MIN || - c > UCONV_U8_BYTE_MAX) { - return (EILSEQ); - } - hi = (hi << UCONV_U8_BIT_SHIFT) | - (c & UCONV_U8_BIT_MASK); - } - } - - if (u32l >= *utf32len) - return (E2BIG); - - u32s[u32l++] = (outendian) ? hi : BSWAP_32(hi); - } - - *utf32len = u32l; - *utf8len = u8l; - - return (0); -} - -#if defined(_KERNEL) -EXPORT_SYMBOL(uconv_u16tou32); -EXPORT_SYMBOL(uconv_u16tou8); -EXPORT_SYMBOL(uconv_u32tou16); -EXPORT_SYMBOL(uconv_u32tou8); -EXPORT_SYMBOL(uconv_u8tou16); -EXPORT_SYMBOL(uconv_u8tou32); -#endif From 91bd12dfeb9abe432530615eaf2295d76da24b54 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Sat, 2 Nov 2024 08:43:25 +1100 Subject: [PATCH 19/33] zfs(4): remove "experimental" from zfs_bclone_enabled I think we've done enough experiments. Sponsored-by: https://despairlabs.com/sponsor/ Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Reviewed-by: George Melikov Signed-off-by: Rob Norris Closes #16189 Closes #16712 --- man/man4/zfs.4 | 7 ++++--- module/zfs/zfs_vnops.c | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index c9f6ed0dece3..da027798f962 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -18,7 +18,7 @@ .\" .\" Copyright (c) 2024, Klara, Inc. .\" -.Dd October 2, 2024 +.Dd November 1, 2024 .Dt ZFS 4 .Os . @@ -1333,9 +1333,10 @@ results in vector instructions from the respective CPU instruction set being used. . .It Sy zfs_bclone_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int -Enable the experimental block cloning feature. +Enables access to the block cloning feature. If this setting is 0, then even if feature@block_cloning is enabled, -attempts to clone blocks will act as though the feature is disabled. +using functions and system calls that attempt to clone blocks will act as +though the feature is disabled. . .It Sy zfs_bclone_wait_dirty Ns = Ns Sy 0 Ns | Ns 1 Pq int When set to 1 the FICLONE and FICLONERANGE ioctls wait for dirty data to be diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 0799f17758bf..c01a9cf5d0b2 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -58,9 +58,9 @@ #include /* - * Enable the experimental block cloning feature. If this setting is 0, then - * even if feature@block_cloning is enabled, attempts to clone blocks will act - * as though the feature is disabled. + * Enables access to the block cloning feature. If this setting is 0, then even + * if feature@block_cloning is enabled, using functions and system calls that + * attempt to clone blocks will act as though the feature is disabled. */ int zfs_bclone_enabled = 1; From b16e096198ba9e530c9fd3a955cde66b6a5efc8e Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 4 Nov 2024 19:42:06 -0500 Subject: [PATCH 20/33] Reduce dirty records memory usage Small block workloads may use a very large number of dirty records. During simple block cloning test due to BRT still using 4KB blocks I can easily see up to 2.5M of those used. Before this change dbuf_dirty_record_t structures representing them were allocated via kmem_zalloc(), that rounded their size up to 512 bytes. Introduction of specialized kmem cache allows to reduce the size from 512 to 408 bytes. Additionally, since override and raw params in dirty records are mutually exclusive, puting them into a union allows to reduce structure size down to 368 bytes, increasing the saving to 28%, that can be a 0.5GB or more of RAM. Reviewed-by: Brian Behlendorf Reviewed-by: Brian Atkinson Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #16694 --- include/sys/dbuf.h | 26 +++++++++++++++++--------- module/zfs/dbuf.c | 21 ++++++++++++++++----- module/zfs/dmu.c | 9 ++++----- module/zfs/dmu_direct.c | 1 + module/zfs/dnode_sync.c | 2 +- 5 files changed, 39 insertions(+), 20 deletions(-) diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index c9bfb9a8026c..e69464809a42 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -171,7 +171,6 @@ typedef struct dbuf_dirty_record { * gets COW'd in a subsequent transaction group. */ arc_buf_t *dr_data; - blkptr_t dr_overridden_by; override_states_t dr_override_state; uint8_t dr_copies; boolean_t dr_nopwrite; @@ -179,14 +178,21 @@ typedef struct dbuf_dirty_record { boolean_t dr_diowrite; boolean_t dr_has_raw_params; - /* - * If dr_has_raw_params is set, the following crypt - * params will be set on the BP that's written. - */ - boolean_t dr_byteorder; - uint8_t dr_salt[ZIO_DATA_SALT_LEN]; - uint8_t dr_iv[ZIO_DATA_IV_LEN]; - uint8_t dr_mac[ZIO_DATA_MAC_LEN]; + /* Override and raw params are mutually exclusive. */ + union { + blkptr_t dr_overridden_by; + struct { + /* + * If dr_has_raw_params is set, the + * following crypt params will be set + * on the BP that's written. + */ + boolean_t dr_byteorder; + uint8_t dr_salt[ZIO_DATA_SALT_LEN]; + uint8_t dr_iv[ZIO_DATA_IV_LEN]; + uint8_t dr_mac[ZIO_DATA_MAC_LEN]; + }; + }; } dl; struct dirty_lightweight_leaf { /* @@ -346,6 +352,8 @@ typedef struct dbuf_hash_table { typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t); +extern kmem_cache_t *dbuf_dirty_kmem_cache; + uint64_t dbuf_whichblock(const struct dnode *di, const int64_t level, const uint64_t offset); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index df9368fc8bdb..b1419d96f4ef 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -182,6 +182,7 @@ static void dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr); * Global data structures and functions for the dbuf cache. */ static kmem_cache_t *dbuf_kmem_cache; +kmem_cache_t *dbuf_dirty_kmem_cache; static taskq_t *dbu_evict_taskq; static kthread_t *dbuf_cache_evict_thread; @@ -966,6 +967,8 @@ dbuf_init(void) dbuf_kmem_cache = kmem_cache_create("dmu_buf_impl_t", sizeof (dmu_buf_impl_t), 0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0); + dbuf_dirty_kmem_cache = kmem_cache_create("dbuf_dirty_record_t", + sizeof (dbuf_dirty_record_t), 0, NULL, NULL, NULL, NULL, NULL, 0); for (int i = 0; i < hmsize; i++) mutex_init(&h->hash_mutexes[i], NULL, MUTEX_NOLOCKDEP, NULL); @@ -1041,6 +1044,7 @@ dbuf_fini(void) sizeof (kmutex_t)); kmem_cache_destroy(dbuf_kmem_cache); + kmem_cache_destroy(dbuf_dirty_kmem_cache); taskq_destroy(dbu_evict_taskq); mutex_enter(&dbuf_evict_lock); @@ -2343,7 +2347,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * to make a copy of it so that the changes we make in this * transaction group won't leak out when we sync the older txg. */ - dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP); + dr = kmem_cache_alloc(dbuf_dirty_kmem_cache, KM_SLEEP); + memset(dr, 0, sizeof (*dr)); list_link_init(&dr->dr_dirty_node); list_link_init(&dr->dr_dbuf_node); dr->dr_dnode = dn; @@ -2526,7 +2531,7 @@ dbuf_undirty_bonus(dbuf_dirty_record_t *dr) mutex_destroy(&dr->dt.di.dr_mtx); list_destroy(&dr->dt.di.dr_children); } - kmem_free(dr, sizeof (dbuf_dirty_record_t)); + kmem_cache_free(dbuf_dirty_kmem_cache, dr); ASSERT3U(db->db_dirtycnt, >, 0); db->db_dirtycnt -= 1; } @@ -2616,7 +2621,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) } } - kmem_free(dr, sizeof (dbuf_dirty_record_t)); + kmem_cache_free(dbuf_dirty_kmem_cache, dr); ASSERT(db->db_dirtycnt > 0); db->db_dirtycnt -= 1; @@ -2941,7 +2946,7 @@ dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder, * (see dbuf_sync_dnode_leaf_crypt()). */ ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT); - ASSERT3U(db->db_level, ==, 0); + ASSERT0(db->db_level); ASSERT(db->db_objset->os_raw_receive); dmu_buf_will_dirty_impl(db_fake, @@ -2950,6 +2955,7 @@ dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder, dr = dbuf_find_dirty_eq(db, tx->tx_txg); ASSERT3P(dr, !=, NULL); + ASSERT3U(dr->dt.dl.dr_override_state, ==, DR_NOT_OVERRIDDEN); dr->dt.dl.dr_has_raw_params = B_TRUE; dr->dt.dl.dr_byteorder = byteorder; @@ -2964,10 +2970,14 @@ dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx) struct dirty_leaf *dl; dbuf_dirty_record_t *dr; + ASSERT3U(db->db.db_object, !=, DMU_META_DNODE_OBJECT); + ASSERT0(db->db_level); + dr = list_head(&db->db_dirty_records); ASSERT3P(dr, !=, NULL); ASSERT3U(dr->dr_txg, ==, tx->tx_txg); dl = &dr->dt.dl; + ASSERT0(dl->dr_has_raw_params); dl->dr_overridden_by = *bp; dl->dr_override_state = DR_OVERRIDDEN; BP_SET_LOGICAL_BIRTH(&dl->dr_overridden_by, dr->dr_txg); @@ -3040,6 +3050,7 @@ dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data, ASSERT3P(dr, !=, NULL); ASSERT3U(dr->dr_txg, ==, tx->tx_txg); dl = &dr->dt.dl; + ASSERT0(dl->dr_has_raw_params); encode_embedded_bp_compressed(&dl->dr_overridden_by, data, comp, uncompressed_size, compressed_size); BPE_SET_ETYPE(&dl->dr_overridden_by, etype); @@ -5083,7 +5094,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) dsl_pool_undirty_space(dmu_objset_pool(os), dr->dr_accounted, zio->io_txg); - kmem_free(dr, sizeof (dbuf_dirty_record_t)); + kmem_cache_free(dbuf_dirty_kmem_cache, dr); } static void diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 3f87cfe6bee9..362415a25895 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1895,6 +1895,7 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) mutex_enter(&db->db_mtx); ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC); if (zio->io_error == 0) { + ASSERT0(dr->dt.dl.dr_has_raw_params); dr->dt.dl.dr_nopwrite = !!(zio->io_flags & ZIO_FLAG_NOPWRITE); if (dr->dt.dl.dr_nopwrite) { blkptr_t *bp = zio->io_bp; @@ -2190,6 +2191,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) return (SET_ERROR(EALREADY)); } + ASSERT0(dr->dt.dl.dr_has_raw_params); ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); dr->dt.dl.dr_override_state = DR_IN_DMU_SYNC; mutex_exit(&db->db_mtx); @@ -2657,6 +2659,7 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, db = (dmu_buf_impl_t *)dbuf; bp = &bps[i]; + ASSERT3U(db->db.db_object, !=, DMU_META_DNODE_OBJECT); ASSERT0(db->db_level); ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT(db->db_blkid != DMU_SPILL_BLKID); @@ -2672,11 +2675,6 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, db = (dmu_buf_impl_t *)dbuf; bp = &bps[i]; - ASSERT0(db->db_level); - ASSERT(db->db_blkid != DMU_BONUS_BLKID); - ASSERT(db->db_blkid != DMU_SPILL_BLKID); - ASSERT(BP_IS_HOLE(bp) || dbuf->db_size == BP_GET_LSIZE(bp)); - dmu_buf_will_clone_or_dio(dbuf, tx); mutex_enter(&db->db_mtx); @@ -2685,6 +2683,7 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, VERIFY(dr != NULL); ASSERT3U(dr->dr_txg, ==, tx->tx_txg); dl = &dr->dt.dl; + ASSERT0(dl->dr_has_raw_params); dl->dr_overridden_by = *bp; if (!BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) != 0) { if (!BP_IS_EMBEDDED(bp)) { diff --git a/module/zfs/dmu_direct.c b/module/zfs/dmu_direct.c index ed96e7515bc7..40b78b519f49 100644 --- a/module/zfs/dmu_direct.c +++ b/module/zfs/dmu_direct.c @@ -180,6 +180,7 @@ dmu_write_direct(zio_t *pio, dmu_buf_impl_t *db, abd_t *data, dmu_tx_t *tx) if (list_next(&db->db_dirty_records, dr_head) != NULL) zp.zp_nopwrite = B_FALSE; + ASSERT0(dr_head->dt.dl.dr_has_raw_params); ASSERT3S(dr_head->dt.dl.dr_override_state, ==, DR_NOT_OVERRIDDEN); dr_head->dt.dl.dr_override_state = DR_IN_DMU_SYNC; diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index f67dad002319..122d7d0d17d8 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -566,7 +566,7 @@ dnode_undirty_dbufs(list_t *list) mutex_destroy(&dr->dt.di.dr_mtx); list_destroy(&dr->dt.di.dr_children); } - kmem_free(dr, sizeof (dbuf_dirty_record_t)); + kmem_cache_free(dbuf_dirty_kmem_cache, dr); dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE); } } From c8aed9f97329bb328ea4fe677defdcb20d703bb3 Mon Sep 17 00:00:00 2001 From: Uglymotha Date: Tue, 5 Nov 2024 01:44:38 +0100 Subject: [PATCH 21/33] Verify parent_dev before calling udev_device_get_sysattr_value Not all udev devices have parent devices. Calling udev_device_get_ functions yield an assertion error if called with a NULL pointer. Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Sietse Co-authored-by: Sietse Closes #16705 Closes #16717 --- cmd/zed/zed_disk_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/zed/zed_disk_event.c b/cmd/zed/zed_disk_event.c index 32a8789d3001..4a2b7398e922 100644 --- a/cmd/zed/zed_disk_event.c +++ b/cmd/zed/zed_disk_event.c @@ -139,7 +139,8 @@ dev_event_nvlist(struct udev_device *dev) * is /dev/sda. */ struct udev_device *parent_dev = udev_device_get_parent(dev); - if ((value = udev_device_get_sysattr_value(parent_dev, "size")) + if (parent_dev != NULL && + (value = udev_device_get_sysattr_value(parent_dev, "size")) != NULL) { uint64_t numval = DEV_BSIZE; From 5a2333b10fd20eb5a11bbcd01da785b59063df11 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 4 Nov 2024 20:16:32 -0500 Subject: [PATCH 22/33] CI: Automate some GitHub PR status labels manipulations - Set/remove "Work in Progress"/"Code Review Needed" for drafts. - Remove "Accepted", "Inactive", "Revision Needed" and "Stale" on pushes and reopens. I hope this reduce chances of PRs being forgotten after requested modifications done due to stale labels. It is better to have no labels than incorrect ones saying there is nothing to look at. Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #16721 --- .github/workflows/labels.yml | 49 ++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 .github/workflows/labels.yml diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml new file mode 100644 index 000000000000..6193c8afeae9 --- /dev/null +++ b/.github/workflows/labels.yml @@ -0,0 +1,49 @@ +name: labels + +on: + pull_request_target: + types: [ opened, synchronize, reopened, converted_to_draft, ready_for_review ] + +permissions: + pull-requests: write + +jobs: + open: + runs-on: ubuntu-latest + if: ${{ github.event.action == 'opened' && github.event.pull_request.draft }} + steps: + - env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ISSUE: ${{ github.event.pull_request.html_url }} + run: | + gh pr edit $ISSUE --add-label "Status: Work in Progress" + + push: + runs-on: ubuntu-latest + if: ${{ github.event.action == 'synchronize' || github.event.action == 'reopened' }} + steps: + - env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ISSUE: ${{ github.event.pull_request.html_url }} + run: | + gh pr edit $ISSUE --remove-label "Status: Accepted,Status: Inactive,Status: Revision Needed,Status: Stale" + + draft: + runs-on: ubuntu-latest + if: ${{ github.event.action == 'converted_to_draft' }} + steps: + - env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ISSUE: ${{ github.event.pull_request.html_url }} + run: | + gh pr edit $ISSUE --remove-label "Status: Accepted,Status: Code Review Needed,Status: Inactive,Status: Revision Needed,Status: Stale" --add-label "Status: Work in Progress" + + rfr: + runs-on: ubuntu-latest + if: ${{ github.event.action == 'ready_for_review' }} + steps: + - env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ISSUE: ${{ github.event.pull_request.html_url }} + run: | + gh pr edit $ISSUE --remove-label "Status: Accepted,Status: Inactive,Status: Revision Needed,Status: Stale,Status: Work in Progress" --add-label "Status: Code Review Needed" From 5e726779f55d04d6b6c9bfb4945fd7c805120a45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Wed, 30 Oct 2024 12:41:45 +0100 Subject: [PATCH 23/33] module: unicode: u8_textprep_data.h tables: 2 => U8_UNICODE_LATEST+1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Ahelenia Ziemiańska Closes #16704 --- include/sys/u8_textprep_data.h | 56 +++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/include/sys/u8_textprep_data.h b/include/sys/u8_textprep_data.h index 2a97966ee56e..f9598c4d928c 100644 --- a/include/sys/u8_textprep_data.h +++ b/include/sys/u8_textprep_data.h @@ -146,7 +146,7 @@ typedef struct { * The common b1_tbl for combining class, decompositions, tolower, and * toupper case conversion mappings. */ -static const uchar_t u8_common_b1_tbl[2][256] = { +static const uchar_t u8_common_b1_tbl[U8_UNICODE_LATEST + 1][256] = { { 0, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, @@ -217,7 +217,8 @@ static const uchar_t u8_common_b1_tbl[2][256] = { }, }; -static const uchar_t u8_combining_class_b2_tbl[2][2][256] = { +static const uchar_t u8_combining_class_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = +{ { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -363,7 +364,8 @@ static const uchar_t u8_combining_class_b2_tbl[2][2][256] = { }; -static const uchar_t u8_combining_class_b3_tbl[2][9][256] = { +static const uchar_t u8_combining_class_b3_tbl[U8_UNICODE_LATEST + 1][9][256] = +{ { { /* Third byte table 0. */ N_, N_, N_, N_, N_, N_, N_, N_, @@ -986,7 +988,8 @@ static const uchar_t u8_combining_class_b3_tbl[2][9][256] = { * Unlike other b4_tbl, the b4_tbl for combining class data has * the combining class values not indices to the final tables. */ -static const uchar_t u8_combining_class_b4_tbl[2][55][256] = { +static const uchar_t u8_combining_class_b4_tbl[U8_UNICODE_LATEST + 1][55][256] = +{ { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -4733,7 +4736,7 @@ static const uchar_t u8_combining_class_b4_tbl[2][55][256] = { }, }; -static const uchar_t u8_composition_b1_tbl[2][256] = { +static const uchar_t u8_composition_b1_tbl[U8_UNICODE_LATEST + 1][256] = { { 0, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, @@ -4804,7 +4807,7 @@ static const uchar_t u8_composition_b1_tbl[2][256] = { }, }; -static const uchar_t u8_composition_b2_tbl[2][1][256] = { +static const uchar_t u8_composition_b2_tbl[U8_UNICODE_LATEST + 1][1][256] = { { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -4882,7 +4885,9 @@ static const uchar_t u8_composition_b2_tbl[2][1][256] = { }; -static const u8_displacement_t u8_composition_b3_tbl[2][5][256] = { +static const u8_displacement_t u8_composition_b3_tbl[ + U8_UNICODE_LATEST + 1][5][256] = +{ { { /* Third byte table 0. */ { 0x8000, 0 }, { N_, 0 }, { N_, 0 }, @@ -5769,7 +5774,7 @@ static const u8_displacement_t u8_composition_b3_tbl[2][5][256] = { }, }; -static const uchar_t u8_composition_b4_tbl[2][41][257] = { +static const uchar_t u8_composition_b4_tbl[U8_UNICODE_LATEST + 1][41][257] = { { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -8646,7 +8651,9 @@ static const uchar_t u8_composition_b4_tbl[2][41][257] = { }, }; -static const uint16_t u8_composition_b4_16bit_tbl[2][5][257] = { +static const uint16_t u8_composition_b4_16bit_tbl[ + U8_UNICODE_LATEST + 1][5][257] = +{ { { /* Fourth byte 16-bit table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -9003,7 +9010,7 @@ static const uint16_t u8_composition_b4_16bit_tbl[2][5][257] = { }, }; -static const uchar_t u8_composition_final_tbl[2][6623] = { +static const uchar_t u8_composition_final_tbl[U8_UNICODE_LATEST + 1][6623] = { { 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAE, FIL_, 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xA0, FIL_, @@ -10666,7 +10673,7 @@ static const uchar_t u8_composition_final_tbl[2][6623] = { }, }; -static const uchar_t u8_decomp_b2_tbl[2][2][256] = { +static const uchar_t u8_decomp_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = { { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -10812,7 +10819,8 @@ static const uchar_t u8_decomp_b2_tbl[2][2][256] = { }; -static const u8_displacement_t u8_decomp_b3_tbl[2][8][256] = { +static const u8_displacement_t u8_decomp_b3_tbl[U8_UNICODE_LATEST + 1][8][256] = +{ { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -12227,7 +12235,7 @@ static const u8_displacement_t u8_decomp_b3_tbl[2][8][256] = { }, }; -static const uchar_t u8_decomp_b4_tbl[2][118][257] = { +static const uchar_t u8_decomp_b4_tbl[U8_UNICODE_LATEST + 1][118][257] = { { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -20494,7 +20502,7 @@ static const uchar_t u8_decomp_b4_tbl[2][118][257] = { }, }; -static const uint16_t u8_decomp_b4_16bit_tbl[2][30][257] = { +static const uint16_t u8_decomp_b4_16bit_tbl[U8_UNICODE_LATEST + 1][30][257] = { { { /* Fourth byte 16-bit table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -22601,7 +22609,7 @@ static const uint16_t u8_decomp_b4_16bit_tbl[2][30][257] = { }, }; -static const uchar_t u8_decomp_final_tbl[2][19370] = { +static const uchar_t u8_decomp_final_tbl[U8_UNICODE_LATEST + 1][19370] = { { 0x20, 0x20, 0xCC, 0x88, 0x61, 0x20, 0xCC, 0x84, 0x32, 0x33, 0x20, 0xCC, 0x81, 0xCE, 0xBC, 0x20, @@ -27452,7 +27460,7 @@ static const uchar_t u8_decomp_final_tbl[2][19370] = { }, }; -static const uchar_t u8_case_common_b2_tbl[2][2][256] = { +static const uchar_t u8_case_common_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = { { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -27598,7 +27606,9 @@ static const uchar_t u8_case_common_b2_tbl[2][2][256] = { }; -static const u8_displacement_t u8_tolower_b3_tbl[2][5][256] = { +static const u8_displacement_t u8_tolower_b3_tbl[ + U8_UNICODE_LATEST + 1][5][256] = +{ { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -28265,7 +28275,7 @@ static const u8_displacement_t u8_tolower_b3_tbl[2][5][256] = { }, }; -static const uchar_t u8_tolower_b4_tbl[2][36][257] = { +static const uchar_t u8_tolower_b4_tbl[U8_UNICODE_LATEST + 1][36][257] = { { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -30792,7 +30802,7 @@ static const uchar_t u8_tolower_b4_tbl[2][36][257] = { }, }; -static const uchar_t u8_tolower_final_tbl[2][2299] = { +static const uchar_t u8_tolower_final_tbl[U8_UNICODE_LATEST + 1][2299] = { { 0xC3, 0xA0, 0xC3, 0xA1, 0xC3, 0xA2, 0xC3, 0xA3, 0xC3, 0xA4, 0xC3, 0xA5, 0xC3, 0xA6, 0xC3, 0xA7, @@ -31375,7 +31385,9 @@ static const uchar_t u8_tolower_final_tbl[2][2299] = { }, }; -static const u8_displacement_t u8_toupper_b3_tbl[2][5][256] = { +static const u8_displacement_t u8_toupper_b3_tbl[ + U8_UNICODE_LATEST + 1][5][256] = +{ { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -32042,7 +32054,7 @@ static const u8_displacement_t u8_toupper_b3_tbl[2][5][256] = { }, }; -static const uchar_t u8_toupper_b4_tbl[2][39][257] = { +static const uchar_t u8_toupper_b4_tbl[U8_UNICODE_LATEST + 1][39][257] = { { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -34779,7 +34791,7 @@ static const uchar_t u8_toupper_b4_tbl[2][39][257] = { }, }; -static const uchar_t u8_toupper_final_tbl[2][2318] = { +static const uchar_t u8_toupper_final_tbl[U8_UNICODE_LATEST + 1][2318] = { { 0xCE, 0x9C, 0xC3, 0x80, 0xC3, 0x81, 0xC3, 0x82, 0xC3, 0x83, 0xC3, 0x84, 0xC3, 0x85, 0xC3, 0x86, From e17a9698faf985ad20e0bc6ed56a0b8f2ca184e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Wed, 30 Oct 2024 12:50:30 +0100 Subject: [PATCH 24/33] module: unicode: #ifdef out unused U8_UNICODE_320 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Ahelenia Ziemiańska Closes #16704 --- include/sys/u8_textprep.h | 4 ++++ include/sys/u8_textprep_data.h | 44 ++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/include/sys/u8_textprep.h b/include/sys/u8_textprep.h index 1ec25613a63a..00b3217d8d6f 100644 --- a/include/sys/u8_textprep.h +++ b/include/sys/u8_textprep.h @@ -68,8 +68,12 @@ extern "C" { #define U8_TEXTPREP_IGNORE_INVALID (0x00020000) #define U8_TEXTPREP_NOWAIT (0x00040000) +#if 0 #define U8_UNICODE_320 (0) #define U8_UNICODE_500 (1) +#else +#define U8_UNICODE_500 (0) +#endif #define U8_UNICODE_LATEST (U8_UNICODE_500) #define U8_VALIDATE_ENTIRE (0x00100000) diff --git a/include/sys/u8_textprep_data.h b/include/sys/u8_textprep_data.h index f9598c4d928c..39139540a8d1 100644 --- a/include/sys/u8_textprep_data.h +++ b/include/sys/u8_textprep_data.h @@ -147,6 +147,7 @@ typedef struct { * toupper case conversion mappings. */ static const uchar_t u8_common_b1_tbl[U8_UNICODE_LATEST + 1][256] = { +#ifdef U8_UNICODE_320 { 0, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, @@ -181,6 +182,7 @@ static const uchar_t u8_common_b1_tbl[U8_UNICODE_LATEST + 1][256] = { 1, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, }, +#endif { 0, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, @@ -219,6 +221,7 @@ static const uchar_t u8_common_b1_tbl[U8_UNICODE_LATEST + 1][256] = { static const uchar_t u8_combining_class_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = { +#ifdef U8_UNICODE_320 { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -290,6 +293,7 @@ static const uchar_t u8_combining_class_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = }, }, +#endif { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -366,6 +370,7 @@ static const uchar_t u8_combining_class_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = static const uchar_t u8_combining_class_b3_tbl[U8_UNICODE_LATEST + 1][9][256] = { +#ifdef U8_UNICODE_320 { { /* Third byte table 0. */ N_, N_, N_, N_, N_, N_, N_, N_, @@ -674,6 +679,7 @@ static const uchar_t u8_combining_class_b3_tbl[U8_UNICODE_LATEST + 1][9][256] = N_, N_, N_, N_, N_, N_, N_, N_, }, }, +#endif { { /* Third byte table 0. */ N_, N_, N_, N_, N_, N_, N_, N_, @@ -990,6 +996,7 @@ static const uchar_t u8_combining_class_b3_tbl[U8_UNICODE_LATEST + 1][9][256] = */ static const uchar_t u8_combining_class_b4_tbl[U8_UNICODE_LATEST + 1][55][256] = { +#ifdef U8_UNICODE_320 { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -2862,6 +2869,7 @@ static const uchar_t u8_combining_class_b4_tbl[U8_UNICODE_LATEST + 1][55][256] = 0, 0, 0, 0, 0, 0, 0, 0, }, }, +#endif { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -4737,6 +4745,7 @@ static const uchar_t u8_combining_class_b4_tbl[U8_UNICODE_LATEST + 1][55][256] = }; static const uchar_t u8_composition_b1_tbl[U8_UNICODE_LATEST + 1][256] = { +#ifdef U8_UNICODE_320 { 0, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, @@ -4771,6 +4780,7 @@ static const uchar_t u8_composition_b1_tbl[U8_UNICODE_LATEST + 1][256] = { N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, }, +#endif { 0, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, @@ -4808,6 +4818,7 @@ static const uchar_t u8_composition_b1_tbl[U8_UNICODE_LATEST + 1][256] = { }; static const uchar_t u8_composition_b2_tbl[U8_UNICODE_LATEST + 1][1][256] = { +#ifdef U8_UNICODE_320 { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -4845,6 +4856,7 @@ static const uchar_t u8_composition_b2_tbl[U8_UNICODE_LATEST + 1][1][256] = { }, }, +#endif { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -4888,6 +4900,7 @@ static const uchar_t u8_composition_b2_tbl[U8_UNICODE_LATEST + 1][1][256] = { static const u8_displacement_t u8_composition_b3_tbl[ U8_UNICODE_LATEST + 1][5][256] = { +#ifdef U8_UNICODE_320 { { /* Third byte table 0. */ { 0x8000, 0 }, { N_, 0 }, { N_, 0 }, @@ -5330,6 +5343,7 @@ static const u8_displacement_t u8_composition_b3_tbl[ { N_, 0 }, }, }, +#endif { { /* Third byte table 0. */ { 0x8000, 0 }, { N_, 0 }, { N_, 0 }, @@ -5775,6 +5789,7 @@ static const u8_displacement_t u8_composition_b3_tbl[ }; static const uchar_t u8_composition_b4_tbl[U8_UNICODE_LATEST + 1][41][257] = { +#ifdef U8_UNICODE_320 { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -7212,6 +7227,7 @@ static const uchar_t u8_composition_b4_tbl[U8_UNICODE_LATEST + 1][41][257] = { 0, }, }, +#endif { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -8654,6 +8670,7 @@ static const uchar_t u8_composition_b4_tbl[U8_UNICODE_LATEST + 1][41][257] = { static const uint16_t u8_composition_b4_16bit_tbl[ U8_UNICODE_LATEST + 1][5][257] = { +#ifdef U8_UNICODE_320 { { /* Fourth byte 16-bit table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -8831,6 +8848,7 @@ static const uint16_t u8_composition_b4_16bit_tbl[ 362, }, }, +#endif { { /* Fourth byte 16-bit table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -9011,6 +9029,7 @@ static const uint16_t u8_composition_b4_16bit_tbl[ }; static const uchar_t u8_composition_final_tbl[U8_UNICODE_LATEST + 1][6623] = { +#ifdef U8_UNICODE_320 { 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAE, FIL_, 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xA0, FIL_, @@ -9841,6 +9860,7 @@ static const uchar_t u8_composition_final_tbl[U8_UNICODE_LATEST + 1][6623] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, +#endif { 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAE, FIL_, 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xA0, FIL_, @@ -10674,6 +10694,7 @@ static const uchar_t u8_composition_final_tbl[U8_UNICODE_LATEST + 1][6623] = { }; static const uchar_t u8_decomp_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = { +#ifdef U8_UNICODE_320 { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -10745,6 +10766,7 @@ static const uchar_t u8_decomp_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = { }, }, +#endif { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -10821,6 +10843,7 @@ static const uchar_t u8_decomp_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = { static const u8_displacement_t u8_decomp_b3_tbl[U8_UNICODE_LATEST + 1][8][256] = { +#ifdef U8_UNICODE_320 { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -11527,6 +11550,7 @@ static const u8_displacement_t u8_decomp_b3_tbl[U8_UNICODE_LATEST + 1][8][256] = { N_, 0 }, }, }, +#endif { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -12236,6 +12260,7 @@ static const u8_displacement_t u8_decomp_b3_tbl[U8_UNICODE_LATEST + 1][8][256] = }; static const uchar_t u8_decomp_b4_tbl[U8_UNICODE_LATEST + 1][118][257] = { +#ifdef U8_UNICODE_320 { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -16368,6 +16393,7 @@ static const uchar_t u8_decomp_b4_tbl[U8_UNICODE_LATEST + 1][118][257] = { 0, }, }, +#endif { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -20503,6 +20529,7 @@ static const uchar_t u8_decomp_b4_tbl[U8_UNICODE_LATEST + 1][118][257] = { }; static const uint16_t u8_decomp_b4_16bit_tbl[U8_UNICODE_LATEST + 1][30][257] = { +#ifdef U8_UNICODE_320 { { /* Fourth byte 16-bit table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -21555,6 +21582,7 @@ static const uint16_t u8_decomp_b4_16bit_tbl[U8_UNICODE_LATEST + 1][30][257] = { 0, }, }, +#endif { { /* Fourth byte 16-bit table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -22610,6 +22638,7 @@ static const uint16_t u8_decomp_b4_16bit_tbl[U8_UNICODE_LATEST + 1][30][257] = { }; static const uchar_t u8_decomp_final_tbl[U8_UNICODE_LATEST + 1][19370] = { +#ifdef U8_UNICODE_320 { 0x20, 0x20, 0xCC, 0x88, 0x61, 0x20, 0xCC, 0x84, 0x32, 0x33, 0x20, 0xCC, 0x81, 0xCE, 0xBC, 0x20, @@ -25034,6 +25063,7 @@ static const uchar_t u8_decomp_final_tbl[U8_UNICODE_LATEST + 1][19370] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, +#endif { 0x20, 0x20, 0xCC, 0x88, 0x61, 0x20, 0xCC, 0x84, 0x32, 0x33, 0x20, 0xCC, 0x81, 0xCE, 0xBC, 0x20, @@ -27461,6 +27491,7 @@ static const uchar_t u8_decomp_final_tbl[U8_UNICODE_LATEST + 1][19370] = { }; static const uchar_t u8_case_common_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = { +#ifdef U8_UNICODE_320 { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -27532,6 +27563,7 @@ static const uchar_t u8_case_common_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = { }, }, +#endif { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -27609,6 +27641,7 @@ static const uchar_t u8_case_common_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = { static const u8_displacement_t u8_tolower_b3_tbl[ U8_UNICODE_LATEST + 1][5][256] = { +#ifdef U8_UNICODE_320 { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -27941,6 +27974,7 @@ static const u8_displacement_t u8_tolower_b3_tbl[ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, }, }, +#endif { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -28276,6 +28310,7 @@ static const u8_displacement_t u8_tolower_b3_tbl[ }; static const uchar_t u8_tolower_b4_tbl[U8_UNICODE_LATEST + 1][36][257] = { +#ifdef U8_UNICODE_320 { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -29538,6 +29573,7 @@ static const uchar_t u8_tolower_b4_tbl[U8_UNICODE_LATEST + 1][36][257] = { 0, }, }, +#endif { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -30803,6 +30839,7 @@ static const uchar_t u8_tolower_b4_tbl[U8_UNICODE_LATEST + 1][36][257] = { }; static const uchar_t u8_tolower_final_tbl[U8_UNICODE_LATEST + 1][2299] = { +#ifdef U8_UNICODE_320 { 0xC3, 0xA0, 0xC3, 0xA1, 0xC3, 0xA2, 0xC3, 0xA3, 0xC3, 0xA4, 0xC3, 0xA5, 0xC3, 0xA6, 0xC3, 0xA7, @@ -31093,6 +31130,7 @@ static const uchar_t u8_tolower_final_tbl[U8_UNICODE_LATEST + 1][2299] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, +#endif { 0xC3, 0xA0, 0xC3, 0xA1, 0xC3, 0xA2, 0xC3, 0xA3, 0xC3, 0xA4, 0xC3, 0xA5, 0xC3, 0xA6, 0xC3, 0xA7, @@ -31388,6 +31426,7 @@ static const uchar_t u8_tolower_final_tbl[U8_UNICODE_LATEST + 1][2299] = { static const u8_displacement_t u8_toupper_b3_tbl[ U8_UNICODE_LATEST + 1][5][256] = { +#ifdef U8_UNICODE_320 { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -31720,6 +31759,7 @@ static const u8_displacement_t u8_toupper_b3_tbl[ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, }, }, +#endif { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -32055,6 +32095,7 @@ static const u8_displacement_t u8_toupper_b3_tbl[ }; static const uchar_t u8_toupper_b4_tbl[U8_UNICODE_LATEST + 1][39][257] = { +#ifdef U8_UNICODE_320 { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -33422,6 +33463,7 @@ static const uchar_t u8_toupper_b4_tbl[U8_UNICODE_LATEST + 1][39][257] = { 0, }, }, +#endif { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -34792,6 +34834,7 @@ static const uchar_t u8_toupper_b4_tbl[U8_UNICODE_LATEST + 1][39][257] = { }; static const uchar_t u8_toupper_final_tbl[U8_UNICODE_LATEST + 1][2318] = { +#ifdef U8_UNICODE_320 { 0xCE, 0x9C, 0xC3, 0x80, 0xC3, 0x81, 0xC3, 0x82, 0xC3, 0x83, 0xC3, 0x84, 0xC3, 0x85, 0xC3, 0x86, @@ -35084,6 +35127,7 @@ static const uchar_t u8_toupper_final_tbl[U8_UNICODE_LATEST + 1][2318] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, +#endif { 0xCE, 0x9C, 0xC3, 0x80, 0xC3, 0x81, 0xC3, 0x82, 0xC3, 0x83, 0xC3, 0x84, 0xC3, 0x85, 0xC3, 0x86, From 60c202cca47fe4b7284b76bcd5aaef881182cf92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Wed, 30 Oct 2024 12:58:12 +0100 Subject: [PATCH 25/33] module: unicode: remove unused tolower transformations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the previous patch this yields $ size -G ./module/zfs.ko ./module/zfs.new.ko text data bss total filename 2865126 1597982 755768 5218876 ./module/zfs.ko 2864038 1429784 755768 5049590 ./module/zfs.new.ko -1088 -168198 -1k -164k Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Ahelenia Ziemiańska Closes #16704 --- include/sys/u8_textprep.h | 4 ++++ include/sys/u8_textprep_data.h | 2 ++ module/unicode/u8_textprep.c | 37 +++++++++++++++++++++++++++++----- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/include/sys/u8_textprep.h b/include/sys/u8_textprep.h index 00b3217d8d6f..b0bae5833acd 100644 --- a/include/sys/u8_textprep.h +++ b/include/sys/u8_textprep.h @@ -45,7 +45,9 @@ extern "C" { */ #define U8_STRCMP_CS (0x00000001) #define U8_STRCMP_CI_UPPER (0x00000002) +#if 0 #define U8_STRCMP_CI_LOWER (0x00000004) +#endif #define U8_CANON_DECOMP (0x00000010) #define U8_COMPAT_DECOMP (0x00000020) @@ -57,7 +59,9 @@ extern "C" { #define U8_STRCMP_NFKC (U8_COMPAT_DECOMP | U8_CANON_COMP) #define U8_TEXTPREP_TOUPPER (U8_STRCMP_CI_UPPER) +#ifdef U8_STRCMP_CI_LOWER #define U8_TEXTPREP_TOLOWER (U8_STRCMP_CI_LOWER) +#endif #define U8_TEXTPREP_NFD (U8_STRCMP_NFD) #define U8_TEXTPREP_NFC (U8_STRCMP_NFC) diff --git a/include/sys/u8_textprep_data.h b/include/sys/u8_textprep_data.h index 39139540a8d1..bd6af0e8f661 100644 --- a/include/sys/u8_textprep_data.h +++ b/include/sys/u8_textprep_data.h @@ -27638,6 +27638,7 @@ static const uchar_t u8_case_common_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = { }; +#ifdef U8_STRCMP_CI_LOWER static const u8_displacement_t u8_tolower_b3_tbl[ U8_UNICODE_LATEST + 1][5][256] = { @@ -31422,6 +31423,7 @@ static const uchar_t u8_tolower_final_tbl[U8_UNICODE_LATEST + 1][2299] = { 0x90, 0x91, 0x8F, }, }; +#endif static const u8_displacement_t u8_toupper_b3_tbl[ U8_UNICODE_LATEST + 1][5][256] = diff --git a/module/unicode/u8_textprep.c b/module/unicode/u8_textprep.c index 49e22c88cde7..281396678ec2 100644 --- a/module/unicode/u8_textprep.c +++ b/module/unicode/u8_textprep.c @@ -527,6 +527,7 @@ do_case_conv(int uv, uchar_t *u8s, uchar_t *s, int sz, boolean_t is_it_toupper) for (i = 0; start_id < end_id; start_id++) u8s[i++] = u8_toupper_final_tbl[uv][b3_base + start_id]; } else { +#ifdef U8_STRCMP_CI_LOWER b3_tbl = u8_tolower_b3_tbl[uv][b2][b3].tbl_id; if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF) return ((size_t)sz); @@ -541,6 +542,9 @@ do_case_conv(int uv, uchar_t *u8s, uchar_t *s, int sz, boolean_t is_it_toupper) for (i = 0; start_id < end_id; start_id++) u8s[i++] = u8_tolower_final_tbl[uv][b3_base + start_id]; +#else + __builtin_unreachable(); +#endif } /* @@ -1753,7 +1757,11 @@ do_norm_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1, size_t n2, s2last = s2 + n2; is_it_toupper = flag & U8_TEXTPREP_TOUPPER; +#ifdef U8_STRCMP_CI_LOWER is_it_tolower = flag & U8_TEXTPREP_TOLOWER; +#else + is_it_tolower = 0; +#endif canonical_decomposition = flag & U8_CANON_DECOMP; compatibility_decomposition = flag & U8_COMPAT_DECOMP; canonical_composition = flag & U8_CANON_COMP; @@ -1870,12 +1878,22 @@ u8_strcmp(const char *s1, const char *s2, size_t n, int flag, size_t uv, if (flag == 0) { flag = U8_STRCMP_CS; } else { - f = flag & (U8_STRCMP_CS | U8_STRCMP_CI_UPPER | - U8_STRCMP_CI_LOWER); +#ifdef U8_STRCMP_CI_LOWER + f = flag & (U8_STRCMP_CS | U8_STRCMP_CI_UPPER + | U8_STRCMP_CI_LOWER); +#else + f = flag & (U8_STRCMP_CS | U8_STRCMP_CI_UPPER); +#endif if (f == 0) { flag |= U8_STRCMP_CS; - } else if (f != U8_STRCMP_CS && f != U8_STRCMP_CI_UPPER && - f != U8_STRCMP_CI_LOWER) { + } +#ifdef U8_STRCMP_CI_LOWER + else if (f != U8_STRCMP_CS && f != U8_STRCMP_CI_UPPER && + f != U8_STRCMP_CI_LOWER) +#else + else if (f != U8_STRCMP_CS && f != U8_STRCMP_CI_UPPER) +#endif + { *errnum = EBADF; flag = U8_STRCMP_CS; } @@ -1908,10 +1926,13 @@ u8_strcmp(const char *s1, const char *s2, size_t n, int flag, size_t uv, if (flag == U8_STRCMP_CI_UPPER) { return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2, n1, n2, B_TRUE, errnum)); - } else if (flag == U8_STRCMP_CI_LOWER) { + } +#ifdef U8_STRCMP_CI_LOWER + else if (flag == U8_STRCMP_CI_LOWER) { return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2, n1, n2, B_FALSE, errnum)); } +#endif return (do_norm_compare(uv, (uchar_t *)s1, (uchar_t *)s2, n1, n2, flag, errnum)); @@ -1945,11 +1966,13 @@ u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen, return ((size_t)-1); } +#ifdef U8_TEXTPREP_TOLOWER f = flag & (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER); if (f == (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER)) { *errnum = EBADF; return ((size_t)-1); } +#endif f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP); if (f && f != U8_TEXTPREP_NFD && f != U8_TEXTPREP_NFC && @@ -1974,7 +1997,11 @@ u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen, do_not_ignore_null = !(flag & U8_TEXTPREP_IGNORE_NULL); do_not_ignore_invalid = !(flag & U8_TEXTPREP_IGNORE_INVALID); is_it_toupper = flag & U8_TEXTPREP_TOUPPER; +#ifdef U8_TEXTPREP_TOLOWER is_it_tolower = flag & U8_TEXTPREP_TOLOWER; +#else + is_it_tolower = 0; +#endif ret_val = 0; From f38e2d239fe3654725f92600a4f7754c4b1f56dd Mon Sep 17 00:00:00 2001 From: tstabrawa <59430211+tstabrawa@users.noreply.github.com> Date: Sun, 3 Nov 2024 15:30:23 -0600 Subject: [PATCH 26/33] Revert "Avoid BUG in migrate_folio_extra" This reverts commit b052035990594408899fa32fd4ad6603b75b6c6d. Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Reviewed-by: Brian Atkinson Signed-off-by: tstabrawa <59430211+tstabrawa@users.noreply.github.com> Closes #16568 Closes #16723 --- config/kernel-vfs-invalidate_folio.m4 | 33 ------------------- config/kernel-vfs-release_folio.m4 | 32 ------------------- config/kernel.m4 | 4 --- module/os/linux/zfs/zfs_vnops_os.c | 17 ---------- module/os/linux/zfs/zfs_znode_os.c | 8 ----- module/os/linux/zfs/zpl_file.c | 46 --------------------------- 6 files changed, 140 deletions(-) delete mode 100644 config/kernel-vfs-invalidate_folio.m4 delete mode 100644 config/kernel-vfs-release_folio.m4 diff --git a/config/kernel-vfs-invalidate_folio.m4 b/config/kernel-vfs-invalidate_folio.m4 deleted file mode 100644 index 61a5c8478af1..000000000000 --- a/config/kernel-vfs-invalidate_folio.m4 +++ /dev/null @@ -1,33 +0,0 @@ -dnl # -dnl # Linux 5.18 uses invalidate_folio in lieu of invalidate_page -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_INVALIDATE_FOLIO], [ - ZFS_LINUX_TEST_SRC([vfs_has_invalidate_folio], [ - #include - - static void - test_invalidate_folio(struct folio *folio, size_t offset, - size_t len) { - (void) folio; (void) offset; (void) len; - return; - } - - static const struct address_space_operations - aops __attribute__ ((unused)) = { - .invalidate_folio = test_invalidate_folio, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_INVALIDATE_FOLIO], [ - dnl # - dnl # Linux 5.18 uses invalidate_folio in lieu of invalidate_page - dnl # - AC_MSG_CHECKING([whether invalidate_folio exists]) - ZFS_LINUX_TEST_RESULT([vfs_has_invalidate_folio], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_INVALIDATE_FOLIO, 1, [invalidate_folio exists]) - ],[ - AC_MSG_RESULT([no]) - ]) -]) diff --git a/config/kernel-vfs-release_folio.m4 b/config/kernel-vfs-release_folio.m4 deleted file mode 100644 index f31db5677fd3..000000000000 --- a/config/kernel-vfs-release_folio.m4 +++ /dev/null @@ -1,32 +0,0 @@ -dnl # -dnl # Linux 5.19 uses release_folio in lieu of releasepage -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_RELEASE_FOLIO], [ - ZFS_LINUX_TEST_SRC([vfs_has_release_folio], [ - #include - - static bool - test_release_folio(struct folio *folio, gfp_t gfp) { - (void) folio; (void) gfp; - return (0); - } - - static const struct address_space_operations - aops __attribute__ ((unused)) = { - .release_folio = test_release_folio, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_RELEASE_FOLIO], [ - dnl # - dnl # Linux 5.19 uses release_folio in lieu of releasepage - dnl # - AC_MSG_CHECKING([whether release_folio exists]) - ZFS_LINUX_TEST_RESULT([vfs_has_release_folio], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_RELEASE_FOLIO, 1, [release_folio exists]) - ],[ - AC_MSG_RESULT([no]) - ]) -]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 556df58082f9..761f9310753a 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -77,8 +77,6 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_SGET ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO - ZFS_AC_KERNEL_SRC_VFS_RELEASE_FOLIO - ZFS_AC_KERNEL_SRC_VFS_INVALIDATE_FOLIO ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO ZFS_AC_KERNEL_SRC_VFS_READPAGES @@ -189,8 +187,6 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_SGET ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_VFS_READ_FOLIO - ZFS_AC_KERNEL_VFS_RELEASE_FOLIO - ZFS_AC_KERNEL_VFS_INVALIDATE_FOLIO ZFS_AC_KERNEL_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_VFS_DIRECT_IO ZFS_AC_KERNEL_VFS_READPAGES diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 469197220859..dd9fd760b9c2 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -260,15 +260,6 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os) } else { ClearPageError(pp); SetPageUptodate(pp); - if (!PagePrivate(pp)) { - /* - * Set private bit so page migration - * will wait for us to finish writeback - * before calling migrate_folio(). - */ - SetPagePrivate(pp); - get_page(pp); - } if (mapping_writably_mapped(mp)) flush_dcache_page(pp); @@ -4090,14 +4081,6 @@ zfs_fillpage(struct inode *ip, struct page *pp) } else { ClearPageError(pp); SetPageUptodate(pp); - if (!PagePrivate(pp)) { - /* - * Set private bit so page migration will wait for us to - * finish writeback before calling migrate_folio(). - */ - SetPagePrivate(pp); - get_page(pp); - } } return (error); diff --git a/module/os/linux/zfs/zfs_znode_os.c b/module/os/linux/zfs/zfs_znode_os.c index bc1e17f086d9..bbaca2f58394 100644 --- a/module/os/linux/zfs/zfs_znode_os.c +++ b/module/os/linux/zfs/zfs_znode_os.c @@ -1577,14 +1577,6 @@ zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len) mark_page_accessed(pp); SetPageUptodate(pp); ClearPageError(pp); - if (!PagePrivate(pp)) { - /* - * Set private bit so page migration will wait for us to - * finish writeback before calling migrate_folio(). - */ - SetPagePrivate(pp); - get_page(pp); - } unlock_page(pp); put_page(pp); } diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index 50c63695dcc8..4d1bf1d5477f 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -607,42 +607,6 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc) return (zpl_putpage(pp, wbc, &for_sync)); } -static int -zpl_releasepage(struct page *pp, gfp_t gfp) -{ - if (PagePrivate(pp)) { - ClearPagePrivate(pp); - put_page(pp); - } - return (1); -} - -#ifdef HAVE_VFS_RELEASE_FOLIO -static bool -zpl_release_folio(struct folio *folio, gfp_t gfp) -{ - return (zpl_releasepage(&folio->page, gfp)); -} -#endif - -#ifdef HAVE_VFS_INVALIDATE_FOLIO -static void -zpl_invalidate_folio(struct folio *folio, size_t offset, size_t len) -{ - if ((offset == 0) && (len == PAGE_SIZE)) { - zpl_releasepage(&folio->page, 0); - } -} -#else -static void -zpl_invalidatepage(struct page *pp, unsigned int offset, unsigned int len) -{ - if ((offset == 0) && (len == PAGE_SIZE)) { - zpl_releasepage(pp, 0); - } -} -#endif - /* * The flag combination which matches the behavior of zfs_space() is * FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. The FALLOC_FL_PUNCH_HOLE @@ -1126,16 +1090,6 @@ const struct address_space_operations zpl_address_space_operations = { #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO .dirty_folio = filemap_dirty_folio, #endif -#ifdef HAVE_VFS_RELEASE_FOLIO - .release_folio = zpl_release_folio, -#else - .releasepage = zpl_releasepage, -#endif -#ifdef HAVE_VFS_INVALIDATE_FOLIO - .invalidate_folio = zpl_invalidate_folio, -#else - .invalidatepage = zpl_invalidatepage, -#endif }; const struct file_operations zpl_file_operations = { From 7b6e9675daefcc94393293c46e4672e66490757d Mon Sep 17 00:00:00 2001 From: tstabrawa <59430211+tstabrawa@users.noreply.github.com> Date: Sun, 3 Nov 2024 16:00:20 -0600 Subject: [PATCH 27/33] Use simple folio migration function Avoids using fallback_migrate_folio, which starts unnecessary writeback (leading to BUG in migrate_folio_extra). Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Reviewed-by: Brian Atkinson Signed-off-by: tstabrawa <59430211+tstabrawa@users.noreply.github.com> Closes #16568 Closes #16723 --- config/kernel-vfs-migrate_folio.m4 | 27 +++++++++++++++++++++++++++ config/kernel.m4 | 2 ++ module/os/linux/zfs/zpl_file.c | 6 ++++++ 3 files changed, 35 insertions(+) create mode 100644 config/kernel-vfs-migrate_folio.m4 diff --git a/config/kernel-vfs-migrate_folio.m4 b/config/kernel-vfs-migrate_folio.m4 new file mode 100644 index 000000000000..186cd0581a17 --- /dev/null +++ b/config/kernel-vfs-migrate_folio.m4 @@ -0,0 +1,27 @@ +dnl # +dnl # Linux 6.0 uses migrate_folio in lieu of migrate_page +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_MIGRATE_FOLIO], [ + ZFS_LINUX_TEST_SRC([vfs_has_migrate_folio], [ + #include + #include + + static const struct address_space_operations + aops __attribute__ ((unused)) = { + .migrate_folio = migrate_folio, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_MIGRATE_FOLIO], [ + dnl # + dnl # Linux 6.0 uses migrate_folio in lieu of migrate_page + dnl # + AC_MSG_CHECKING([whether migrate_folio exists]) + ZFS_LINUX_TEST_RESULT([vfs_has_migrate_folio], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_MIGRATE_FOLIO, 1, [migrate_folio exists]) + ],[ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 761f9310753a..78f178ff27ac 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -77,6 +77,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_SGET ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO + ZFS_AC_KERNEL_SRC_VFS_MIGRATE_FOLIO ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO ZFS_AC_KERNEL_SRC_VFS_READPAGES @@ -187,6 +188,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_SGET ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_VFS_READ_FOLIO + ZFS_AC_KERNEL_VFS_MIGRATE_FOLIO ZFS_AC_KERNEL_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_VFS_DIRECT_IO ZFS_AC_KERNEL_VFS_READPAGES diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index 4d1bf1d5477f..f6e014327717 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -28,6 +28,7 @@ #include #endif #include +#include #include #include #include @@ -1090,6 +1091,11 @@ const struct address_space_operations zpl_address_space_operations = { #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO .dirty_folio = filemap_dirty_folio, #endif +#ifdef HAVE_VFS_MIGRATE_FOLIO + .migrate_folio = migrate_folio, +#else + .migratepage = migrate_page, +#endif }; const struct file_operations zpl_file_operations = { From 5945676bcc5d8b45554c93ea08a0d1f654c7075e Mon Sep 17 00:00:00 2001 From: Chunwei Chen Date: Wed, 6 Nov 2024 11:52:01 -0800 Subject: [PATCH 28/33] ZFS send should use spill block prefetched from send_reader_thread Currently, even though send_reader_thread prefetches spill block, do_dump() will not use it and issues its own blocking arc_read. This causes significant performance degradation when sending datasets with lots of spill blocks. For unmodified spill blocks, we also create send_range struct for them in send_reader_thread and issue prefetches for them. We piggyback them on the dnode send_range instead of enqueueing them so we don't break send_range_after check. Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Chunwei Chen Co-authored-by: david.chen Closes #16701 --- module/zfs/dmu_send.c | 126 +++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 62 deletions(-) diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index a174972e9b57..aeb8ff3b6688 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -180,6 +180,8 @@ struct send_range { */ dnode_phys_t *dnp; blkptr_t bp; + /* Piggyback unmodified spill block */ + struct send_range *spill_range; } object; struct srr { uint32_t datablksz; @@ -231,6 +233,8 @@ range_free(struct send_range *range) size_t size = sizeof (dnode_phys_t) * (range->sru.object.dnp->dn_extra_slots + 1); kmem_free(range->sru.object.dnp, size); + if (range->sru.object.spill_range) + range_free(range->sru.object.spill_range); } else if (range->type == DATA) { mutex_enter(&range->sru.data.lock); while (range->sru.data.io_outstanding) @@ -617,7 +621,7 @@ dump_spill(dmu_send_cookie_t *dscp, const blkptr_t *bp, uint64_t object, drrs->drr_length = blksz; drrs->drr_toguid = dscp->dsc_toguid; - /* See comment in dump_dnode() for full details */ + /* See comment in piggyback_unmodified_spill() for full details */ if (zfs_send_unmodified_spill_blocks && (BP_GET_LOGICAL_BIRTH(bp) <= dscp->dsc_fromtxg)) { drrs->drr_flags |= DRR_SPILL_UNMODIFIED; @@ -793,35 +797,6 @@ dump_dnode(dmu_send_cookie_t *dscp, const blkptr_t *bp, uint64_t object, (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0) return (SET_ERROR(EINTR)); - /* - * Send DRR_SPILL records for unmodified spill blocks. This is useful - * because changing certain attributes of the object (e.g. blocksize) - * can cause old versions of ZFS to incorrectly remove a spill block. - * Including these records in the stream forces an up to date version - * to always be written ensuring they're never lost. Current versions - * of the code which understand the DRR_FLAG_SPILL_BLOCK feature can - * ignore these unmodified spill blocks. - */ - if (zfs_send_unmodified_spill_blocks && - (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && - (BP_GET_LOGICAL_BIRTH(DN_SPILL_BLKPTR(dnp)) <= dscp->dsc_fromtxg)) { - struct send_range record; - blkptr_t *bp = DN_SPILL_BLKPTR(dnp); - - memset(&record, 0, sizeof (struct send_range)); - record.type = DATA; - record.object = object; - record.eos_marker = B_FALSE; - record.start_blkid = DMU_SPILL_BLKID; - record.end_blkid = record.start_blkid + 1; - record.sru.data.bp = *bp; - record.sru.data.obj_type = dnp->dn_type; - record.sru.data.datablksz = BP_GET_LSIZE(bp); - - if (do_dump(dscp, &record) != 0) - return (SET_ERROR(EINTR)); - } - if (dscp->dsc_err != 0) return (SET_ERROR(EINTR)); @@ -911,6 +886,9 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) case OBJECT: err = dump_dnode(dscp, &range->sru.object.bp, range->object, range->sru.object.dnp); + /* Dump piggybacked unmodified spill block */ + if (!err && range->sru.object.spill_range) + err = do_dump(dscp, range->sru.object.spill_range); return (err); case OBJECT_RANGE: { ASSERT3U(range->start_blkid + 1, ==, range->end_blkid); @@ -939,34 +917,7 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) ASSERT3U(srdp->datablksz, ==, BP_GET_LSIZE(bp)); ASSERT3U(range->start_blkid + 1, ==, range->end_blkid); - if (BP_GET_TYPE(bp) == DMU_OT_SA) { - arc_flags_t aflags = ARC_FLAG_WAIT; - zio_flag_t zioflags = ZIO_FLAG_CANFAIL; - if (dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW) { - ASSERT(BP_IS_PROTECTED(bp)); - zioflags |= ZIO_FLAG_RAW; - } - - zbookmark_phys_t zb; - ASSERT3U(range->start_blkid, ==, DMU_SPILL_BLKID); - zb.zb_objset = dmu_objset_id(dscp->dsc_os); - zb.zb_object = range->object; - zb.zb_level = 0; - zb.zb_blkid = range->start_blkid; - - arc_buf_t *abuf = NULL; - if (!dscp->dsc_dso->dso_dryrun && arc_read(NULL, spa, - bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, - zioflags, &aflags, &zb) != 0) - return (SET_ERROR(EIO)); - - err = dump_spill(dscp, bp, zb.zb_object, - (abuf == NULL ? NULL : abuf->b_data)); - if (abuf != NULL) - arc_buf_destroy(abuf, &abuf); - return (err); - } if (send_do_embed(bp, dscp->dsc_featureflags)) { err = dump_write_embedded(dscp, range->object, range->start_blkid * srdp->datablksz, @@ -975,8 +926,9 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) } ASSERT(range->object > dscp->dsc_resume_object || (range->object == dscp->dsc_resume_object && + (range->start_blkid == DMU_SPILL_BLKID || range->start_blkid * srdp->datablksz >= - dscp->dsc_resume_offset)); + dscp->dsc_resume_offset))); /* it's a level-0 block of a regular object */ mutex_enter(&srdp->lock); @@ -1006,8 +958,6 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) ASSERT(dscp->dsc_dso->dso_dryrun || srdp->abuf != NULL || srdp->abd != NULL); - uint64_t offset = range->start_blkid * srdp->datablksz; - char *data = NULL; if (srdp->abd != NULL) { data = abd_to_buf(srdp->abd); @@ -1016,6 +966,14 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) data = srdp->abuf->b_data; } + if (BP_GET_TYPE(bp) == DMU_OT_SA) { + ASSERT3U(range->start_blkid, ==, DMU_SPILL_BLKID); + err = dump_spill(dscp, bp, range->object, data); + return (err); + } + + uint64_t offset = range->start_blkid * srdp->datablksz; + /* * If we have large blocks stored on disk but the send flags * don't allow us to send large blocks, we split the data from @@ -1098,6 +1056,8 @@ range_alloc(enum type type, uint64_t object, uint64_t start_blkid, range->sru.data.io_outstanding = 0; range->sru.data.io_err = 0; range->sru.data.io_compressed = B_FALSE; + } else if (type == OBJECT) { + range->sru.object.spill_range = NULL; } return (range); } @@ -1742,6 +1702,45 @@ enqueue_range(struct send_reader_thread_arg *srta, bqueue_t *q, dnode_t *dn, bqueue_enqueue(q, range, datablksz); } +/* + * Send DRR_SPILL records for unmodified spill blocks. This is useful + * because changing certain attributes of the object (e.g. blocksize) + * can cause old versions of ZFS to incorrectly remove a spill block. + * Including these records in the stream forces an up to date version + * to always be written ensuring they're never lost. Current versions + * of the code which understand the DRR_FLAG_SPILL_BLOCK feature can + * ignore these unmodified spill blocks. + * + * We piggyback the spill_range to dnode range instead of enqueueing it + * so send_range_after won't complain. + */ +static uint64_t +piggyback_unmodified_spill(struct send_reader_thread_arg *srta, + struct send_range *range) +{ + ASSERT3U(range->type, ==, OBJECT); + + dnode_phys_t *dnp = range->sru.object.dnp; + uint64_t fromtxg = srta->smta->to_arg->fromtxg; + + if (!zfs_send_unmodified_spill_blocks || + !(dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) || + !(BP_GET_LOGICAL_BIRTH(DN_SPILL_BLKPTR(dnp)) <= fromtxg)) + return (0); + + blkptr_t *bp = DN_SPILL_BLKPTR(dnp); + struct send_range *spill_range = range_alloc(DATA, range->object, + DMU_SPILL_BLKID, DMU_SPILL_BLKID+1, B_FALSE); + spill_range->sru.data.bp = *bp; + spill_range->sru.data.obj_type = dnp->dn_type; + spill_range->sru.data.datablksz = BP_GET_LSIZE(bp); + + issue_data_read(srta, spill_range); + range->sru.object.spill_range = spill_range; + + return (BP_GET_LSIZE(bp)); +} + /* * This thread is responsible for two things: First, it retrieves the correct * blkptr in the to ds if we need to send the data because of something from @@ -1773,17 +1772,20 @@ send_reader_thread(void *arg) uint64_t last_obj_exists = B_TRUE; while (!range->eos_marker && !srta->cancel && smta->error == 0 && err == 0) { + uint64_t spill = 0; switch (range->type) { case DATA: issue_data_read(srta, range); bqueue_enqueue(outq, range, range->sru.data.datablksz); range = get_next_range_nofree(inq, range); break; - case HOLE: case OBJECT: + spill = piggyback_unmodified_spill(srta, range); + zfs_fallthrough; + case HOLE: case OBJECT_RANGE: case REDACT: // Redacted blocks must exist - bqueue_enqueue(outq, range, sizeof (*range)); + bqueue_enqueue(outq, range, sizeof (*range) + spill); range = get_next_range_nofree(inq, range); break; case PREVIOUSLY_REDACTED: { From 187f93137265feb2ea84f109168dffce2d91e7ef Mon Sep 17 00:00:00 2001 From: Brian Atkinson Date: Thu, 7 Nov 2024 14:11:05 -0500 Subject: [PATCH 29/33] Update ABD stats for linear page Linux a10e552 updated abd_free_linear_page() to no longer call abd_update_scatter_stat(). This meant that linear pages that were not attached to Direct I/O requests were not doing waste accounting for the ARC. This led to performance issues due to incorrect ARC accounting that resulted in 100% of CPU time being spent in arc_evict() during prolonged I/O workloads with the ARC. The call to abd_update_scatter_stats() is now conditionally called in abd_free_linear_page() when the ABD is not from a Direct I/O request. Reviewed-by: Mark Maybee Reviewed-by: Tony Nguyen Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Brian Atkinson Closes #16729 --- module/os/linux/zfs/abd_os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c index 303af48cf3af..04ab8bbca352 100644 --- a/module/os/linux/zfs/abd_os.c +++ b/module/os/linux/zfs/abd_os.c @@ -701,6 +701,8 @@ abd_free_linear_page(abd_t *abd) /* When backed by user page unmap it */ if (abd_is_from_pages(abd)) zfs_kunmap(sg_page(sg)); + else + abd_update_scatter_stats(abd, ABDSTAT_DECR); abd->abd_flags &= ~ABD_FLAG_LINEAR; abd->abd_flags &= ~ABD_FLAG_LINEAR_PAGE; From 4a7a0a0290118382a2c7be320c6e14d72b0637a1 Mon Sep 17 00:00:00 2001 From: Sam James Date: Thu, 7 Nov 2024 19:20:37 +0000 Subject: [PATCH 30/33] Use instead of When building on musl, we get: ``` In file included from tests/zfs-tests/cmd/getversion.c:22: /usr/include/sys/fcntl.h:1:2: error: #warning redirecting incorrect #include to [-Werror=cpp] 1 | #warning redirecting incorrect #include to In file included from module/os/linux/zfs/vdev_file.c:36: /usr/include/sys/fcntl.h:1:2: error: #warning redirecting incorrect #include to [-Werror=cpp] 1 | #warning redirecting incorrect #include to ``` Bug: https://bugs.gentoo.org/925235 Reviewed-by: Brian Behlendorf Signed-off-by: Sam James Closes #15925 --- module/os/linux/zfs/vdev_file.c | 4 +++- tests/zfs-tests/cmd/getversion.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/module/os/linux/zfs/vdev_file.c b/module/os/linux/zfs/vdev_file.c index 4bffb6412ffd..2cab6532487a 100644 --- a/module/os/linux/zfs/vdev_file.c +++ b/module/os/linux/zfs/vdev_file.c @@ -33,11 +33,13 @@ #include #include #include -#include #include #include #ifdef _KERNEL #include +#include +#else +#include #endif /* * Virtual device vector for files. diff --git a/tests/zfs-tests/cmd/getversion.c b/tests/zfs-tests/cmd/getversion.c index 62c1c5b6abc0..1e026b92d17d 100644 --- a/tests/zfs-tests/cmd/getversion.c +++ b/tests/zfs-tests/cmd/getversion.c @@ -19,9 +19,9 @@ */ #include -#include #include #include +#include #include #include #include From 57fc5971f6be634dc63642f15b37cc2349ed4bab Mon Sep 17 00:00:00 2001 From: Umer Saleem Date: Fri, 8 Nov 2024 00:31:14 +0500 Subject: [PATCH 31/33] JSON: fix user properties output for zfs list This commit fixes JSON output for zfs list when user properties are requested with -o flag. This case needed to be handled specifically since zfs_prop_to_name does not return property name for user properties, instead it is stored in pl->pl_user_prop. Reviewed-by: Ameer Hamza Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Umer Saleem Closes #16732 --- cmd/zfs/zfs_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 97397726c709..7836f5909f4a 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -3761,8 +3761,13 @@ collect_dataset(zfs_handle_t *zhp, list_cbdata_t *cb) if (cb->cb_json) { if (pl->pl_prop == ZFS_PROP_NAME) continue; + const char *prop_name; + if (pl->pl_prop != ZPROP_USERPROP) + prop_name = zfs_prop_to_name(pl->pl_prop); + else + prop_name = pl->pl_user_prop; if (zprop_nvlist_one_property( - zfs_prop_to_name(pl->pl_prop), propstr, + prop_name, propstr, sourcetype, source, NULL, props, cb->cb_json_as_int) != 0) nomem(); From 3a0a142f1c41f15be1c50e1ddff8416584b9fc49 Mon Sep 17 00:00:00 2001 From: Umer Saleem Date: Fri, 8 Nov 2024 15:09:47 +0500 Subject: [PATCH 32/33] JSON: fix user properties output for zpool list This commit fixes JSON output for zpool list when user properties are requested with -o flag. This case needed to be handled specifically since zpool_prop_to_name does not return property name for user properties, instead it is stored in pl->pl_user_prop. Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Umer Saleem Closes #16734 --- cmd/zpool/zpool_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 6a45a063d91a..4458b902de31 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -6882,8 +6882,13 @@ collect_pool(zpool_handle_t *zhp, list_cbdata_t *cb) if (cb->cb_json) { if (pl->pl_prop == ZPOOL_PROP_NAME) continue; + const char *prop_name; + if (pl->pl_prop != ZPROP_USERPROP) + prop_name = zpool_prop_to_name(pl->pl_prop); + else + prop_name = pl->pl_user_prop; (void) zprop_nvlist_one_property( - zpool_prop_to_name(pl->pl_prop), propstr, + prop_name, propstr, sourcetype, NULL, NULL, props, cb->cb_json_as_int); } else { /* From 1c9a4c8cb44d5f865c29e3df3f019872329554b3 Mon Sep 17 00:00:00 2001 From: Umer Saleem Date: Fri, 8 Nov 2024 15:13:05 +0500 Subject: [PATCH 33/33] Fix user properties output for zpool list In zpool_get_user_prop, when called from zpool_expand_proplist and collect_pool, we often have zpool_props present in zpool_handle_t equal to NULL. This mostly happens when only one user property is requested using zpool list -o . Checking for this case and correctly initializing the zpool_props field in zpool_handle_t fixes this issue. Interestingly, this issue does not occur if we query any other property like name or guid along with a user property with -o flag because while accessing properties like guid, zpool_prop_get_int is called which checks for this case specifically and calls zpool_get_all_props. Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Umer Saleem Closes #16734 --- lib/libzfs/libzfs_pool.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 44f2c6f19dff..f256535e8ea0 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -471,13 +471,15 @@ int zpool_get_userprop(zpool_handle_t *zhp, const char *propname, char *buf, size_t len, zprop_source_t *srctype) { - nvlist_t *nv, *nvl; + nvlist_t *nv; uint64_t ival; const char *value; zprop_source_t source = ZPROP_SRC_LOCAL; - nvl = zhp->zpool_props; - if (nvlist_lookup_nvlist(nvl, propname, &nv) == 0) { + if (zhp->zpool_props == NULL) + zpool_get_all_props(zhp); + + if (nvlist_lookup_nvlist(zhp->zpool_props, propname, &nv) == 0) { if (nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0) source = ival; verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);