Mirror of https://github.com/freebsd/freebsd-src.git, synced 2024-11-26 20:12:44 +00:00
zfs: merge openzfs/zfs@b10992582
Notable upstream pull request merges:
  #15892 -multiple Fast Dedup: Introduce the FDT on-disk format and feature flag
  #15893 -multiple Fast Dedup: "flat" DDT entry format
  #15895 -multiple Fast Dedup: FDT-log feature
  #16239 6be8bf555 zpool: Provide GUID to zpool-reguid(8) with -g
  #16277 -multiple Fast Dedup: prune unique entries
  #16316 5807de90a Fix null ptr deref when renaming a zvol with snaps and snapdev=visible
  #16343 77a797a38 Enable L2 cache of all (MRU+MFU) metadata but MFU data only
  #16446 83f359245 FreeBSD: fix build without kernel option MAC
  #16449 963e6c9f3 Fix incorrect error report on vdev attach/replace
  #16505 b10992582 spa_prop_get: require caller to supply output nvlist

Obtained from:  OpenZFS
OpenZFS commit: b109925820
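Two of the merged changes add new zpool(8) surface area: zpool-reguid(8) gains a -g flag for supplying an explicit GUID (#16239), and Fast Dedup adds a zpool ddtprune subcommand for pruning single-reference DDT entries (#16277). A minimal usage sketch, based only on the usage strings added in this diff; the pool name "tank" and the numeric values are hypothetical:

    # Re-GUID the pool with an explicitly chosen (decimal) GUID
    zpool reguid -g 1234567890 tank

    # Prune unique DDT entries that are 30 days old or older
    zpool ddtprune -d 30 tank

    # Or prune 10% of the unique DDT entries
    zpool ddtprune -p 10 tank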
commit e2df9bb441
@@ -103,6 +103,7 @@ CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include/os/freebsd
CFLAGS+= -I${SRCTOP}/sys
CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/module/icp/include
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libzpool/include
CFLAGS+= -include ${SRCTOP}/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
CFLAGS+= -DHAVE_ISSETUGID
CFLAGS+= -include ${SRCTOP}/sys/modules/zfs/zfs_config.h
@@ -100,6 +100,7 @@ CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include/os/freebsd
CFLAGS+= -I${SRCTOP}/sys
CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/module/icp/include
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libzpool/include
CFLAGS+= -include ${SRCTOP}/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
CFLAGS+= -DHAVE_ISSETUGID -UHAVE_AVX -DRESCUE
CFLAGS+= -include ${SRCTOP}/sys/modules/zfs/zfs_config.h
@@ -63,10 +63,10 @@ KERNEL_C = \
zfs_fletcher_superscalar4.c \
zfs_namecheck.c \
zfs_prop.c \
zfs_valstr.c \
zpool_prop.c \
zprop_common.c

ARCH_C =
.if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "i386"
ARCH_C += zfs_fletcher_intel.c \
@@ -92,6 +92,7 @@ CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/include/os/freebsd
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include/os/freebsd
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libshare
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libzpool/include
CFLAGS+= -I${SRCTOP}/sys/contrib/ck/include
CFLAGS+= -I${SRCTOP}/sys
CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include
@@ -1,5 +1,7 @@
ZFSTOP= ${SRCTOP}/sys/contrib/openzfs

.PATH: ${ZFSTOP}/lib/libzpool

# ZFS_COMMON_SRCS
.PATH: ${ZFSTOP}/module/zfs
.PATH: ${ZFSTOP}/module/zcommon
@@ -14,8 +16,6 @@ ZFSTOP= ${SRCTOP}/sys/contrib/openzfs

.PATH: ${ZFSTOP}/module/os/linux/zfs

.PATH: ${ZFSTOP}/lib/libzpool

.if exists(${SRCTOP}/sys/cddl/contrib/opensolaris/common/atomic/${MACHINE_ARCH}/opensolaris_atomic.S)
.PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/common/atomic/${MACHINE_ARCH}
ATOMIC_SRCS= opensolaris_atomic.S
@@ -34,6 +34,7 @@ PACKAGE= zfs
LIB= zpool

USER_C = \
abd_os.c \
kernel.c \
taskq.c \
util.c
@@ -51,7 +52,6 @@ KERNEL_C = \
zpool_prop.c \
zprop_common.c \
abd.c \
abd_os.c \
aggsum.c \
arc.c \
arc_os.c \
@@ -67,6 +67,7 @@ KERNEL_C = \
dbuf.c \
dbuf_stats.c \
ddt.c \
ddt_log.c \
ddt_stats.c \
ddt_zap.c \
dmu.c \
@@ -255,6 +256,7 @@ CFLAGS+= \
-I${ZFSTOP}/include \
-I${ZFSTOP}/lib/libspl/include \
-I${ZFSTOP}/lib/libspl/include/os/freebsd \
-I${SRCTOP}/sys/contrib/openzfs/lib/libzpool/include \
-I${SRCTOP}/sys \
-I${ZFSTOP}/include/os/freebsd/zfs \
-I${SRCTOP}/cddl/compat/opensolaris/include \
@@ -22,6 +22,7 @@ MAN= \
zpool-create.8 \
zpool-destroy.8 \
zpool-detach.8 \
zpool-ddtprune.8 \
zpool-events.8 \
zpool-export.8 \
zpool-features.7 \
@@ -66,6 +67,7 @@ CFLAGS+= \
-I${ZFSTOP}/include \
-I${ZFSTOP}/lib/libspl/include \
-I${ZFSTOP}/lib/libspl/include/os/freebsd \
-I${SRCTOP}/sys/contrib/openzfs/lib/libzpool/include \
-I${SRCTOP}/sys \
-I${SRCTOP}/cddl/compat/opensolaris/include \
-I${ZFSTOP}/cmd/zpool \
@@ -15,6 +15,7 @@ CFLAGS+= \
-I${ZFSTOP}/include \
-I${ZFSTOP}/lib/libspl/include \
-I${ZFSTOP}/lib/libspl/include/os/freebsd \
-I${SRCTOP}/sys/contrib/openzfs/lib/libzpool/include \
-I${SRCTOP}/sys \
-I${SRCTOP}/cddl/compat/opensolaris/include \
-I${ZFSTOP}/module/icp/include \
@@ -21,9 +21,11 @@ SYMLINKS= ${BINDIR}/zstream ${BINDIR}/zstreamdump
WARNS?= 2
CFLAGS+= \
-DIN_BASE \
-DZFS_DEBUG \
-I${ZFSTOP}/include \
-I${ZFSTOP}/lib/libspl/include \
-I${ZFSTOP}/lib/libspl/include/os/freebsd \
-I${SRCTOP}/sys/contrib/openzfs/lib/libzpool/include \
-I${SRCTOP}/sys \
-I${SRCTOP}/cddl/compat/opensolaris/include \
-I${ZFSTOP}/module/icp/include \
@@ -15,6 +15,7 @@ CFLAGS+= \
-I${ZFSTOP}/include \
-I${ZFSTOP}/lib/libspl/include \
-I${ZFSTOP}/lib/libspl/include/os/freebsd \
-I${SRCTOP}/sys/contrib/openzfs/lib/libzpool/include \
-I${SRCTOP}/cddl/compat/opensolaris/include \
-I${ZFSTOP}/module/icp/include \
-include ${ZFSTOP}/include/os/freebsd/spl/sys/ccompile.h \
@@ -18,6 +18,7 @@ CFLAGS+= \
-I${ZFSTOP}/lib/libspl/include \
-I${ZFSTOP}/lib/libspl/include/os/freebsd \
-I${ZFSTOP}/lib/libspl/include/os/freebsd/spl \
-I${SRCTOP}/sys/contrib/openzfs/lib/libzpool/include \
-I${SRCTOP}/sys \
-include ${ZFSTOP}/include/os/freebsd/spl/sys/ccompile.h \
-DHAVE_ISSETUGID
@@ -17,6 +17,7 @@ CFLAGS+= -DIN_BASE
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/include
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include/os/freebsd
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libzpool/include
CFLAGS+= -I${SRCTOP}/sys
CFLAGS+= -include ${SRCTOP}/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
CFLAGS+= -I${SRCTOP}/cddl/usr.sbin
@@ -12,6 +12,7 @@ CFLAGS+= -DIN_BASE
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/include
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include/
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include/os/freebsd
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libzpool/include
CFLAGS+= -I${SRCTOP}/sys
CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/module/icp/include
@@ -57,6 +57,7 @@ CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/include/os/freebsd
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include/os/freebsd
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libzfs
CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libzpool/include
CFLAGS+= -I${SRCTOP}/sys
CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include
CFLAGS+= -include ${SRCTOP}/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h
@@ -107,7 +107,7 @@ typedef struct zio_checksum_info {
#include "skein_zfs.c"

#ifdef HAS_ZSTD_ZFS
extern int zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len,
extern int zfs_zstd_decompress_buf(void *s_start, void *d_start, size_t s_len,
size_t d_len, int n);
#endif
@@ -191,7 +191,7 @@ static zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
{NULL, zle_decompress, 64, "zle"},
{NULL, lz4_decompress, 0, "lz4"},
#ifdef HAS_ZSTD_ZFS
{NULL, zfs_zstd_decompress, ZIO_ZSTD_LEVEL_DEFAULT, "zstd"}
{NULL, zfs_zstd_decompress_buf, ZIO_ZSTD_LEVEL_DEFAULT, "zstd"}
#endif
};
@@ -238,6 +238,7 @@ contrib/openzfs/module/zcommon/zfs_fletcher_superscalar.c optional zfs compile-
contrib/openzfs/module/zcommon/zfs_fletcher_superscalar4.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zcommon/zfs_namecheck.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zcommon/zfs_prop.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zcommon/zfs_valstr.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zcommon/zpool_prop.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zcommon/zprop_common.c optional zfs compile-with "${ZFS_C}"
@@ -270,6 +271,7 @@ contrib/openzfs/module/zfs/dbuf.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/dbuf_stats.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/dataset_kstats.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/ddt.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/ddt_log.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/ddt_stats.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/ddt_zap.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/dmu.c optional zfs compile-with "${ZFS_C}"
@@ -6,5 +6,5 @@ Release: 1
Release-Tags: relext
License: CDDL
Author: OpenZFS
Linux-Maximum: 6.9
Linux-Maximum: 6.10
Linux-Minimum: 3.10
@@ -24,7 +24,7 @@ zfs_ids_to_path_LDADD = \
libzfs.la

zhack_CPPFLAGS = $(AM_CPPFLAGS) $(FORCEDEBUG_CPPFLAGS)
zhack_CPPFLAGS = $(AM_CPPFLAGS) $(LIBZPOOL_CPPFLAGS)

sbin_PROGRAMS += zhack
CPPCHECKTARGETS += zhack
@@ -39,7 +39,7 @@ zhack_LDADD = \

ztest_CFLAGS = $(AM_CFLAGS) $(KERNEL_CFLAGS)
ztest_CPPFLAGS = $(AM_CPPFLAGS) $(FORCEDEBUG_CPPFLAGS)
ztest_CPPFLAGS = $(AM_CPPFLAGS) $(LIBZPOOL_CPPFLAGS)

sbin_PROGRAMS += ztest
CPPCHECKTARGETS += ztest
@@ -269,8 +269,7 @@ main(int argc, char **argv)
return (MOUNT_USAGE);
}

if (!zfsutil || sloppy ||
libzfs_envvar_is_set("ZFS_MOUNT_HELPER")) {
if (sloppy || libzfs_envvar_is_set("ZFS_MOUNT_HELPER")) {
zfs_adjust_mount_options(zhp, mntpoint, mntopts, mtabopt);
}
@@ -337,7 +336,7 @@ main(int argc, char **argv)
dataset, mntpoint, mntflags, zfsflags, mntopts, mtabopt);

if (!fake) {
if (zfsutil && !sloppy &&
if (!remount && !sloppy &&
!libzfs_envvar_is_set("ZFS_MOUNT_HELPER")) {
error = zfs_mount_at(zhp, mntopts, mntflags, mntpoint);
if (error) {
@@ -1,5 +1,5 @@
raidz_test_CFLAGS = $(AM_CFLAGS) $(KERNEL_CFLAGS)
raidz_test_CPPFLAGS = $(AM_CPPFLAGS) $(FORCEDEBUG_CPPFLAGS)
raidz_test_CPPFLAGS = $(AM_CPPFLAGS) $(LIBZPOOL_CPPFLAGS)

bin_PROGRAMS += raidz_test
CPPCHECKTARGETS += raidz_test
@@ -1,4 +1,4 @@
zdb_CPPFLAGS = $(AM_CPPFLAGS) $(FORCEDEBUG_CPPFLAGS)
zdb_CPPFLAGS = $(AM_CPPFLAGS) $(LIBZPOOL_CPPFLAGS)
zdb_CFLAGS = $(AM_CFLAGS) $(LIBCRYPTO_CFLAGS)

sbin_PROGRAMS += zdb
@@ -33,7 +33,7 @@
* under sponsorship from the FreeBSD Foundation.
* Copyright (c) 2021 Allan Jude
* Copyright (c) 2021 Toomas Soome <tsoome@me.com>
* Copyright (c) 2023, Klara Inc.
* Copyright (c) 2023, 2024, Klara Inc.
* Copyright (c) 2023, Rob Norris <robn@despairlabs.com>
*/
@ -1914,23 +1914,25 @@ dump_log_spacemaps(spa_t *spa)
|
||||
}
|
||||
|
||||
static void
|
||||
dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
|
||||
dump_ddt_entry(const ddt_t *ddt, const ddt_lightweight_entry_t *ddlwe,
|
||||
uint64_t index)
|
||||
{
|
||||
const ddt_phys_t *ddp = dde->dde_phys;
|
||||
const ddt_key_t *ddk = &dde->dde_key;
|
||||
const char *types[4] = { "ditto", "single", "double", "triple" };
|
||||
const ddt_key_t *ddk = &ddlwe->ddlwe_key;
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
blkptr_t blk;
|
||||
int p;
|
||||
|
||||
for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
|
||||
if (ddp->ddp_phys_birth == 0)
|
||||
for (p = 0; p < DDT_NPHYS(ddt); p++) {
|
||||
const ddt_univ_phys_t *ddp = &ddlwe->ddlwe_phys;
|
||||
ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p);
|
||||
|
||||
if (ddt_phys_birth(ddp, v) == 0)
|
||||
continue;
|
||||
ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
|
||||
ddt_bp_create(ddt->ddt_checksum, ddk, ddp, v, &blk);
|
||||
snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
|
||||
(void) printf("index %llx refcnt %llu %s %s\n",
|
||||
(u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
|
||||
types[p], blkbuf);
|
||||
(void) printf("index %llx refcnt %llu phys %d %s\n",
|
||||
(u_longlong_t)index, (u_longlong_t)ddt_phys_refcnt(ddp, v),
|
||||
p, blkbuf);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1956,11 +1958,37 @@ dump_dedup_ratio(const ddt_stat_t *dds)
|
||||
dedup, compress, copies, dedup * compress / copies);
|
||||
}
|
||||
|
||||
static void
|
||||
dump_ddt_log(ddt_t *ddt)
|
||||
{
|
||||
for (int n = 0; n < 2; n++) {
|
||||
ddt_log_t *ddl = &ddt->ddt_log[n];
|
||||
|
||||
uint64_t count = avl_numnodes(&ddl->ddl_tree);
|
||||
if (count == 0)
|
||||
continue;
|
||||
|
||||
printf(DMU_POOL_DDT_LOG ": %lu log entries\n",
|
||||
zio_checksum_table[ddt->ddt_checksum].ci_name, n, count);
|
||||
|
||||
if (dump_opt['D'] < 4)
|
||||
continue;
|
||||
|
||||
ddt_lightweight_entry_t ddlwe;
|
||||
uint64_t index = 0;
|
||||
for (ddt_log_entry_t *ddle = avl_first(&ddl->ddl_tree);
|
||||
ddle; ddle = AVL_NEXT(&ddl->ddl_tree, ddle)) {
|
||||
DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, &ddlwe);
|
||||
dump_ddt_entry(ddt, &ddlwe, index++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
|
||||
{
|
||||
char name[DDT_NAMELEN];
|
||||
ddt_entry_t dde;
|
||||
ddt_lightweight_entry_t ddlwe;
|
||||
uint64_t walk = 0;
|
||||
dmu_object_info_t doi;
|
||||
uint64_t count, dspace, mspace;
|
||||
@ -2001,8 +2029,8 @@ dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
|
||||
|
||||
(void) printf("%s contents:\n\n", name);
|
||||
|
||||
while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
|
||||
dump_dde(ddt, &dde, walk);
|
||||
while ((error = ddt_object_walk(ddt, type, class, &walk, &ddlwe)) == 0)
|
||||
dump_ddt_entry(ddt, &ddlwe, walk);
|
||||
|
||||
ASSERT3U(error, ==, ENOENT);
|
||||
|
||||
@ -2017,7 +2045,7 @@ dump_all_ddts(spa_t *spa)
|
||||
|
||||
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
|
||||
ddt_t *ddt = spa->spa_ddt[c];
|
||||
if (!ddt)
|
||||
if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED)
|
||||
continue;
|
||||
for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
|
||||
for (ddt_class_t class = 0; class < DDT_CLASSES;
|
||||
@ -2025,6 +2053,7 @@ dump_all_ddts(spa_t *spa)
|
||||
dump_ddt(ddt, type, class);
|
||||
}
|
||||
}
|
||||
dump_ddt_log(ddt);
|
||||
}
|
||||
|
||||
ddt_get_dedup_stats(spa, &dds_total);
|
||||
@ -2043,6 +2072,32 @@ dump_all_ddts(spa_t *spa)
|
||||
}
|
||||
|
||||
dump_dedup_ratio(&dds_total);
|
||||
|
||||
/*
|
||||
* Dump a histogram of unique class entry age
|
||||
*/
|
||||
if (dump_opt['D'] == 3 && getenv("ZDB_DDT_UNIQUE_AGE_HIST") != NULL) {
|
||||
ddt_age_histo_t histogram;
|
||||
|
||||
(void) printf("DDT walk unique, building age histogram...\n");
|
||||
ddt_prune_walk(spa, 0, &histogram);
|
||||
|
||||
/*
|
||||
* print out histogram for unique entry class birth
|
||||
*/
|
||||
if (histogram.dah_entries > 0) {
|
||||
(void) printf("%5s %9s %4s\n",
|
||||
"age", "blocks", "amnt");
|
||||
(void) printf("%5s %9s %4s\n",
|
||||
"-----", "---------", "----");
|
||||
for (int i = 0; i < HIST_BINS; i++) {
|
||||
(void) printf("%5d %9d %4d%%\n", 1 << i,
|
||||
(int)histogram.dah_age_histo[i],
|
||||
(int)((histogram.dah_age_histo[i] * 100) /
|
||||
histogram.dah_entries));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@ -3287,9 +3342,45 @@ fuid_table_destroy(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Clean up DDT internal state. ddt_lookup() adds entries to ddt_tree, which on
|
||||
* a live pool are normally cleaned up during ddt_sync(). We can't do that (and
|
||||
* wouldn't want to anyway), but if we don't clean up the presence of stuff on
|
||||
* ddt_tree will trip asserts in ddt_table_free(). So, we clean up ourselves.
|
||||
*
|
||||
* Note that this is not a particularly efficient way to do this, but
|
||||
* ddt_remove() is the only public method that can do the work we need, and it
|
||||
* requires the right locks and etc to do the job. This is only ever called
|
||||
* during zdb shutdown so efficiency is not especially important.
|
||||
*/
|
||||
static void
|
||||
zdb_ddt_cleanup(spa_t *spa)
|
||||
{
|
||||
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
|
||||
ddt_t *ddt = spa->spa_ddt[c];
|
||||
if (!ddt)
|
||||
continue;
|
||||
|
||||
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
|
||||
ddt_enter(ddt);
|
||||
ddt_entry_t *dde = avl_first(&ddt->ddt_tree), *next;
|
||||
while (dde) {
|
||||
next = AVL_NEXT(&ddt->ddt_tree, dde);
|
||||
dde->dde_io = NULL;
|
||||
ddt_remove(ddt, dde);
|
||||
dde = next;
|
||||
}
|
||||
ddt_exit(ddt);
|
||||
spa_config_exit(spa, SCL_CONFIG, FTAG);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
zdb_exit(int reason)
|
||||
{
|
||||
if (spa != NULL)
|
||||
zdb_ddt_cleanup(spa);
|
||||
|
||||
if (os != NULL) {
|
||||
close_objset(os, FTAG);
|
||||
} else if (spa != NULL) {
|
||||
@ -4592,7 +4683,6 @@ dump_l2arc_log_blocks(int fd, const l2arc_dev_hdr_phys_t *l2dhdr,
|
||||
l2arc_log_blk_phys_t this_lb;
|
||||
uint64_t asize;
|
||||
l2arc_log_blkptr_t lbps[2];
|
||||
abd_t *abd;
|
||||
zio_cksum_t cksum;
|
||||
int failed = 0;
|
||||
l2arc_dev_t dev;
|
||||
@ -4646,20 +4736,25 @@ dump_l2arc_log_blocks(int fd, const l2arc_dev_hdr_phys_t *l2dhdr,
|
||||
switch (L2BLK_GET_COMPRESS((&lbps[0])->lbp_prop)) {
|
||||
case ZIO_COMPRESS_OFF:
|
||||
break;
|
||||
default:
|
||||
abd = abd_alloc_for_io(asize, B_TRUE);
|
||||
default: {
|
||||
abd_t *abd = abd_alloc_linear(asize, B_TRUE);
|
||||
abd_copy_from_buf_off(abd, &this_lb, 0, asize);
|
||||
if (zio_decompress_data(L2BLK_GET_COMPRESS(
|
||||
(&lbps[0])->lbp_prop), abd, &this_lb,
|
||||
asize, sizeof (this_lb), NULL) != 0) {
|
||||
abd_t dabd;
|
||||
abd_get_from_buf_struct(&dabd, &this_lb,
|
||||
sizeof (this_lb));
|
||||
int err = zio_decompress_data(L2BLK_GET_COMPRESS(
|
||||
(&lbps[0])->lbp_prop), abd, &dabd,
|
||||
asize, sizeof (this_lb), NULL);
|
||||
abd_free(&dabd);
|
||||
abd_free(abd);
|
||||
if (err != 0) {
|
||||
(void) printf("L2ARC block decompression "
|
||||
"failed\n");
|
||||
abd_free(abd);
|
||||
goto out;
|
||||
}
|
||||
abd_free(abd);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (this_lb.lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC))
|
||||
byteswap_uint64_array(&this_lb, sizeof (this_lb));
|
||||
@ -5633,7 +5728,6 @@ static void
|
||||
zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
dmu_object_type_t type)
|
||||
{
|
||||
uint64_t refcnt = 0;
|
||||
int i;
|
||||
|
||||
ASSERT(type < ZDB_OT_TOTAL);
|
||||
@ -5641,8 +5735,167 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
if (zilog && zil_bp_tree_add(zilog, bp) != 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* This flag controls if we will issue a claim for the block while
|
||||
* counting it, to ensure that all blocks are referenced in space maps.
|
||||
* We don't issue claims if we're not doing leak tracking, because it's
|
||||
* expensive if the user isn't interested. We also don't claim the
|
||||
* second or later occurences of cloned or dedup'd blocks, because we
|
||||
* already claimed them the first time.
|
||||
*/
|
||||
boolean_t do_claim = !dump_opt['L'];
|
||||
|
||||
spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER);
|
||||
|
||||
blkptr_t tempbp;
|
||||
if (BP_GET_DEDUP(bp)) {
|
||||
/*
|
||||
* Dedup'd blocks are special. We need to count them, so we can
|
||||
* later uncount them when reporting leaked space, and we must
|
||||
* only claim them once.
|
||||
*
|
||||
* We use the existing dedup system to track what we've seen.
|
||||
* The first time we see a block, we do a ddt_lookup() to see
|
||||
* if it exists in the DDT. If we're doing leak tracking, we
|
||||
* claim the block at this time.
|
||||
*
|
||||
* Each time we see a block, we reduce the refcount in the
|
||||
* entry by one, and add to the size and count of dedup'd
|
||||
* blocks to report at the end.
|
||||
*/
|
||||
|
||||
ddt_t *ddt = ddt_select(zcb->zcb_spa, bp);
|
||||
|
||||
ddt_enter(ddt);
|
||||
|
||||
/*
|
||||
* Find the block. This will create the entry in memory, but
|
||||
* we'll know if that happened by its refcount.
|
||||
*/
|
||||
ddt_entry_t *dde = ddt_lookup(ddt, bp);
|
||||
|
||||
/*
|
||||
* ddt_lookup() can return NULL if this block didn't exist
|
||||
* in the DDT and creating it would take the DDT over its
|
||||
* quota. Since we got the block from disk, it must exist in
|
||||
* the DDT, so this can't happen. However, when unique entries
|
||||
* are pruned, the dedup bit can be set with no corresponding
|
||||
* entry in the DDT.
|
||||
*/
|
||||
if (dde == NULL) {
|
||||
ddt_exit(ddt);
|
||||
goto skipped;
|
||||
}
|
||||
|
||||
/* Get the phys for this variant */
|
||||
ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp);
|
||||
|
||||
/*
|
||||
* This entry may have multiple sets of DVAs. We must claim
|
||||
* each set the first time we see them in a real block on disk,
|
||||
* or count them on subsequent occurences. We don't have a
|
||||
* convenient way to track the first time we see each variant,
|
||||
* so we repurpose dde_io as a set of "seen" flag bits. We can
|
||||
* do this safely in zdb because it never writes, so it will
|
||||
* never have a writing zio for this block in that pointer.
|
||||
*/
|
||||
boolean_t seen = !!(((uintptr_t)dde->dde_io) & (1 << v));
|
||||
if (!seen)
|
||||
dde->dde_io =
|
||||
(void *)(((uintptr_t)dde->dde_io) | (1 << v));
|
||||
|
||||
/* Consume a reference for this block. */
|
||||
if (ddt_phys_total_refcnt(ddt, dde->dde_phys) > 0)
|
||||
ddt_phys_decref(dde->dde_phys, v);
|
||||
|
||||
/*
|
||||
* If this entry has a single flat phys, it may have been
|
||||
* extended with additional DVAs at some time in its life.
|
||||
* This block might be from before it was fully extended, and
|
||||
* so have fewer DVAs.
|
||||
*
|
||||
* If this is the first time we've seen this block, and we
|
||||
* claimed it as-is, then we would miss the claim on some
|
||||
* number of DVAs, which would then be seen as leaked.
|
||||
*
|
||||
* In all cases, if we've had fewer DVAs, then the asize would
|
||||
* be too small, and would lead to the pool apparently using
|
||||
* more space than allocated.
|
||||
*
|
||||
* To handle this, we copy the canonical set of DVAs from the
|
||||
* entry back to the block pointer before we claim it.
|
||||
*/
|
||||
if (v == DDT_PHYS_FLAT) {
|
||||
ASSERT3U(BP_GET_BIRTH(bp), ==,
|
||||
ddt_phys_birth(dde->dde_phys, v));
|
||||
tempbp = *bp;
|
||||
ddt_bp_fill(dde->dde_phys, v, &tempbp,
|
||||
BP_GET_BIRTH(bp));
|
||||
bp = &tempbp;
|
||||
}
|
||||
|
||||
if (seen) {
|
||||
/*
|
||||
* The second or later time we see this block,
|
||||
* it's a duplicate and we count it.
|
||||
*/
|
||||
zcb->zcb_dedup_asize += BP_GET_ASIZE(bp);
|
||||
zcb->zcb_dedup_blocks++;
|
||||
|
||||
/* Already claimed, don't do it again. */
|
||||
do_claim = B_FALSE;
|
||||
}
|
||||
|
||||
ddt_exit(ddt);
|
||||
} else if (zcb->zcb_brt_is_active &&
|
||||
brt_maybe_exists(zcb->zcb_spa, bp)) {
|
||||
/*
|
||||
* Cloned blocks are special. We need to count them, so we can
|
||||
* later uncount them when reporting leaked space, and we must
|
||||
* only claim them once.
|
||||
*
|
||||
* To do this, we keep our own in-memory BRT. For each block
|
||||
* we haven't seen before, we look it up in the real BRT and
|
||||
* if its there, we note it and its refcount then proceed as
|
||||
* normal. If we see the block again, we count it as a clone
|
||||
* and then give it no further consideration.
|
||||
*/
|
||||
zdb_brt_entry_t zbre_search, *zbre;
|
||||
avl_index_t where;
|
||||
|
||||
zbre_search.zbre_dva = bp->blk_dva[0];
|
||||
zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where);
|
||||
if (zbre == NULL) {
|
||||
/* Not seen before; track it */
|
||||
uint64_t refcnt =
|
||||
brt_entry_get_refcount(zcb->zcb_spa, bp);
|
||||
if (refcnt > 0) {
|
||||
zbre = umem_zalloc(sizeof (zdb_brt_entry_t),
|
||||
UMEM_NOFAIL);
|
||||
zbre->zbre_dva = bp->blk_dva[0];
|
||||
zbre->zbre_refcount = refcnt;
|
||||
avl_insert(&zcb->zcb_brt, zbre, where);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Second or later occurrence, count it and take a
|
||||
* refcount.
|
||||
*/
|
||||
zcb->zcb_clone_asize += BP_GET_ASIZE(bp);
|
||||
zcb->zcb_clone_blocks++;
|
||||
|
||||
zbre->zbre_refcount--;
|
||||
if (zbre->zbre_refcount == 0) {
|
||||
avl_remove(&zcb->zcb_brt, zbre);
|
||||
umem_free(zbre, sizeof (zdb_brt_entry_t));
|
||||
}
|
||||
|
||||
/* Already claimed, don't do it again. */
|
||||
do_claim = B_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
skipped:
|
||||
for (i = 0; i < 4; i++) {
|
||||
int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
|
||||
int t = (i & 1) ? type : ZDB_OT_TOTAL;
|
||||
@ -5745,71 +5998,12 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp);
|
||||
zcb->zcb_asize_total += BP_GET_ASIZE(bp);
|
||||
|
||||
if (zcb->zcb_brt_is_active && brt_maybe_exists(zcb->zcb_spa, bp)) {
|
||||
/*
|
||||
* Cloned blocks are special. We need to count them, so we can
|
||||
* later uncount them when reporting leaked space, and we must
|
||||
* only claim them them once.
|
||||
*
|
||||
* To do this, we keep our own in-memory BRT. For each block
|
||||
* we haven't seen before, we look it up in the real BRT and
|
||||
* if its there, we note it and its refcount then proceed as
|
||||
* normal. If we see the block again, we count it as a clone
|
||||
* and then give it no further consideration.
|
||||
*/
|
||||
zdb_brt_entry_t zbre_search, *zbre;
|
||||
avl_index_t where;
|
||||
|
||||
zbre_search.zbre_dva = bp->blk_dva[0];
|
||||
zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where);
|
||||
if (zbre != NULL) {
|
||||
zcb->zcb_clone_asize += BP_GET_ASIZE(bp);
|
||||
zcb->zcb_clone_blocks++;
|
||||
|
||||
zbre->zbre_refcount--;
|
||||
if (zbre->zbre_refcount == 0) {
|
||||
avl_remove(&zcb->zcb_brt, zbre);
|
||||
umem_free(zbre, sizeof (zdb_brt_entry_t));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t crefcnt = brt_entry_get_refcount(zcb->zcb_spa, bp);
|
||||
if (crefcnt > 0) {
|
||||
zbre = umem_zalloc(sizeof (zdb_brt_entry_t),
|
||||
UMEM_NOFAIL);
|
||||
zbre->zbre_dva = bp->blk_dva[0];
|
||||
zbre->zbre_refcount = crefcnt;
|
||||
avl_insert(&zcb->zcb_brt, zbre, where);
|
||||
}
|
||||
}
|
||||
|
||||
if (dump_opt['L'])
|
||||
if (!do_claim)
|
||||
return;
|
||||
|
||||
if (BP_GET_DEDUP(bp)) {
|
||||
ddt_t *ddt;
|
||||
ddt_entry_t *dde;
|
||||
|
||||
ddt = ddt_select(zcb->zcb_spa, bp);
|
||||
ddt_enter(ddt);
|
||||
dde = ddt_lookup(ddt, bp, B_FALSE);
|
||||
|
||||
if (dde == NULL) {
|
||||
refcnt = 0;
|
||||
} else {
|
||||
ddt_phys_t *ddp = ddt_phys_select(dde, bp);
|
||||
ddt_phys_decref(ddp);
|
||||
refcnt = ddp->ddp_refcnt;
|
||||
if (ddt_phys_total_refcnt(dde) == 0)
|
||||
ddt_remove(ddt, dde);
|
||||
}
|
||||
ddt_exit(ddt);
|
||||
}
|
||||
|
||||
VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
|
||||
refcnt ? 0 : spa_min_claim_txg(zcb->zcb_spa),
|
||||
bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
|
||||
VERIFY0(zio_wait(zio_claim(NULL, zcb->zcb_spa,
|
||||
spa_min_claim_txg(zcb->zcb_spa), bp, NULL, NULL,
|
||||
ZIO_FLAG_CANFAIL)));
|
||||
}
|
||||
|
||||
static void
|
||||
@ -6120,49 +6314,6 @@ zdb_load_obsolete_counts(vdev_t *vd)
|
||||
return (counts);
|
||||
}
|
||||
|
||||
static void
|
||||
zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
|
||||
{
|
||||
ddt_bookmark_t ddb = {0};
|
||||
ddt_entry_t dde;
|
||||
int error;
|
||||
int p;
|
||||
|
||||
ASSERT(!dump_opt['L']);
|
||||
|
||||
while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
|
||||
blkptr_t blk;
|
||||
ddt_phys_t *ddp = dde.dde_phys;
|
||||
|
||||
if (ddb.ddb_class == DDT_CLASS_UNIQUE)
|
||||
return;
|
||||
|
||||
ASSERT(ddt_phys_total_refcnt(&dde) > 1);
|
||||
ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
|
||||
VERIFY(ddt);
|
||||
|
||||
for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
|
||||
if (ddp->ddp_phys_birth == 0)
|
||||
continue;
|
||||
ddt_bp_create(ddb.ddb_checksum,
|
||||
&dde.dde_key, ddp, &blk);
|
||||
if (p == DDT_PHYS_DITTO) {
|
||||
zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
|
||||
} else {
|
||||
zcb->zcb_dedup_asize +=
|
||||
BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
|
||||
zcb->zcb_dedup_blocks++;
|
||||
}
|
||||
}
|
||||
|
||||
ddt_enter(ddt);
|
||||
VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
|
||||
ddt_exit(ddt);
|
||||
}
|
||||
|
||||
ASSERT(error == ENOENT);
|
||||
}
|
||||
|
||||
typedef struct checkpoint_sm_exclude_entry_arg {
|
||||
vdev_t *cseea_vd;
|
||||
uint64_t cseea_checkpoint_size;
|
||||
@ -6546,10 +6697,6 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
|
||||
(void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
|
||||
increment_indirect_mapping_cb, zcb, NULL);
|
||||
}
|
||||
|
||||
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
|
||||
zdb_ddt_leak_init(spa, zcb);
|
||||
spa_config_exit(spa, SCL_CONFIG, FTAG);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
@ -6814,6 +6961,8 @@ dump_block_stats(spa_t *spa)
|
||||
int e, c, err;
|
||||
bp_embedded_type_t i;
|
||||
|
||||
ddt_prefetch_all(spa);
|
||||
|
||||
zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL);
|
||||
|
||||
if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
|
||||
@ -6938,7 +7087,6 @@ dump_block_stats(spa_t *spa)
|
||||
(u_longlong_t)total_alloc,
|
||||
(dump_opt['L']) ? "unreachable" : "leaked",
|
||||
(longlong_t)(total_alloc - total_found));
|
||||
leaks = B_TRUE;
|
||||
}
|
||||
|
||||
if (tzb->zb_count == 0) {
|
||||
@ -7272,29 +7420,27 @@ dump_simulated_ddt(spa_t *spa)
|
||||
spa_config_exit(spa, SCL_CONFIG, FTAG);
|
||||
|
||||
while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
|
||||
ddt_stat_t dds;
|
||||
uint64_t refcnt = zdde->zdde_ref_blocks;
|
||||
ASSERT(refcnt != 0);
|
||||
|
||||
dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
|
||||
dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
|
||||
dds.dds_psize = zdde->zdde_ref_psize / refcnt;
|
||||
dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
|
||||
ddt_stat_t *dds = &ddh_total.ddh_stat[highbit64(refcnt) - 1];
|
||||
|
||||
dds.dds_ref_blocks = zdde->zdde_ref_blocks;
|
||||
dds.dds_ref_lsize = zdde->zdde_ref_lsize;
|
||||
dds.dds_ref_psize = zdde->zdde_ref_psize;
|
||||
dds.dds_ref_dsize = zdde->zdde_ref_dsize;
|
||||
dds->dds_blocks += zdde->zdde_ref_blocks / refcnt;
|
||||
dds->dds_lsize += zdde->zdde_ref_lsize / refcnt;
|
||||
dds->dds_psize += zdde->zdde_ref_psize / refcnt;
|
||||
dds->dds_dsize += zdde->zdde_ref_dsize / refcnt;
|
||||
|
||||
ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
|
||||
&dds, 0);
|
||||
dds->dds_ref_blocks += zdde->zdde_ref_blocks;
|
||||
dds->dds_ref_lsize += zdde->zdde_ref_lsize;
|
||||
dds->dds_ref_psize += zdde->zdde_ref_psize;
|
||||
dds->dds_ref_dsize += zdde->zdde_ref_dsize;
|
||||
|
||||
umem_free(zdde, sizeof (*zdde));
|
||||
}
|
||||
|
||||
avl_destroy(&t);
|
||||
|
||||
ddt_histogram_stat(&dds_total, &ddh_total);
|
||||
ddt_histogram_total(&dds_total, &ddh_total);
|
||||
|
||||
(void) printf("Simulated DDT histogram:\n");
|
||||
|
||||
@ -8022,16 +8168,28 @@ dump_mos_leaks(spa_t *spa)
|
||||
|
||||
mos_leak_vdev(spa->spa_root_vdev);
|
||||
|
||||
for (uint64_t class = 0; class < DDT_CLASSES; class++) {
|
||||
for (uint64_t type = 0; type < DDT_TYPES; type++) {
|
||||
for (uint64_t cksum = 0;
|
||||
cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++) {
|
||||
ddt_t *ddt = spa->spa_ddt[cksum];
|
||||
if (!ddt)
|
||||
continue;
|
||||
for (uint64_t c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
|
||||
ddt_t *ddt = spa->spa_ddt[c];
|
||||
if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED)
|
||||
continue;
|
||||
|
||||
/* DDT store objects */
|
||||
for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
|
||||
for (ddt_class_t class = 0; class < DDT_CLASSES;
|
||||
class++) {
|
||||
mos_obj_refd(ddt->ddt_object[type][class]);
|
||||
}
|
||||
}
|
||||
|
||||
/* FDT container */
|
||||
if (ddt->ddt_version == DDT_VERSION_FDT)
|
||||
mos_obj_refd(ddt->ddt_dir_object);
|
||||
|
||||
/* FDT log objects */
|
||||
if (ddt->ddt_flags & DDT_FLAG_LOG) {
|
||||
mos_obj_refd(ddt->ddt_log[0].ddl_object);
|
||||
mos_obj_refd(ddt->ddt_log[1].ddl_object);
|
||||
}
|
||||
}
|
||||
|
||||
if (spa->spa_brt != NULL) {
|
||||
@ -8499,13 +8657,22 @@ try_decompress_block(abd_t *pabd, uint64_t lsize, uint64_t psize,
|
||||
memset(lbuf, 0x00, lsize);
|
||||
memset(lbuf2, 0xff, lsize);
|
||||
|
||||
abd_t labd, labd2;
|
||||
abd_get_from_buf_struct(&labd, lbuf, lsize);
|
||||
abd_get_from_buf_struct(&labd2, lbuf2, lsize);
|
||||
|
||||
boolean_t ret = B_FALSE;
|
||||
if (zio_decompress_data(cfunc, pabd,
|
||||
lbuf, psize, lsize, NULL) == 0 &&
|
||||
&labd, psize, lsize, NULL) == 0 &&
|
||||
zio_decompress_data(cfunc, pabd,
|
||||
lbuf2, psize, lsize, NULL) == 0 &&
|
||||
&labd2, psize, lsize, NULL) == 0 &&
|
||||
memcmp(lbuf, lbuf2, lsize) == 0)
|
||||
return (B_TRUE);
|
||||
return (B_FALSE);
|
||||
ret = B_TRUE;
|
||||
|
||||
abd_free(&labd2);
|
||||
abd_free(&labd);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
@ -9624,6 +9791,9 @@ retry_lookup:
|
||||
}
|
||||
|
||||
fini:
|
||||
if (spa != NULL)
|
||||
zdb_ddt_cleanup(spa);
|
||||
|
||||
if (os != NULL) {
|
||||
close_objset(os, FTAG);
|
||||
} else if (spa != NULL) {
|
||||
|
@ -844,7 +844,6 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
|
||||
const char *failmode = NULL;
|
||||
boolean_t checkremove = B_FALSE;
|
||||
uint32_t pri = 0;
|
||||
int32_t flags = 0;
|
||||
|
||||
/*
|
||||
* If this is a checksum or I/O error, then toss it into the
|
||||
@ -922,18 +921,28 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
|
||||
}
|
||||
} else if (fmd_nvl_class_match(hdl, nvl,
|
||||
ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM))) {
|
||||
uint64_t flags = 0;
|
||||
int32_t flags32 = 0;
|
||||
/*
|
||||
* We ignore ereports for checksum errors generated by
|
||||
* scrub/resilver I/O to avoid potentially further
|
||||
* degrading the pool while it's being repaired.
|
||||
*
|
||||
* Note that FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS used to
|
||||
* be int32. To allow newer zed to work on older
|
||||
* kernels, if we don't find the flags, we look for
|
||||
* the older ones too.
|
||||
*/
|
||||
if (((nvlist_lookup_uint32(nvl,
|
||||
FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY, &pri) == 0) &&
|
||||
(pri == ZIO_PRIORITY_SCRUB ||
|
||||
pri == ZIO_PRIORITY_REBUILD)) ||
|
||||
((nvlist_lookup_int32(nvl,
|
||||
((nvlist_lookup_uint64(nvl,
|
||||
FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS, &flags) == 0) &&
|
||||
(flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)))) {
|
||||
(flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) ||
|
||||
((nvlist_lookup_int32(nvl,
|
||||
FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS, &flags32) == 0) &&
|
||||
(flags32 & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)))) {
|
||||
fmd_hdl_debug(hdl, "ignoring '%s' for "
|
||||
"scrub/resilver I/O", class);
|
||||
return;
|
||||
|
@ -75,6 +75,7 @@
|
||||
#include "zpool_util.h"
|
||||
#include "zfs_comutil.h"
|
||||
#include "zfeature_common.h"
|
||||
#include "zfs_valstr.h"
|
||||
|
||||
#include "statcommon.h"
|
||||
|
||||
@ -130,6 +131,8 @@ static int zpool_do_version(int, char **);
|
||||
|
||||
static int zpool_do_wait(int, char **);
|
||||
|
||||
static int zpool_do_ddt_prune(int, char **);
|
||||
|
||||
static int zpool_do_help(int argc, char **argv);
|
||||
|
||||
static zpool_compat_status_t zpool_do_load_compat(
|
||||
@ -170,6 +173,7 @@ typedef enum {
|
||||
HELP_CLEAR,
|
||||
HELP_CREATE,
|
||||
HELP_CHECKPOINT,
|
||||
HELP_DDT_PRUNE,
|
||||
HELP_DESTROY,
|
||||
HELP_DETACH,
|
||||
HELP_EXPORT,
|
||||
@ -426,6 +430,8 @@ static zpool_command_t command_table[] = {
|
||||
{ "sync", zpool_do_sync, HELP_SYNC },
|
||||
{ NULL },
|
||||
{ "wait", zpool_do_wait, HELP_WAIT },
|
||||
{ NULL },
|
||||
{ "ddtprune", zpool_do_ddt_prune, HELP_DDT_PRUNE },
|
||||
};
|
||||
|
||||
#define NCOMMAND (ARRAY_SIZE(command_table))
|
||||
@ -537,7 +543,7 @@ get_usage(zpool_help_t idx)
|
||||
"\t [-o property=value] <pool> <newpool> "
|
||||
"[<device> ...]\n"));
|
||||
case HELP_REGUID:
|
||||
return (gettext("\treguid <pool>\n"));
|
||||
return (gettext("\treguid [-g guid] <pool>\n"));
|
||||
case HELP_SYNC:
|
||||
return (gettext("\tsync [pool] ...\n"));
|
||||
case HELP_VERSION:
|
||||
@ -545,6 +551,8 @@ get_usage(zpool_help_t idx)
|
||||
case HELP_WAIT:
|
||||
return (gettext("\twait [-Hp] [-T d|u] [-t <activity>[,...]] "
|
||||
"<pool> [interval]\n"));
|
||||
case HELP_DDT_PRUNE:
|
||||
return (gettext("\tddtprune -d|-p <amount> <pool>\n"));
|
||||
default:
|
||||
__builtin_unreachable();
|
||||
}
|
||||
@ -2025,7 +2033,7 @@ zpool_do_create(int argc, char **argv)
|
||||
char *end;
|
||||
u_longlong_t ver;
|
||||
|
||||
ver = strtoull(propval, &end, 10);
|
||||
ver = strtoull(propval, &end, 0);
|
||||
if (*end == '\0' &&
|
||||
ver < SPA_VERSION_FEATURES) {
|
||||
enable_pool_features = B_FALSE;
|
||||
@ -8232,19 +8240,32 @@ zpool_do_clear(int argc, char **argv)
|
||||
}
|
||||
|
||||
/*
|
||||
* zpool reguid <pool>
|
||||
* zpool reguid [-g <guid>] <pool>
|
||||
*/
|
||||
int
|
||||
zpool_do_reguid(int argc, char **argv)
|
||||
{
|
||||
uint64_t guid;
|
||||
uint64_t *guidp = NULL;
|
||||
int c;
|
||||
char *endptr;
|
||||
char *poolname;
|
||||
zpool_handle_t *zhp;
|
||||
int ret = 0;
|
||||
|
||||
/* check options */
|
||||
while ((c = getopt(argc, argv, "")) != -1) {
|
||||
while ((c = getopt(argc, argv, "g:")) != -1) {
|
||||
switch (c) {
|
||||
case 'g':
|
||||
errno = 0;
|
||||
guid = strtoull(optarg, &endptr, 10);
|
||||
if (errno != 0 || *endptr != '\0') {
|
||||
(void) fprintf(stderr,
|
||||
gettext("invalid GUID: %s\n"), optarg);
|
||||
usage(B_FALSE);
|
||||
}
|
||||
guidp = &guid;
|
||||
break;
|
||||
case '?':
|
||||
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
|
||||
optopt);
|
||||
@ -8270,7 +8291,7 @@ zpool_do_reguid(int argc, char **argv)
|
||||
if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
|
||||
return (1);
|
||||
|
||||
ret = zpool_reguid(zhp);
|
||||
ret = zpool_set_guid(zhp, guidp);
|
||||
|
||||
zpool_close(zhp);
|
||||
return (ret);
|
||||
@ -11916,6 +11937,7 @@ static void
|
||||
zpool_do_events_nvprint(nvlist_t *nvl, int depth)
|
||||
{
|
||||
nvpair_t *nvp;
|
||||
static char flagstr[256];
|
||||
|
||||
for (nvp = nvlist_next_nvpair(nvl, NULL);
|
||||
nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {
|
||||
@ -11975,7 +11997,21 @@ zpool_do_events_nvprint(nvlist_t *nvl, int depth)
|
||||
|
||||
case DATA_TYPE_UINT32:
|
||||
(void) nvpair_value_uint32(nvp, &i32);
|
||||
printf(gettext("0x%x"), i32);
|
||||
if (strcmp(name,
|
||||
FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE) == 0 ||
|
||||
strcmp(name,
|
||||
FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE) == 0) {
|
||||
zfs_valstr_zio_stage(i32, flagstr,
|
||||
sizeof (flagstr));
|
||||
printf(gettext("0x%x [%s]"), i32, flagstr);
|
||||
} else if (strcmp(name,
|
||||
FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY) == 0) {
|
||||
zfs_valstr_zio_priority(i32, flagstr,
|
||||
sizeof (flagstr));
|
||||
printf(gettext("0x%x [%s]"), i32, flagstr);
|
||||
} else {
|
||||
printf(gettext("0x%x"), i32);
|
||||
}
|
||||
break;
|
||||
|
||||
case DATA_TYPE_INT64:
|
||||
@ -11996,6 +12032,12 @@ zpool_do_events_nvprint(nvlist_t *nvl, int depth)
|
||||
printf(gettext("\"%s\" (0x%llx)"),
|
||||
zpool_state_to_name(i64, VDEV_AUX_NONE),
|
||||
(u_longlong_t)i64);
|
||||
} else if (strcmp(name,
|
||||
FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS) == 0) {
|
||||
zfs_valstr_zio_flag(i64, flagstr,
|
||||
sizeof (flagstr));
|
||||
printf(gettext("0x%llx [%s]"),
|
||||
(u_longlong_t)i64, flagstr);
|
||||
} else {
|
||||
printf(gettext("0x%llx"), (u_longlong_t)i64);
|
||||
}
|
||||
@ -13329,6 +13371,88 @@ found:;
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* zpool ddtprune -d|-p <amount> <pool>
|
||||
*
|
||||
* -d <days> Prune entries <days> old and older
|
||||
* -p <percent> Prune <percent> amount of entries
|
||||
*
|
||||
* Prune single reference entries from DDT to satisfy the amount specified.
|
||||
*/
|
||||
int
|
||||
zpool_do_ddt_prune(int argc, char **argv)
|
||||
{
|
||||
zpool_ddt_prune_unit_t unit = ZPOOL_DDT_PRUNE_NONE;
|
||||
uint64_t amount = 0;
|
||||
zpool_handle_t *zhp;
|
||||
char *endptr;
|
||||
int c;
|
||||
|
||||
while ((c = getopt(argc, argv, "d:p:")) != -1) {
|
||||
switch (c) {
|
||||
case 'd':
|
||||
if (unit == ZPOOL_DDT_PRUNE_PERCENTAGE) {
|
||||
(void) fprintf(stderr, gettext("-d cannot be "
|
||||
"combined with -p option\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
errno = 0;
|
||||
amount = strtoull(optarg, &endptr, 0);
|
||||
if (errno != 0 || *endptr != '\0' || amount == 0) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("invalid days value\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
amount *= 86400; /* convert days to seconds */
|
||||
unit = ZPOOL_DDT_PRUNE_AGE;
|
||||
break;
|
||||
case 'p':
|
||||
if (unit == ZPOOL_DDT_PRUNE_AGE) {
|
||||
(void) fprintf(stderr, gettext("-p cannot be "
|
||||
"combined with -d option\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
errno = 0;
|
||||
amount = strtoull(optarg, &endptr, 0);
|
||||
if (errno != 0 || *endptr != '\0' ||
|
||||
amount == 0 || amount > 100) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("invalid percentage value\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
unit = ZPOOL_DDT_PRUNE_PERCENTAGE;
|
||||
break;
|
||||
case '?':
|
||||
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
|
||||
optopt);
|
||||
usage(B_FALSE);
|
||||
}
|
||||
}
|
||||
argc -= optind;
|
||||
argv += optind;
|
||||
|
||||
if (unit == ZPOOL_DDT_PRUNE_NONE) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("missing amount option (-d|-p <value>)\n"));
|
||||
usage(B_FALSE);
|
||||
} else if (argc < 1) {
|
||||
(void) fprintf(stderr, gettext("missing pool argument\n"));
|
||||
usage(B_FALSE);
|
||||
} else if (argc > 1) {
|
||||
(void) fprintf(stderr, gettext("too many arguments\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
zhp = zpool_open(g_zfs, argv[0]);
|
||||
if (zhp == NULL)
|
||||
return (-1);
|
||||
|
||||
int error = zpool_ddt_prune(zhp, unit, amount);
|
||||
|
||||
zpool_close(zhp);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
find_command_idx(const char *command, int *idx)
|
||||
{
|
||||
|
@ -1,3 +1,5 @@
|
||||
zstream_CPPFLAGS = $(AM_CPPFLAGS) $(LIBZPOOL_CPPFLAGS)
|
||||
|
||||
sbin_PROGRAMS += zstream
|
||||
CPPCHECKTARGETS += zstream
|
||||
|
||||
|
@ -22,6 +22,8 @@
|
||||
/*
|
||||
* Copyright 2022 Axcient. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* Copyright (c) 2024, Klara, Inc.
|
||||
*/
|
||||
|
||||
#include <err.h>
|
||||
@ -257,83 +259,73 @@ zstream_do_decompress(int argc, char *argv[])
|
||||
ENTRY e = {.key = key};
|
||||
|
||||
p = hsearch(e, FIND);
|
||||
if (p != NULL) {
|
||||
zio_decompress_func_t *xfunc = NULL;
|
||||
switch ((enum zio_compress)(intptr_t)p->data) {
|
||||
case ZIO_COMPRESS_OFF:
|
||||
xfunc = NULL;
|
||||
break;
|
||||
case ZIO_COMPRESS_LZJB:
|
||||
xfunc = lzjb_decompress;
|
||||
break;
|
||||
case ZIO_COMPRESS_GZIP_1:
|
||||
xfunc = gzip_decompress;
|
||||
break;
|
||||
case ZIO_COMPRESS_ZLE:
|
||||
xfunc = zle_decompress;
|
||||
break;
|
||||
case ZIO_COMPRESS_LZ4:
|
||||
xfunc = lz4_decompress_zfs;
|
||||
break;
|
||||
case ZIO_COMPRESS_ZSTD:
|
||||
xfunc = zfs_zstd_decompress;
|
||||
break;
|
||||
default:
|
||||
assert(B_FALSE);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Read and decompress the block
|
||||
*/
|
||||
char *lzbuf = safe_calloc(payload_size);
|
||||
(void) sfread(lzbuf, payload_size, stdin);
|
||||
if (xfunc == NULL) {
|
||||
memcpy(buf, lzbuf, payload_size);
|
||||
drrw->drr_compressiontype =
|
||||
ZIO_COMPRESS_OFF;
|
||||
if (verbose)
|
||||
fprintf(stderr, "Resetting "
|
||||
"compression type to off "
|
||||
"for ino %llu offset "
|
||||
"%llu\n",
|
||||
(u_longlong_t)
|
||||
drrw->drr_object,
|
||||
(u_longlong_t)
|
||||
drrw->drr_offset);
|
||||
} else if (0 != xfunc(lzbuf, buf,
|
||||
payload_size, payload_size, 0)) {
|
||||
/*
|
||||
* The block must not be compressed,
|
||||
* at least not with this compression
|
||||
* type, possibly because it gets
|
||||
* written multiple times in this
|
||||
* stream.
|
||||
*/
|
||||
warnx("decompression failed for "
|
||||
"ino %llu offset %llu",
|
||||
(u_longlong_t)drrw->drr_object,
|
||||
(u_longlong_t)drrw->drr_offset);
|
||||
memcpy(buf, lzbuf, payload_size);
|
||||
} else if (verbose) {
|
||||
drrw->drr_compressiontype =
|
||||
ZIO_COMPRESS_OFF;
|
||||
fprintf(stderr, "successfully "
|
||||
"decompressed ino %llu "
|
||||
"offset %llu\n",
|
||||
(u_longlong_t)drrw->drr_object,
|
||||
(u_longlong_t)drrw->drr_offset);
|
||||
} else {
|
||||
drrw->drr_compressiontype =
|
||||
ZIO_COMPRESS_OFF;
|
||||
}
|
||||
free(lzbuf);
|
||||
} else {
|
||||
if (p == NULL) {
|
||||
/*
|
||||
* Read the contents of the block unaltered
|
||||
*/
|
||||
(void) sfread(buf, payload_size, stdin);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read and decompress the block
|
||||
*/
|
||||
enum zio_compress c =
|
||||
(enum zio_compress)(intptr_t)p->data;
|
||||
|
||||
if (c == ZIO_COMPRESS_OFF) {
|
||||
(void) sfread(buf, payload_size, stdin);
|
||||
drrw->drr_compressiontype = 0;
|
||||
drrw->drr_compressed_size = 0;
|
||||
if (verbose)
|
||||
fprintf(stderr,
|
||||
"Resetting compression type to "
|
||||
"off for ino %llu offset %llu\n",
|
||||
(u_longlong_t)drrw->drr_object,
|
||||
(u_longlong_t)drrw->drr_offset);
|
||||
break;
|
||||
}
|
||||
|
||||
uint64_t lsize = drrw->drr_logical_size;
|
||||
ASSERT3U(payload_size, <=, lsize);
|
||||
|
||||
char *lzbuf = safe_calloc(payload_size);
|
||||
(void) sfread(lzbuf, payload_size, stdin);
|
||||
|
||||
abd_t sabd, dabd;
|
||||
abd_get_from_buf_struct(&sabd, lzbuf, payload_size);
|
||||
abd_get_from_buf_struct(&dabd, buf, lsize);
|
||||
int err = zio_decompress_data(c, &sabd, &dabd,
|
||||
payload_size, lsize, NULL);
|
||||
abd_free(&dabd);
|
||||
abd_free(&sabd);
|
||||
|
||||
if (err == 0) {
|
||||
drrw->drr_compressiontype = 0;
|
||||
drrw->drr_compressed_size = 0;
|
||||
payload_size = lsize;
|
||||
if (verbose) {
|
||||
fprintf(stderr,
|
||||
"successfully decompressed "
|
||||
"ino %llu offset %llu\n",
|
||||
(u_longlong_t)drrw->drr_object,
|
||||
(u_longlong_t)drrw->drr_offset);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* The block must not be compressed, at least
|
||||
* not with this compression type, possibly
|
||||
* because it gets written multiple times in
|
||||
* this stream.
|
||||
*/
|
||||
warnx("decompression failed for "
|
||||
"ino %llu offset %llu",
|
||||
(u_longlong_t)drrw->drr_object,
|
||||
(u_longlong_t)drrw->drr_offset);
|
||||
memcpy(buf, lzbuf, payload_size);
|
||||
}
|
||||
|
||||
free(lzbuf);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -22,10 +22,9 @@
|
||||
/*
|
||||
* Copyright 2022 Axcient. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2022 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2024, Klara, Inc.
|
||||
*/
|
||||
|
||||
#include <err.h>
|
||||
@ -72,7 +71,7 @@ zstream_do_recompress(int argc, char *argv[])
|
||||
dmu_replay_record_t *drr = &thedrr;
|
||||
zio_cksum_t stream_cksum;
|
||||
int c;
|
||||
int level = -1;
|
||||
int level = 0;
|
||||
|
||||
while ((c = getopt(argc, argv, "l:")) != -1) {
|
||||
switch (c) {
|
||||
@ -97,34 +96,22 @@ zstream_do_recompress(int argc, char *argv[])
|
||||
|
||||
if (argc != 1)
|
||||
zstream_usage();
|
||||
int type = 0;
|
||||
zio_compress_info_t *cinfo = NULL;
|
||||
if (0 == strcmp(argv[0], "off")) {
|
||||
type = ZIO_COMPRESS_OFF;
|
||||
cinfo = &zio_compress_table[type];
|
||||
} else if (0 == strcmp(argv[0], "inherit") ||
|
||||
0 == strcmp(argv[0], "empty") ||
|
||||
0 == strcmp(argv[0], "on")) {
|
||||
// Fall through to invalid compression type case
|
||||
} else {
|
||||
for (int i = 0; i < ZIO_COMPRESS_FUNCTIONS; i++) {
|
||||
if (0 == strcmp(zio_compress_table[i].ci_name,
|
||||
argv[0])) {
|
||||
cinfo = &zio_compress_table[i];
|
||||
type = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (cinfo == NULL) {
|
||||
fprintf(stderr, "Invalid compression type %s.\n",
|
||||
argv[0]);
|
||||
exit(2);
|
||||
}
|
||||
|
||||
if (cinfo->ci_compress == NULL) {
|
||||
type = 0;
|
||||
cinfo = &zio_compress_table[0];
|
||||
enum zio_compress ctype;
|
||||
if (strcmp(argv[0], "off") == 0) {
|
||||
ctype = ZIO_COMPRESS_OFF;
|
||||
} else {
|
||||
for (ctype = 0; ctype < ZIO_COMPRESS_FUNCTIONS; ctype++) {
|
||||
if (strcmp(argv[0],
|
||||
zio_compress_table[ctype].ci_name) == 0)
|
||||
break;
|
||||
}
|
||||
if (ctype == ZIO_COMPRESS_FUNCTIONS ||
|
||||
zio_compress_table[ctype].ci_compress == NULL) {
|
||||
fprintf(stderr, "Invalid compression type %s.\n",
|
||||
argv[0]);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
|
||||
if (isatty(STDIN_FILENO)) {
|
||||
@ -135,6 +122,7 @@ zstream_do_recompress(int argc, char *argv[])
|
||||
exit(1);
|
||||
}
|
||||
|
||||
abd_init();
|
||||
fletcher_4_init();
|
||||
zio_init();
|
||||
zstd_init();
|
||||
@ -247,63 +235,78 @@ zstream_do_recompress(int argc, char *argv[])
|
||||
(void) sfread(buf, payload_size, stdin);
|
||||
break;
|
||||
}
|
||||
if (drrw->drr_compressiontype >=
|
||||
ZIO_COMPRESS_FUNCTIONS) {
|
||||
enum zio_compress dtype = drrw->drr_compressiontype;
|
||||
if (dtype >= ZIO_COMPRESS_FUNCTIONS) {
|
||||
fprintf(stderr, "Invalid compression type in "
|
||||
"stream: %d\n", drrw->drr_compressiontype);
|
||||
"stream: %d\n", dtype);
|
||||
exit(3);
|
||||
}
|
||||
zio_compress_info_t *dinfo =
|
||||
&zio_compress_table[drrw->drr_compressiontype];
|
||||
if (zio_compress_table[dtype].ci_decompress == NULL)
|
||||
dtype = ZIO_COMPRESS_OFF;
|
||||
|
||||
/* Set up buffers to minimize memcpys */
|
||||
char *cbuf, *dbuf;
|
||||
if (cinfo->ci_compress == NULL)
|
||||
if (ctype == ZIO_COMPRESS_OFF)
|
||||
dbuf = buf;
|
||||
else
|
||||
dbuf = safe_calloc(bufsz);
|
||||
|
||||
if (dinfo->ci_decompress == NULL)
|
||||
if (dtype == ZIO_COMPRESS_OFF)
|
||||
cbuf = dbuf;
|
||||
else
|
||||
cbuf = safe_calloc(payload_size);
|
||||
|
||||
/* Read and decompress the payload */
|
||||
(void) sfread(cbuf, payload_size, stdin);
|
||||
if (dinfo->ci_decompress != NULL) {
|
||||
if (0 != dinfo->ci_decompress(cbuf, dbuf,
|
||||
payload_size, MIN(bufsz,
|
||||
drrw->drr_logical_size), dinfo->ci_level)) {
|
||||
if (dtype != ZIO_COMPRESS_OFF) {
|
||||
abd_t cabd, dabd;
|
||||
abd_get_from_buf_struct(&cabd,
|
||||
cbuf, payload_size);
|
||||
abd_get_from_buf_struct(&dabd, dbuf,
|
||||
MIN(bufsz, drrw->drr_logical_size));
|
||||
if (zio_decompress_data(dtype, &cabd, &dabd,
|
||||
payload_size, abd_get_size(&dabd),
|
||||
NULL) != 0) {
|
||||
warnx("decompression type %d failed "
|
||||
"for ino %llu offset %llu",
|
||||
type,
|
||||
dtype,
|
||||
(u_longlong_t)drrw->drr_object,
|
||||
(u_longlong_t)drrw->drr_offset);
|
||||
exit(4);
|
||||
}
|
||||
payload_size = drrw->drr_logical_size;
|
||||
abd_free(&dabd);
|
||||
abd_free(&cabd);
|
||||
free(cbuf);
|
||||
}
|
||||
|
||||
/* Recompress the payload */
|
||||
if (cinfo->ci_compress != NULL) {
|
||||
payload_size = P2ROUNDUP(cinfo->ci_compress(
|
||||
dbuf, buf, drrw->drr_logical_size,
|
||||
MIN(payload_size, bufsz), (level == -1 ?
|
||||
cinfo->ci_level : level)),
|
||||
SPA_MINBLOCKSIZE);
|
||||
if (payload_size != drrw->drr_logical_size) {
|
||||
drrw->drr_compressiontype = type;
|
||||
drrw->drr_compressed_size =
|
||||
payload_size;
|
||||
} else {
|
||||
if (ctype != ZIO_COMPRESS_OFF) {
|
||||
abd_t dabd, abd;
|
||||
abd_get_from_buf_struct(&dabd,
|
||||
dbuf, drrw->drr_logical_size);
|
||||
abd_t *pabd =
|
||||
abd_get_from_buf_struct(&abd, buf, bufsz);
|
||||
size_t csize = zio_compress_data(ctype, &dabd,
|
||||
&pabd, drrw->drr_logical_size, level);
|
||||
size_t rounded =
|
||||
P2ROUNDUP(csize, SPA_MINBLOCKSIZE);
|
||||
if (rounded >= drrw->drr_logical_size) {
|
||||
memcpy(buf, dbuf, payload_size);
|
||||
drrw->drr_compressiontype = 0;
|
||||
drrw->drr_compressed_size = 0;
|
||||
} else {
|
||||
abd_zero_off(pabd, csize,
|
||||
rounded - csize);
|
||||
drrw->drr_compressiontype = ctype;
|
||||
drrw->drr_compressed_size =
|
||||
payload_size = rounded;
|
||||
}
|
||||
abd_free(&abd);
|
||||
abd_free(&dabd);
|
||||
free(dbuf);
|
||||
} else {
|
||||
drrw->drr_compressiontype = type;
|
||||
drrw->drr_compressiontype = 0;
|
||||
drrw->drr_compressed_size = 0;
|
||||
}
|
||||
break;
|
||||
@ -371,6 +374,7 @@ zstream_do_recompress(int argc, char *argv[])
|
||||
fletcher_4_fini();
|
||||
zio_fini();
|
||||
zstd_fini();
|
||||
abd_fini();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
@ -276,6 +276,8 @@ extern unsigned long zio_decompress_fail_fraction;
|
||||
extern unsigned long zfs_reconstruct_indirect_damage_fraction;
|
||||
extern uint64_t raidz_expand_max_reflow_bytes;
|
||||
extern uint_t raidz_expand_pause_point;
|
||||
extern boolean_t ddt_prune_artificial_age;
|
||||
extern boolean_t ddt_dump_prune_histogram;
|
||||
|
||||
|
||||
static ztest_shared_opts_t *ztest_shared_opts;
|
||||
@ -446,6 +448,7 @@ ztest_func_t ztest_fletcher;
|
||||
ztest_func_t ztest_fletcher_incr;
|
||||
ztest_func_t ztest_verify_dnode_bt;
|
||||
ztest_func_t ztest_pool_prefetch_ddt;
|
||||
ztest_func_t ztest_ddt_prune;
|
||||
|
||||
static uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
|
||||
static uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
|
||||
@ -502,6 +505,7 @@ static ztest_info_t ztest_info[] = {
|
||||
ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
|
||||
ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
|
||||
ZTI_INIT(ztest_pool_prefetch_ddt, 1, &zopt_rarely),
|
||||
ZTI_INIT(ztest_ddt_prune, 1, &zopt_rarely),
|
||||
};
|
||||
|
||||
#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
|
||||
@ -6747,7 +6751,7 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id)
|
||||
load = spa_load_guid(spa);
|
||||
|
||||
(void) pthread_rwlock_wrlock(&ztest_name_lock);
|
||||
error = spa_change_guid(spa);
|
||||
error = spa_change_guid(spa, NULL);
|
||||
zs->zs_guid = spa_guid(spa);
|
||||
(void) pthread_rwlock_unlock(&ztest_name_lock);
|
||||
|
||||
@ -7289,6 +7293,17 @@ ztest_trim(ztest_ds_t *zd, uint64_t id)
|
||||
mutex_exit(&ztest_vdev_lock);
|
||||
}
|
||||
|
||||
void
|
||||
ztest_ddt_prune(ztest_ds_t *zd, uint64_t id)
|
||||
{
|
||||
(void) zd, (void) id;
|
||||
|
||||
spa_t *spa = ztest_spa;
|
||||
uint64_t pct = ztest_random(15) + 1;
|
||||
|
||||
(void) ddt_prune_unique_entries(spa, ZPOOL_DDT_PRUNE_PERCENTAGE, pct);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify pool integrity by running zdb.
|
||||
*/
|
||||
@ -7470,6 +7485,13 @@ ztest_resume_thread(void *arg)
|
||||
{
|
||||
spa_t *spa = arg;
|
||||
|
||||
/*
|
||||
* Synthesize aged DDT entries for ddt prune testing
|
||||
*/
|
||||
ddt_prune_artificial_age = B_TRUE;
|
||||
if (ztest_opts.zo_verbose >= 3)
|
||||
ddt_dump_prune_histogram = B_TRUE;
|
||||
|
||||
while (!ztest_exiting) {
|
||||
if (spa_suspended(spa))
|
||||
ztest_resume(spa);
|
||||
@ -8588,6 +8610,12 @@ ztest_init(ztest_shared_t *zs)
|
||||
if (i == SPA_FEATURE_LOG_SPACEMAP && ztest_random(4) == 0)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* split 50/50 between legacy and fast dedup
|
||||
*/
|
||||
if (i == SPA_FEATURE_FAST_DEDUP && ztest_random(2) != 0)
|
||||
continue;
|
||||
|
||||
VERIFY3S(-1, !=, asprintf(&buf, "feature@%s",
|
||||
spa_feature_table[i].fi_uname));
|
||||
fnvlist_add_uint64(props, buf, 0);
|
||||
|
@ -10,7 +10,8 @@ AM_CPPFLAGS = \
|
||||
-I$(top_srcdir)/include \
|
||||
-I$(top_srcdir)/module/icp/include \
|
||||
-I$(top_srcdir)/lib/libspl/include \
|
||||
-I$(top_srcdir)/lib/libspl/include/os/@ac_system_l@
|
||||
-I$(top_srcdir)/lib/libspl/include/os/@ac_system_l@ \
|
||||
-I$(top_srcdir)/lib/libzpool/include
|
||||
|
||||
AM_LIBTOOLFLAGS = --silent
|
||||
|
||||
@ -70,4 +71,7 @@ KERNEL_CFLAGS = $(FRAME_LARGER_THAN)
|
||||
LIBRARY_CFLAGS = -no-suppress
|
||||
|
||||
# Forcibly enable asserts/debugging for libzpool &al.
|
||||
FORCEDEBUG_CPPFLAGS = -DDEBUG -UNDEBUG -DZFS_DEBUG
|
||||
# Since ZFS_DEBUG can change shared data structures, all libzpool users must
|
||||
# be compiled with the same flags.
|
||||
# See https://github.com/openzfs/zfs/issues/16476
|
||||
LIBZPOOL_CPPFLAGS = -DDEBUG -UNDEBUG -DZFS_DEBUG
|
||||
|
@ -100,6 +100,7 @@ usr/share/man/man8/zpool-clear.8
|
||||
usr/share/man/man8/zpool-create.8
|
||||
usr/share/man/man8/zpool-destroy.8
|
||||
usr/share/man/man8/zpool-detach.8
|
||||
usr/share/man/man8/zpool-ddtprune.8
|
||||
usr/share/man/man8/zpool-events.8
|
||||
usr/share/man/man8/zpool-export.8
|
||||
usr/share/man/man8/zpool-get.8
|
||||
|
@ -14,6 +14,7 @@ COMMON_H = \
|
||||
zfs_fletcher.h \
|
||||
zfs_namecheck.h \
|
||||
zfs_prop.h \
|
||||
zfs_valstr.h \
|
||||
\
|
||||
sys/abd.h \
|
||||
sys/abd_impl.h \
|
||||
|
@ -300,10 +300,14 @@ _LIBZFS_H int zpool_trim(zpool_handle_t *, pool_trim_func_t, nvlist_t *,
|
||||
|
||||
_LIBZFS_H int zpool_clear(zpool_handle_t *, const char *, nvlist_t *);
|
||||
_LIBZFS_H int zpool_reguid(zpool_handle_t *);
|
||||
_LIBZFS_H int zpool_set_guid(zpool_handle_t *, const uint64_t *);
|
||||
_LIBZFS_H int zpool_reopen_one(zpool_handle_t *, void *);
|
||||
|
||||
_LIBZFS_H int zpool_sync_one(zpool_handle_t *, void *);
|
||||
|
||||
_LIBZFS_H int zpool_ddt_prune(zpool_handle_t *, zpool_ddt_prune_unit_t,
|
||||
uint64_t);
|
||||
|
||||
_LIBZFS_H int zpool_vdev_online(zpool_handle_t *, const char *, int,
|
||||
vdev_state_t *);
|
||||
_LIBZFS_H int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t);
|
||||
|
@ -161,6 +161,9 @@ _LIBZFS_CORE_H int lzc_set_vdev_prop(const char *, nvlist_t *, nvlist_t **);
|
||||
|
||||
_LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);
|
||||
|
||||
_LIBZFS_CORE_H int lzc_ddt_prune(const char *, zpool_ddt_prune_unit_t,
|
||||
uint64_t);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -77,6 +77,8 @@ noinst_HEADERS = \
|
||||
%D%/spl/sys/zmod.h \
|
||||
%D%/spl/sys/zone.h \
|
||||
\
|
||||
%D%/zfs/sys/abd_os.h \
|
||||
%D%/zfs/sys/abd_impl_os.h \
|
||||
%D%/zfs/sys/arc_os.h \
|
||||
%D%/zfs/sys/freebsd_crypto.h \
|
||||
%D%/zfs/sys/freebsd_event.h \
|
||||
|
41
sys/contrib/openzfs/include/os/freebsd/zfs/sys/abd_impl_os.h
Normal file
41
sys/contrib/openzfs/include/os/freebsd/zfs/sys/abd_impl_os.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2016, 2019 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2023, 2024, Klara Inc.
|
||||
*/
|
||||
|
||||
#ifndef _ABD_IMPL_OS_H
|
||||
#define _ABD_IMPL_OS_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define abd_enter_critical(flags) critical_enter()
|
||||
#define abd_exit_critical(flags) critical_exit()
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ABD_IMPL_OS_H */
|
46
sys/contrib/openzfs/include/os/freebsd/zfs/sys/abd_os.h
Normal file
46
sys/contrib/openzfs/include/os/freebsd/zfs/sys/abd_os.h
Normal file
@ -0,0 +1,46 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2016, 2019 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ABD_OS_H
|
||||
#define _ABD_OS_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct abd_scatter {
|
||||
uint_t abd_offset;
|
||||
void *abd_chunks[1]; /* actually variable-length */
|
||||
};
|
||||
|
||||
struct abd_linear {
|
||||
void *abd_buf;
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ABD_H */
|
@ -20,6 +20,8 @@ kernel_linux_HEADERS = \
|
||||
|
||||
kernel_sysdir = $(kerneldir)/sys
|
||||
kernel_sys_HEADERS = \
|
||||
%D%/zfs/sys/abd_os.h \
|
||||
%D%/zfs/sys/abd_impl_os.h \
|
||||
%D%/zfs/sys/policy.h \
|
||||
%D%/zfs/sys/trace_acl.h \
|
||||
%D%/zfs/sys/trace_arc.h \
|
||||
|
@ -20,6 +20,10 @@
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2024, Klara Inc.
|
||||
* Copyright (c) 2024, Syneto
|
||||
*/
|
||||
|
||||
#ifndef _SPL_TASKQ_H
|
||||
#define _SPL_TASKQ_H
|
||||
@ -33,6 +37,9 @@
|
||||
#include <sys/thread.h>
|
||||
#include <sys/rwlock.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/wmsum.h>
|
||||
|
||||
typedef struct kstat_s kstat_t;
|
||||
|
||||
#define TASKQ_NAMELEN 31
|
||||
|
||||
@ -74,6 +81,32 @@ typedef enum tq_lock_role {
|
||||
typedef unsigned long taskqid_t;
|
||||
typedef void (task_func_t)(void *);
|
||||
|
||||
typedef struct taskq_sums {
|
||||
/* gauges (inc/dec counters, current value) */
|
||||
wmsum_t tqs_threads_active; /* threads running a task */
|
||||
wmsum_t tqs_threads_idle; /* threads waiting for work */
|
||||
wmsum_t tqs_threads_total; /* total threads */
|
||||
wmsum_t tqs_tasks_pending; /* tasks waiting to execute */
|
||||
wmsum_t tqs_tasks_priority; /* hi-pri tasks waiting */
|
||||
wmsum_t tqs_tasks_total; /* total waiting tasks */
|
||||
wmsum_t tqs_tasks_delayed; /* tasks deferred to future */
|
||||
wmsum_t tqs_entries_free; /* task entries on free list */
|
||||
|
||||
/* counters (inc only, since taskq creation) */
|
||||
wmsum_t tqs_threads_created; /* threads created */
|
||||
wmsum_t tqs_threads_destroyed; /* threads destroyed */
|
||||
wmsum_t tqs_tasks_dispatched; /* tasks dispatched */
|
||||
wmsum_t tqs_tasks_dispatched_delayed; /* tasks delayed to future */
|
||||
wmsum_t tqs_tasks_executed_normal; /* normal pri tasks executed */
|
||||
wmsum_t tqs_tasks_executed_priority; /* high pri tasks executed */
|
||||
wmsum_t tqs_tasks_executed; /* total tasks executed */
|
||||
wmsum_t tqs_tasks_delayed_requeued; /* delayed tasks requeued */
|
||||
wmsum_t tqs_tasks_cancelled; /* tasks cancelled before run */
|
||||
wmsum_t tqs_thread_wakeups; /* total thread wakeups */
|
||||
wmsum_t tqs_thread_wakeups_nowork; /* thread woken but no tasks */
|
||||
wmsum_t tqs_thread_sleeps; /* total thread sleeps */
|
||||
} taskq_sums_t;
|
||||
|
||||
typedef struct taskq {
|
||||
spinlock_t tq_lock; /* protects taskq_t */
|
||||
char *tq_name; /* taskq name */
|
||||
@ -105,6 +138,8 @@ typedef struct taskq {
|
||||
struct hlist_node tq_hp_cb_node;
|
||||
boolean_t tq_hp_support;
|
||||
unsigned long lastspawnstop; /* when to purge dynamic */
|
||||
taskq_sums_t tq_sums;
|
||||
kstat_t *tq_ksp;
|
||||
} taskq_t;
|
||||
|
||||
typedef struct taskq_ent {
|
||||
@ -123,6 +158,13 @@ typedef struct taskq_ent {
|
||||
#define TQENT_FLAG_PREALLOC 0x1
|
||||
#define TQENT_FLAG_CANCEL 0x2
|
||||
|
||||
/* bits 2-3 are which list tqent is on */
|
||||
#define TQENT_LIST_NONE 0x0
|
||||
#define TQENT_LIST_PENDING 0x4
|
||||
#define TQENT_LIST_PRIORITY 0x8
|
||||
#define TQENT_LIST_DELAY 0xc
|
||||
#define TQENT_LIST_MASK 0xc
|
||||
|
||||
typedef struct taskq_thread {
|
||||
struct list_head tqt_thread_list;
|
||||
struct list_head tqt_active_list;
|
||||
|
41
sys/contrib/openzfs/include/os/linux/zfs/sys/abd_impl_os.h
Normal file
41
sys/contrib/openzfs/include/os/linux/zfs/sys/abd_impl_os.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2016, 2019 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2023, 2024, Klara Inc.
|
||||
*/
|
||||
|
||||
#ifndef _ABD_IMPL_OS_H
|
||||
#define _ABD_IMPL_OS_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define abd_enter_critical(flags) local_irq_save(flags)
|
||||
#define abd_exit_critical(flags) local_irq_restore(flags)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ABD_IMPL_OS_H */
|
62
sys/contrib/openzfs/include/os/linux/zfs/sys/abd_os.h
Normal file
62
sys/contrib/openzfs/include/os/linux/zfs/sys/abd_os.h
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2016, 2019 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ABD_OS_H
|
||||
#define _ABD_OS_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct abd_scatter {
|
||||
uint_t abd_offset;
|
||||
uint_t abd_nents;
|
||||
struct scatterlist *abd_sgl;
|
||||
};
|
||||
|
||||
struct abd_linear {
|
||||
void *abd_buf;
|
||||
struct scatterlist *abd_sgl; /* for LINEAR_PAGE */
|
||||
};
|
||||
|
||||
typedef struct abd abd_t;
|
||||
|
||||
typedef int abd_iter_page_func_t(struct page *, size_t, size_t, void *);
|
||||
int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *,
|
||||
void *);
|
||||
|
||||
/*
|
||||
* Linux ABD bio functions
|
||||
* Note: these are only needed to support vdev_classic. See comment in
|
||||
* vdev_disk.c.
|
||||
*/
|
||||
unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t);
|
||||
unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ABD_H */
|
@ -30,6 +30,7 @@
|
||||
#include <sys/debug.h>
|
||||
#include <sys/zfs_refcount.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/abd_os.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -44,8 +45,7 @@ typedef enum abd_flags {
|
||||
ABD_FLAG_LINEAR_PAGE = 1 << 5, /* linear but allocd from page */
|
||||
ABD_FLAG_GANG = 1 << 6, /* mult ABDs chained together */
|
||||
ABD_FLAG_GANG_FREE = 1 << 7, /* gang ABD is responsible for mem */
|
||||
ABD_FLAG_ZEROS = 1 << 8, /* ABD for zero-filled buffer */
|
||||
ABD_FLAG_ALLOCD = 1 << 9, /* we allocated the abd_t */
|
||||
ABD_FLAG_ALLOCD = 1 << 8, /* we allocated the abd_t */
|
||||
} abd_flags_t;
|
||||
|
||||
typedef struct abd {
|
||||
@ -58,19 +58,8 @@ typedef struct abd {
|
||||
#endif
|
||||
kmutex_t abd_mtx;
|
||||
union {
|
||||
struct abd_scatter {
|
||||
uint_t abd_offset;
|
||||
#if defined(__FreeBSD__) && defined(_KERNEL)
|
||||
void *abd_chunks[1]; /* actually variable-length */
|
||||
#else
|
||||
uint_t abd_nents;
|
||||
struct scatterlist *abd_sgl;
|
||||
#endif
|
||||
} abd_scatter;
|
||||
struct abd_linear {
|
||||
void *abd_buf;
|
||||
struct scatterlist *abd_sgl; /* for LINEAR_PAGE */
|
||||
} abd_linear;
|
||||
struct abd_scatter abd_scatter;
|
||||
struct abd_linear abd_linear;
|
||||
struct abd_gang {
|
||||
list_t abd_gang_chain;
|
||||
} abd_gang;
|
||||
@ -79,9 +68,6 @@ typedef struct abd {
|
||||
|
||||
typedef int abd_iter_func_t(void *buf, size_t len, void *priv);
|
||||
typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *priv);
|
||||
#if defined(__linux__) && defined(_KERNEL)
|
||||
typedef int abd_iter_page_func_t(struct page *, size_t, size_t, void *);
|
||||
#endif
|
||||
|
||||
extern int zfs_abd_scatter_enabled;
|
||||
|
||||
@ -107,6 +93,7 @@ abd_t *abd_get_offset_size(abd_t *, size_t, size_t);
|
||||
abd_t *abd_get_offset_struct(abd_t *, abd_t *, size_t, size_t);
|
||||
abd_t *abd_get_zeros(size_t);
|
||||
abd_t *abd_get_from_buf(void *, size_t);
|
||||
abd_t *abd_get_from_buf_struct(abd_t *, void *, size_t);
|
||||
void abd_cache_reap_now(void);
|
||||
|
||||
/*
|
||||
@ -128,10 +115,6 @@ void abd_release_ownership_of_buf(abd_t *);
|
||||
int abd_iterate_func(abd_t *, size_t, size_t, abd_iter_func_t *, void *);
|
||||
int abd_iterate_func2(abd_t *, abd_t *, size_t, size_t, size_t,
|
||||
abd_iter_func2_t *, void *);
|
||||
#if defined(__linux__) && defined(_KERNEL)
|
||||
int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *,
|
||||
void *);
|
||||
#endif
|
||||
void abd_copy_off(abd_t *, abd_t *, size_t, size_t, size_t);
|
||||
void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t);
|
||||
void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t);
|
||||
@ -225,16 +208,6 @@ abd_get_size(abd_t *abd)
|
||||
void abd_init(void);
|
||||
void abd_fini(void);
|
||||
|
||||
/*
|
||||
* Linux ABD bio functions
|
||||
* Note: these are only needed to support vdev_classic. See comment in
|
||||
* vdev_disk.c.
|
||||
*/
|
||||
#if defined(__linux__) && defined(_KERNEL)
|
||||
unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t);
|
||||
unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -28,6 +28,7 @@
|
||||
#define _ABD_IMPL_H
|
||||
|
||||
#include <sys/abd.h>
|
||||
#include <sys/abd_impl_os.h>
|
||||
#include <sys/wmsum.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
@ -111,19 +112,6 @@ void abd_iter_page(struct abd_iter *);
|
||||
#define ABD_LINEAR_BUF(abd) (abd->abd_u.abd_linear.abd_buf)
|
||||
#define ABD_GANG(abd) (abd->abd_u.abd_gang)
|
||||
|
||||
#if defined(_KERNEL)
|
||||
#if defined(__FreeBSD__)
|
||||
#define abd_enter_critical(flags) critical_enter()
|
||||
#define abd_exit_critical(flags) critical_exit()
|
||||
#else
|
||||
#define abd_enter_critical(flags) local_irq_save(flags)
|
||||
#define abd_exit_critical(flags) local_irq_restore(flags)
|
||||
#endif
|
||||
#else /* !_KERNEL */
|
||||
#define abd_enter_critical(flags) ((void)0)
|
||||
#define abd_exit_critical(flags) ((void)0)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -39,6 +39,13 @@ extern "C" {
|
||||
|
||||
struct abd;
|
||||
|
||||
/*
|
||||
* DDT-wide feature flags. These are set in ddt_flags by ddt_configure().
|
||||
*/
|
||||
#define DDT_FLAG_FLAT (1 << 0) /* single extensible phys */
|
||||
#define DDT_FLAG_LOG (1 << 1) /* dedup log (journal) */
|
||||
#define DDT_FLAG_MASK (DDT_FLAG_FLAT|DDT_FLAG_LOG)
|
||||
|
||||
/*
|
||||
* DDT on-disk storage object types. Each one corresponds to specific
|
||||
* implementation, see ddt_ops_t. The value itself is not stored on disk.
|
||||
@ -120,30 +127,80 @@ typedef struct {
|
||||
* characteristics of the stored block, such as its location on disk (DVAs),
|
||||
* birth txg and ref count.
|
||||
*
|
||||
* Note that an entry has an array of four ddt_phys_t, one for each number of
|
||||
* DVAs (copies= property) and another for additional "ditto" copies. Most
|
||||
* users of ddt_phys_t will handle indexing into or counting the phys they
|
||||
* want.
|
||||
* The "traditional" entry has an array of four, one for each number of DVAs
|
||||
* (copies= property) and another for additional "ditto" copies. Users of the
|
||||
* traditional struct will specify the variant (index) of the one they want.
|
||||
*
|
||||
* The newer "flat" entry has only a single form that is specified using the
|
||||
* DDT_PHYS_FLAT variant.
|
||||
*
|
||||
* Since the value size varies, use one of the size macros when interfacing
|
||||
* with the ddt zap.
|
||||
*/
|
||||
typedef struct {
|
||||
dva_t ddp_dva[SPA_DVAS_PER_BP];
|
||||
uint64_t ddp_refcnt;
|
||||
uint64_t ddp_phys_birth;
|
||||
} ddt_phys_t;
|
||||
|
||||
#define DDT_PHYS_MAX (4)
|
||||
|
||||
/*
|
||||
* Named indexes into the ddt_phys_t array in each entry.
|
||||
* Note - this can be used in a flexible array and allocated for
|
||||
* a specific size (ddp_trad or ddp_flat). So be careful not to
|
||||
* copy using "=" assignment but instead use ddt_phys_copy().
|
||||
*/
|
||||
typedef union {
|
||||
/*
|
||||
* Traditional physical payload value for DDT zap (256 bytes)
|
||||
*/
|
||||
struct {
|
||||
dva_t ddp_dva[SPA_DVAS_PER_BP];
|
||||
uint64_t ddp_refcnt;
|
||||
uint64_t ddp_phys_birth;
|
||||
} ddp_trad[DDT_PHYS_MAX];
|
||||
|
||||
/*
|
||||
* Flat physical payload value for DDT zap (72 bytes)
|
||||
*/
|
||||
struct {
|
||||
dva_t ddp_dva[SPA_DVAS_PER_BP];
|
||||
uint64_t ddp_refcnt;
|
||||
uint64_t ddp_phys_birth; /* txg based from BP */
|
||||
uint64_t ddp_class_start; /* in realtime seconds */
|
||||
} ddp_flat;
|
||||
} ddt_univ_phys_t;
|
||||
|
||||
/*
|
||||
* This enum denotes which variant of a ddt_univ_phys_t to target. For
|
||||
* a traditional DDT entry, it represents the indexes into the ddp_trad
|
||||
* array. Any consumer of a ddt_univ_phys_t needs to know which variant
|
||||
* is being targeted.
|
||||
*
|
||||
* Note, we no longer generate new DDT_PHYS_DITTO-type blocks. However,
|
||||
* we maintain the ability to free existing dedup-ditto blocks.
|
||||
*/
|
||||
enum ddt_phys_type {
|
||||
|
||||
typedef enum {
|
||||
DDT_PHYS_DITTO = 0,
|
||||
DDT_PHYS_SINGLE = 1,
|
||||
DDT_PHYS_DOUBLE = 2,
|
||||
DDT_PHYS_TRIPLE = 3,
|
||||
DDT_PHYS_TYPES
|
||||
};
|
||||
DDT_PHYS_FLAT = 4,
|
||||
DDT_PHYS_NONE = 5
|
||||
} ddt_phys_variant_t;
|
||||
|
||||
#define DDT_PHYS_VARIANT(ddt, p) \
|
||||
(ASSERT((p) < DDT_PHYS_NONE), \
|
||||
((ddt)->ddt_flags & DDT_FLAG_FLAT ? DDT_PHYS_FLAT : (p)))
|
||||
|
||||
#define DDT_TRAD_PHYS_SIZE sizeof (((ddt_univ_phys_t *)0)->ddp_trad)
|
||||
#define DDT_FLAT_PHYS_SIZE sizeof (((ddt_univ_phys_t *)0)->ddp_flat)
|
||||
|
||||
#define _DDT_PHYS_SWITCH(ddt, flat, trad) \
|
||||
(((ddt)->ddt_flags & DDT_FLAG_FLAT) ? (flat) : (trad))
|
||||
|
||||
#define DDT_PHYS_SIZE(ddt) _DDT_PHYS_SWITCH(ddt, \
|
||||
DDT_FLAT_PHYS_SIZE, DDT_TRAD_PHYS_SIZE)
|
||||
|
||||
#define DDT_NPHYS(ddt) _DDT_PHYS_SWITCH(ddt, 1, DDT_PHYS_MAX)
|
||||
#define DDT_PHYS_FOR_COPIES(ddt, p) _DDT_PHYS_SWITCH(ddt, 0, p)
|
||||
#define DDT_PHYS_IS_DITTO(ddt, p) _DDT_PHYS_SWITCH(ddt, 0, (p == 0))
|
||||
|
||||
/*
|
||||
* A "live" entry, holding changes to an entry made this txg, and other data to
|
||||
@ -153,17 +210,27 @@ enum ddt_phys_type {
|
||||
/* State flags for dde_flags */
|
||||
#define DDE_FLAG_LOADED (1 << 0) /* entry ready for use */
|
||||
#define DDE_FLAG_OVERQUOTA (1 << 1) /* entry unusable, no space */
|
||||
#define DDE_FLAG_LOGGED (1 << 2) /* loaded from log */
|
||||
|
||||
/*
|
||||
* Additional data to support entry update or repair. This is fixed size
|
||||
* because its relatively rarely used.
|
||||
*/
|
||||
typedef struct {
|
||||
/* copy of data after a repair read, to be rewritten */
|
||||
abd_t *dde_repair_abd;
|
||||
|
||||
/* original phys contents before update, for error handling */
|
||||
ddt_univ_phys_t dde_orig_phys;
|
||||
|
||||
/* in-flight update IOs */
|
||||
zio_t *dde_lead_zio[DDT_PHYS_MAX];
|
||||
} ddt_entry_io_t;
|
||||
|
||||
typedef struct {
|
||||
/* key must be first for ddt_key_compare */
|
||||
ddt_key_t dde_key; /* ddt_tree key */
|
||||
ddt_phys_t dde_phys[DDT_PHYS_TYPES]; /* on-disk data */
|
||||
|
||||
/* in-flight update IOs */
|
||||
zio_t *dde_lead_zio[DDT_PHYS_TYPES];
|
||||
|
||||
/* copy of data after a repair read, to be rewritten */
|
||||
struct abd *dde_repair_abd;
|
||||
ddt_key_t dde_key; /* ddt_tree key */
|
||||
avl_node_t dde_node; /* ddt_tree_node */
|
||||
|
||||
/* storage type and class the entry was loaded from */
|
||||
ddt_type_t dde_type;
|
||||
@ -173,9 +240,35 @@ typedef struct {
|
||||
kcondvar_t dde_cv; /* signaled when load completes */
|
||||
uint64_t dde_waiters; /* count of waiters on dde_cv */
|
||||
|
||||
avl_node_t dde_node; /* ddt_tree node */
|
||||
ddt_entry_io_t *dde_io; /* IO support, when required */
|
||||
|
||||
ddt_univ_phys_t dde_phys[]; /* flexible -- allocated size varies */
|
||||
} ddt_entry_t;
|
||||
|
||||
/*
|
||||
* A lightweight entry is for short-lived or transient uses, like iterating or
|
||||
* inspecting, when you don't care where it came from.
|
||||
*/
|
||||
typedef struct {
|
||||
ddt_key_t ddlwe_key;
|
||||
ddt_type_t ddlwe_type;
|
||||
ddt_class_t ddlwe_class;
|
||||
ddt_univ_phys_t ddlwe_phys;
|
||||
} ddt_lightweight_entry_t;
|
||||
|
||||
/*
|
||||
* In-core DDT log. A separate struct to make it easier to switch between the
|
||||
* appending and flushing logs.
|
||||
*/
|
||||
typedef struct {
|
||||
avl_tree_t ddl_tree; /* logged entries */
|
||||
uint32_t ddl_flags; /* flags for this log */
|
||||
uint64_t ddl_object; /* log object id */
|
||||
uint64_t ddl_length; /* on-disk log size */
|
||||
uint64_t ddl_first_txg; /* txg log became active */
|
||||
ddt_key_t ddl_checkpoint; /* last checkpoint */
|
||||
} ddt_log_t;
|
||||
|
||||
/*
|
||||
* In-core DDT object. This covers all entries and stats for a the whole pool
|
||||
* for a given checksum type.
|
||||
@ -184,23 +277,49 @@ typedef struct {
|
||||
kmutex_t ddt_lock; /* protects changes to all fields */
|
||||
|
||||
avl_tree_t ddt_tree; /* "live" (changed) entries this txg */
|
||||
avl_tree_t ddt_log_tree; /* logged entries */
|
||||
|
||||
avl_tree_t ddt_repair_tree; /* entries being repaired */
|
||||
|
||||
enum zio_checksum ddt_checksum; /* checksum algorithm in use */
|
||||
spa_t *ddt_spa; /* pool this ddt is on */
|
||||
objset_t *ddt_os; /* ddt objset (always MOS) */
|
||||
ddt_log_t ddt_log[2]; /* active/flushing logs */
|
||||
ddt_log_t *ddt_log_active; /* pointers into ddt_log */
|
||||
ddt_log_t *ddt_log_flushing; /* swapped when flush starts */
|
||||
|
||||
hrtime_t ddt_flush_start; /* log flush start this txg */
|
||||
uint32_t ddt_flush_pass; /* log flush pass this txg */
|
||||
|
||||
int32_t ddt_flush_count; /* entries flushed this txg */
|
||||
int32_t ddt_flush_min; /* min rem entries to flush */
|
||||
int32_t ddt_log_ingest_rate; /* rolling log ingest rate */
|
||||
int32_t ddt_log_flush_rate; /* rolling log flush rate */
|
||||
int32_t ddt_log_flush_time_rate; /* avg time spent flushing */
|
||||
|
||||
uint64_t ddt_flush_force_txg; /* flush hard before this txg */
|
||||
|
||||
kstat_t *ddt_ksp; /* kstats context */
|
||||
|
||||
enum zio_checksum ddt_checksum; /* checksum algorithm in use */
|
||||
spa_t *ddt_spa; /* pool this ddt is on */
|
||||
objset_t *ddt_os; /* ddt objset (always MOS) */
|
||||
|
||||
uint64_t ddt_dir_object; /* MOS dir holding ddt objects */
|
||||
uint64_t ddt_version; /* DDT version */
|
||||
uint64_t ddt_flags; /* FDT option flags */
|
||||
|
||||
/* per-type/per-class entry store objects */
|
||||
uint64_t ddt_object[DDT_TYPES][DDT_CLASSES];
|
||||
|
||||
/* object ids for whole-ddt and per-type/per-class stats */
|
||||
/* object ids for stored, logged and per-type/per-class stats */
|
||||
uint64_t ddt_stat_object;
|
||||
ddt_object_t ddt_log_stats;
|
||||
ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES];
|
||||
|
||||
/* type/class stats by power-2-sized referenced blocks */
|
||||
ddt_histogram_t ddt_histogram[DDT_TYPES][DDT_CLASSES];
|
||||
ddt_histogram_t ddt_histogram_cache[DDT_TYPES][DDT_CLASSES];
|
||||
|
||||
/* log stats power-2-sized referenced blocks */
|
||||
ddt_histogram_t ddt_log_histogram;
|
||||
} ddt_t;
|
||||
|
||||
/*
|
||||
@ -215,20 +334,36 @@ typedef struct {
|
||||
uint64_t ddb_cursor;
|
||||
} ddt_bookmark_t;
|
||||
|
||||
extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp,
|
||||
uint64_t txg);
|
||||
extern void ddt_bp_fill(const ddt_univ_phys_t *ddp, ddt_phys_variant_t v,
|
||||
blkptr_t *bp, uint64_t txg);
|
||||
extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
|
||||
const ddt_phys_t *ddp, blkptr_t *bp);
|
||||
const ddt_univ_phys_t *ddp, ddt_phys_variant_t v, blkptr_t *bp);
|
||||
|
||||
extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
|
||||
extern void ddt_phys_clear(ddt_phys_t *ddp);
|
||||
extern void ddt_phys_addref(ddt_phys_t *ddp);
|
||||
extern void ddt_phys_decref(ddt_phys_t *ddp);
|
||||
extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
|
||||
extern void ddt_phys_extend(ddt_univ_phys_t *ddp, ddt_phys_variant_t v,
|
||||
const blkptr_t *bp);
|
||||
extern void ddt_phys_copy(ddt_univ_phys_t *dst, const ddt_univ_phys_t *src,
|
||||
ddt_phys_variant_t v);
|
||||
extern void ddt_phys_clear(ddt_univ_phys_t *ddp, ddt_phys_variant_t v);
|
||||
extern void ddt_phys_addref(ddt_univ_phys_t *ddp, ddt_phys_variant_t v);
|
||||
extern uint64_t ddt_phys_decref(ddt_univ_phys_t *ddp, ddt_phys_variant_t v);
|
||||
extern uint64_t ddt_phys_refcnt(const ddt_univ_phys_t *ddp,
|
||||
ddt_phys_variant_t v);
|
||||
extern ddt_phys_variant_t ddt_phys_select(const ddt_t *ddt,
|
||||
const ddt_entry_t *dde, const blkptr_t *bp);
|
||||
extern uint64_t ddt_phys_birth(const ddt_univ_phys_t *ddp,
|
||||
ddt_phys_variant_t v);
|
||||
extern int ddt_phys_dva_count(const ddt_univ_phys_t *ddp, ddt_phys_variant_t v,
|
||||
boolean_t encrypted);
|
||||
|
||||
extern void ddt_histogram_add_entry(ddt_t *ddt, ddt_histogram_t *ddh,
|
||||
const ddt_lightweight_entry_t *ddlwe);
|
||||
extern void ddt_histogram_sub_entry(ddt_t *ddt, ddt_histogram_t *ddh,
|
||||
const ddt_lightweight_entry_t *ddlwe);
|
||||
|
||||
extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
|
||||
extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
|
||||
extern void ddt_histogram_total(ddt_stat_t *dds, const ddt_histogram_t *ddh);
|
||||
extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh);
|
||||
|
||||
extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo);
|
||||
extern uint64_t ddt_get_ddt_dsize(spa_t *spa);
|
||||
extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh);
|
||||
@ -243,7 +378,7 @@ extern void ddt_enter(ddt_t *ddt);
|
||||
extern void ddt_exit(ddt_t *ddt);
|
||||
extern void ddt_init(void);
|
||||
extern void ddt_fini(void);
|
||||
extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
|
||||
extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp);
|
||||
extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
|
||||
extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
|
||||
extern void ddt_prefetch_all(spa_t *spa);
|
||||
@ -251,6 +386,8 @@ extern void ddt_prefetch_all(spa_t *spa);
|
||||
extern boolean_t ddt_class_contains(spa_t *spa, ddt_class_t max_class,
|
||||
const blkptr_t *bp);
|
||||
|
||||
extern void ddt_alloc_entry_io(ddt_entry_t *dde);
|
||||
|
||||
extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
|
||||
extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);
|
||||
|
||||
@ -260,10 +397,17 @@ extern void ddt_create(spa_t *spa);
|
||||
extern int ddt_load(spa_t *spa);
|
||||
extern void ddt_unload(spa_t *spa);
|
||||
extern void ddt_sync(spa_t *spa, uint64_t txg);
|
||||
extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
|
||||
|
||||
extern void ddt_walk_init(spa_t *spa, uint64_t txg);
|
||||
extern boolean_t ddt_walk_ready(spa_t *spa);
|
||||
extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb,
|
||||
ddt_lightweight_entry_t *ddlwe);
|
||||
|
||||
extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp);
|
||||
|
||||
extern int ddt_prune_unique_entries(spa_t *spa, zpool_ddt_prune_unit_t unit,
|
||||
uint64_t amount);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -28,11 +28,132 @@
|
||||
#define _SYS_DDT_IMPL_H
|
||||
|
||||
#include <sys/ddt.h>
|
||||
#include <sys/bitops.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* DDT version numbers */
|
||||
#define DDT_VERSION_LEGACY (0)
|
||||
#define DDT_VERSION_FDT (1)
|
||||
|
||||
/* Dummy version to signal that configure is still necessary */
|
||||
#define DDT_VERSION_UNCONFIGURED (UINT64_MAX)
|
||||
|
||||
/* Names of interesting objects in the DDT root dir */
|
||||
#define DDT_DIR_VERSION "version"
|
||||
#define DDT_DIR_FLAGS "flags"
|
||||
|
||||
/* Fill a lightweight entry from a live entry. */
|
||||
#define DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, ddlwe) do { \
|
||||
memset((ddlwe), 0, sizeof (*ddlwe)); \
|
||||
(ddlwe)->ddlwe_key = (dde)->dde_key; \
|
||||
(ddlwe)->ddlwe_type = (dde)->dde_type; \
|
||||
(ddlwe)->ddlwe_class = (dde)->dde_class; \
|
||||
memcpy(&(ddlwe)->ddlwe_phys, (dde)->dde_phys, DDT_PHYS_SIZE(ddt)); \
|
||||
} while (0)
|
||||
|
||||
#define DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, ddlwe) do { \
|
||||
memset((ddlwe), 0, sizeof (*ddlwe)); \
|
||||
(ddlwe)->ddlwe_key = (ddle)->ddle_key; \
|
||||
(ddlwe)->ddlwe_type = (ddle)->ddle_type; \
|
||||
(ddlwe)->ddlwe_class = (ddle)->ddle_class; \
|
||||
memcpy(&(ddlwe)->ddlwe_phys, (ddle)->ddle_phys, DDT_PHYS_SIZE(ddt)); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* An entry on the log tree. These are "frozen", and a record of what's in
|
||||
* the on-disk log. They can't be used in place, but can be "loaded" back into
|
||||
* the live tree.
|
||||
*/
|
||||
typedef struct {
|
||||
ddt_key_t ddle_key; /* ddt_log_tree key */
|
||||
avl_node_t ddle_node; /* ddt_log_tree node */
|
||||
|
||||
ddt_type_t ddle_type; /* storage type */
|
||||
ddt_class_t ddle_class; /* storage class */
|
||||
|
||||
/* extra allocation for flat/trad phys */
|
||||
ddt_univ_phys_t ddle_phys[];
|
||||
} ddt_log_entry_t;
|
||||
|
||||
/* On-disk log record types. */
|
||||
typedef enum {
|
||||
DLR_INVALID = 0, /* end of block marker */
|
||||
DLR_ENTRY = 1, /* an entry to add or replace in the log tree */
|
||||
} ddt_log_record_type_t;
|
||||
|
||||
/* On-disk log record header. */
|
||||
typedef struct {
|
||||
/*
|
||||
* dlr_info is a packed u64, use the DLR_GET/DLR_SET macros below to
|
||||
* access it.
|
||||
*
|
||||
* bits 0-7: record type (ddt_log_record_type_t)
|
||||
* bits 8-15: length of record header+payload
|
||||
* bits 16-47: reserved, all zero
|
||||
* bits 48-55: if type==DLR_ENTRY, storage type (ddt_type)
|
||||
* otherwise all zero
|
||||
* bits 56-63: if type==DLR_ENTRY, storage class (ddt_class)
|
||||
* otherwise all zero
|
||||
*/
|
||||
uint64_t dlr_info;
|
||||
uint8_t dlr_payload[];
|
||||
} ddt_log_record_t;
|
||||
|
||||
#define DLR_GET_TYPE(dlr) BF64_GET((dlr)->dlr_info, 0, 8)
|
||||
#define DLR_SET_TYPE(dlr, v) BF64_SET((dlr)->dlr_info, 0, 8, v)
|
||||
#define DLR_GET_RECLEN(dlr) BF64_GET((dlr)->dlr_info, 8, 16)
|
||||
#define DLR_SET_RECLEN(dlr, v) BF64_SET((dlr)->dlr_info, 8, 16, v)
|
||||
#define DLR_GET_ENTRY_TYPE(dlr) BF64_GET((dlr)->dlr_info, 48, 8)
|
||||
#define DLR_SET_ENTRY_TYPE(dlr, v) BF64_SET((dlr)->dlr_info, 48, 8, v)
|
||||
#define DLR_GET_ENTRY_CLASS(dlr) BF64_GET((dlr)->dlr_info, 56, 8)
|
||||
#define DLR_SET_ENTRY_CLASS(dlr, v) BF64_SET((dlr)->dlr_info, 56, 8, v)
|
||||
|
||||
/* Payload for DLR_ENTRY. */
|
||||
typedef struct {
|
||||
ddt_key_t dlre_key;
|
||||
ddt_univ_phys_t dlre_phys[];
|
||||
} ddt_log_record_entry_t;
|
||||
|
||||
/* Log flags (ddl_flags, dlh_flags) */
|
||||
#define DDL_FLAG_FLUSHING (1 << 0) /* this log is being flushed */
|
||||
#define DDL_FLAG_CHECKPOINT (1 << 1) /* header has a checkpoint */
|
||||
|
||||
/* On-disk log header, stored in the bonus buffer. */
|
||||
typedef struct {
|
||||
/*
|
||||
* dlh_info is a packed u64, use the DLH_GET/DLH_SET macros below to
|
||||
* access it.
|
||||
*
|
||||
* bits 0-7: log version
|
||||
* bits 8-15: log flags
|
||||
* bits 16-63: reserved, all zero
|
||||
*/
|
||||
uint64_t dlh_info;
|
||||
|
||||
uint64_t dlh_length; /* log size in bytes */
|
||||
uint64_t dlh_first_txg; /* txg this log went active */
|
||||
ddt_key_t dlh_checkpoint; /* last checkpoint */
|
||||
} ddt_log_header_t;
|
||||
|
||||
#define DLH_GET_VERSION(dlh) BF64_GET((dlh)->dlh_info, 0, 8)
|
||||
#define DLH_SET_VERSION(dlh, v) BF64_SET((dlh)->dlh_info, 0, 8, v)
|
||||
#define DLH_GET_FLAGS(dlh) BF64_GET((dlh)->dlh_info, 8, 8)
|
||||
#define DLH_SET_FLAGS(dlh, v) BF64_SET((dlh)->dlh_info, 8, 8, v)
|
||||
|
||||
/* DDT log update state */
|
||||
typedef struct {
|
||||
dmu_tx_t *dlu_tx; /* tx the update is being applied to */
|
||||
dnode_t *dlu_dn; /* log object dnode */
|
||||
dmu_buf_t **dlu_dbp; /* array of block buffer pointers */
|
||||
int dlu_ndbp; /* number of block buffer pointers */
|
||||
uint16_t dlu_reclen; /* cached length of record */
|
||||
uint64_t dlu_block; /* block for next entry */
|
||||
uint64_t dlu_offset; /* offset for next entry */
|
||||
} ddt_log_update_t;
|
||||
|
||||
/*
|
||||
* Ops vector to access a specific DDT object type.
|
||||
*/
|
||||
@ -42,25 +163,53 @@ typedef struct {
|
||||
boolean_t prehash);
|
||||
int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
|
||||
int (*ddt_op_lookup)(objset_t *os, uint64_t object,
|
||||
const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize);
|
||||
const ddt_key_t *ddk, void *phys, size_t psize);
|
||||
int (*ddt_op_contains)(objset_t *os, uint64_t object,
|
||||
const ddt_key_t *ddk);
|
||||
void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
|
||||
const ddt_key_t *ddk);
|
||||
void (*ddt_op_prefetch_all)(objset_t *os, uint64_t object);
|
||||
int (*ddt_op_update)(objset_t *os, uint64_t object,
|
||||
const ddt_key_t *ddk, const ddt_phys_t *phys, size_t psize,
|
||||
const ddt_key_t *ddk, const void *phys, size_t psize,
|
||||
dmu_tx_t *tx);
|
||||
int (*ddt_op_remove)(objset_t *os, uint64_t object,
|
||||
const ddt_key_t *ddk, dmu_tx_t *tx);
|
||||
int (*ddt_op_walk)(objset_t *os, uint64_t object, uint64_t *walk,
|
||||
ddt_key_t *ddk, ddt_phys_t *phys, size_t psize);
|
||||
ddt_key_t *ddk, void *phys, size_t psize);
|
||||
int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
|
||||
} ddt_ops_t;
|
||||
|
||||
extern const ddt_ops_t ddt_zap_ops;
|
||||
|
||||
extern void ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg);
|
||||
/* Dedup log API */
|
||||
extern void ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx,
|
||||
ddt_log_update_t *dlu);
|
||||
extern void ddt_log_entry(ddt_t *ddt, ddt_lightweight_entry_t *dde,
|
||||
ddt_log_update_t *dlu);
|
||||
extern void ddt_log_commit(ddt_t *ddt, ddt_log_update_t *dlu);
|
||||
|
||||
extern boolean_t ddt_log_take_first(ddt_t *ddt, ddt_log_t *ddl,
|
||||
ddt_lightweight_entry_t *ddlwe);
|
||||
|
||||
extern boolean_t ddt_log_find_key(ddt_t *ddt, const ddt_key_t *ddk,
|
||||
ddt_lightweight_entry_t *ddlwe);
|
||||
extern boolean_t ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl,
|
||||
const ddt_key_t *ddk);
|
||||
|
||||
extern void ddt_log_checkpoint(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe,
|
||||
dmu_tx_t *tx);
|
||||
extern void ddt_log_truncate(ddt_t *ddt, dmu_tx_t *tx);
|
||||
|
||||
extern boolean_t ddt_log_swap(ddt_t *ddt, dmu_tx_t *tx);
|
||||
|
||||
extern void ddt_log_destroy(ddt_t *ddt, dmu_tx_t *tx);
|
||||
|
||||
extern int ddt_log_load(ddt_t *ddt);
|
||||
extern void ddt_log_alloc(ddt_t *ddt);
|
||||
extern void ddt_log_free(ddt_t *ddt);
|
||||
|
||||
extern void ddt_log_init(void);
|
||||
extern void ddt_log_fini(void);
|
||||
|
||||
/*
|
||||
* These are only exposed so that zdb can access them. Try not to use them
|
||||
@ -68,22 +217,59 @@ extern void ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg);
|
||||
* them up.
|
||||
*/
|
||||
|
||||
/*
|
||||
* We use a histogram to convert a percentage request into a
|
||||
* cutoff value where entries older than the cutoff get pruned.
|
||||
*
|
||||
* The histogram bins represent hours in power-of-two increments.
|
||||
* 16 bins covers up to four years.
|
||||
*/
|
||||
#define HIST_BINS 16
|
||||
|
||||
typedef struct ddt_age_histo {
|
||||
uint64_t dah_entries;
|
||||
uint64_t dah_age_histo[HIST_BINS];
|
||||
} ddt_age_histo_t;
|
||||
|
||||
void ddt_prune_walk(spa_t *spa, uint64_t cutoff, ddt_age_histo_t *histogram);
|
||||
|
||||
#if defined(_KERNEL) || !defined(ZFS_DEBUG)
|
||||
#define ddt_dump_age_histogram(histo, cutoff) ((void)0)
|
||||
#else
|
||||
static inline void
|
||||
ddt_dump_age_histogram(ddt_age_histo_t *histogram, uint64_t cutoff)
|
||||
{
|
||||
if (histogram->dah_entries == 0)
|
||||
return;
|
||||
|
||||
(void) printf("DDT prune unique class age, %llu hour cutoff\n",
|
||||
(u_longlong_t)(gethrestime_sec() - cutoff)/3600);
|
||||
(void) printf("%5s %9s %4s\n", "age", "blocks", "amnt");
|
||||
(void) printf("%5s %9s %4s\n", "-----", "---------", "----");
|
||||
for (int i = 0; i < HIST_BINS; i++) {
|
||||
(void) printf("%5d %9llu %4d%%\n", 1<<i,
|
||||
(u_longlong_t)histogram->dah_age_histo[i],
|
||||
(int)((histogram->dah_age_histo[i] * 100) /
|
||||
histogram->dah_entries));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Enough room to expand DMU_POOL_DDT format for all possible DDT
|
||||
* checksum/class/type combinations.
|
||||
*/
|
||||
#define DDT_NAMELEN 32
|
||||
|
||||
extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
|
||||
extern uint64_t ddt_phys_total_refcnt(const ddt_t *ddt,
|
||||
const ddt_univ_phys_t *ddp);
|
||||
|
||||
extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
|
||||
|
||||
extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
|
||||
|
||||
extern void ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
|
||||
char *name);
|
||||
extern int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
|
||||
uint64_t *walk, ddt_entry_t *dde);
|
||||
uint64_t *walk, ddt_lightweight_entry_t *ddlwe);
|
||||
extern int ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
|
||||
uint64_t *count);
|
||||
extern int ddt_object_info(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
|
||||
|
@ -375,7 +375,9 @@ typedef struct dmu_buf {
|
||||
#define DMU_POOL_L2CACHE "l2cache"
|
||||
#define DMU_POOL_TMP_USERREFS "tmp_userrefs"
|
||||
#define DMU_POOL_DDT "DDT-%s-%s-%s"
|
||||
#define DMU_POOL_DDT_LOG "DDT-log-%s-%u"
|
||||
#define DMU_POOL_DDT_STATS "DDT-statistics"
|
||||
#define DMU_POOL_DDT_DIR "DDT-%s"
|
||||
#define DMU_POOL_CREATION_VERSION "creation_version"
|
||||
#define DMU_POOL_SCAN "scan"
|
||||
#define DMU_POOL_ERRORSCRUB "error_scrub"
|
||||
|
@ -202,7 +202,7 @@ boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
|
||||
boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp);
|
||||
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
|
||||
void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
|
||||
ddt_entry_t *dde, dmu_tx_t *tx);
|
||||
ddt_t *ddt, ddt_lightweight_entry_t *ddlwe, dmu_tx_t *tx);
|
||||
void dsl_scan_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
|
||||
void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
|
||||
void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
|
||||
|
@ -1422,7 +1422,7 @@ typedef enum {
|
||||
*/
|
||||
typedef enum zfs_ioc {
|
||||
/*
|
||||
* Core features - 88/128 numbers reserved.
|
||||
* Core features - 89/128 numbers reserved.
|
||||
*/
|
||||
#ifdef __FreeBSD__
|
||||
ZFS_IOC_FIRST = 0,
|
||||
@ -1519,6 +1519,7 @@ typedef enum zfs_ioc {
|
||||
ZFS_IOC_VDEV_SET_PROPS, /* 0x5a56 */
|
||||
ZFS_IOC_POOL_SCRUB, /* 0x5a57 */
|
||||
ZFS_IOC_POOL_PREFETCH, /* 0x5a58 */
|
||||
ZFS_IOC_DDT_PRUNE, /* 0x5a59 */
|
||||
|
||||
/*
|
||||
* Per-platform (Optional) - 8/128 numbers reserved.
|
||||
@ -1655,6 +1656,12 @@ typedef enum {
|
||||
ZPOOL_PREFETCH_DDT
|
||||
} zpool_prefetch_type_t;
|
||||
|
||||
typedef enum {
|
||||
ZPOOL_DDT_PRUNE_NONE,
|
||||
ZPOOL_DDT_PRUNE_AGE, /* in seconds */
|
||||
ZPOOL_DDT_PRUNE_PERCENTAGE, /* 1 - 100 */
|
||||
} zpool_ddt_prune_unit_t;
|
||||
|
||||
/*
|
||||
* Bookmark name values.
|
||||
*/
|
||||
@ -1710,6 +1717,11 @@ typedef enum {
|
||||
#define ZPOOL_INITIALIZE_COMMAND "initialize_command"
|
||||
#define ZPOOL_INITIALIZE_VDEVS "initialize_vdevs"
|
||||
|
||||
/*
|
||||
* The following are names used when invoking ZFS_IOC_POOL_REGUID.
|
||||
*/
|
||||
#define ZPOOL_REGUID_GUID "guid"
|
||||
|
||||
/*
|
||||
* The following are names used when invoking ZFS_IOC_POOL_TRIM.
|
||||
*/
|
||||
@ -1748,6 +1760,12 @@ typedef enum {
|
||||
*/
|
||||
#define ZPOOL_PREFETCH_TYPE "prefetch_type"
|
||||
|
||||
/*
|
||||
* The following are names used when invoking ZFS_IOC_DDT_PRUNE.
|
||||
*/
|
||||
#define DDT_PRUNE_UNIT "ddt_prune_unit"
|
||||
#define DDT_PRUNE_AMOUNT "ddt_prune_amount"
|
||||
|
||||
/*
|
||||
* Flags for ZFS_IOC_VDEV_SET_STATE
|
||||
*/
|
||||
|
@ -572,7 +572,7 @@ typedef struct blkptr {
|
||||
#define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \
|
||||
BP_GET_PSIZE(bp))
|
||||
|
||||
#define BP_ZERO(bp) \
|
||||
#define BP_ZERO_DVAS(bp) \
|
||||
{ \
|
||||
(bp)->blk_dva[0].dva_word[0] = 0; \
|
||||
(bp)->blk_dva[0].dva_word[1] = 0; \
|
||||
@ -580,6 +580,11 @@ typedef struct blkptr {
|
||||
(bp)->blk_dva[1].dva_word[1] = 0; \
|
||||
(bp)->blk_dva[2].dva_word[0] = 0; \
|
||||
(bp)->blk_dva[2].dva_word[1] = 0; \
|
||||
}
|
||||
|
||||
#define BP_ZERO(bp) \
|
||||
{ \
|
||||
BP_ZERO_DVAS(bp); \
|
||||
(bp)->blk_prop = 0; \
|
||||
(bp)->blk_pad[0] = 0; \
|
||||
(bp)->blk_pad[1] = 0; \
|
||||
@ -1087,7 +1092,7 @@ extern void spa_strfree(char *);
|
||||
extern uint64_t spa_generate_guid(spa_t *spa);
|
||||
extern void snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp);
|
||||
extern void spa_freeze(spa_t *spa);
|
||||
extern int spa_change_guid(spa_t *spa);
|
||||
extern int spa_change_guid(spa_t *spa, const uint64_t *guidp);
|
||||
extern void spa_upgrade(spa_t *spa, uint64_t version);
|
||||
extern void spa_evict_all(void);
|
||||
extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid,
|
||||
|
@ -412,6 +412,7 @@ struct spa {
|
||||
uint64_t spa_dedup_dspace; /* Cache get_dedup_dspace() */
|
||||
uint64_t spa_dedup_checksum; /* default dedup checksum */
|
||||
uint64_t spa_dspace; /* dspace in normal class */
|
||||
boolean_t spa_active_ddt_prune; /* ddt prune process active */
|
||||
struct brt *spa_brt; /* in-core BRT */
|
||||
kmutex_t spa_vdev_top_lock; /* dueling offline/remove */
|
||||
kmutex_t spa_proc_lock; /* protects spa_proc* */
|
||||
|
@ -167,6 +167,9 @@ typedef enum zio_suspend_reason {
|
||||
* This was originally an enum type. However, those are 32-bit and there is no
|
||||
* way to make a 64-bit enum type. Since we ran out of bits for flags, we were
|
||||
* forced to upgrade it to a uint64_t.
|
||||
*
|
||||
* NOTE: PLEASE UPDATE THE BITFIELD STRINGS IN zfs_valstr.c IF YOU ADD ANOTHER
|
||||
* FLAG.
|
||||
*/
|
||||
typedef uint64_t zio_flag_t;
|
||||
/*
|
||||
|
@ -22,7 +22,7 @@
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2019, Allan Jude
|
||||
* Copyright (c) 2019, Klara Inc.
|
||||
* Copyright (c) 2019, 2024, Klara, Inc.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2015, 2016 by Delphix. All rights reserved.
|
||||
*/
|
||||
@ -122,25 +122,15 @@ enum zio_zstd_levels {
|
||||
struct zio_prop;
|
||||
|
||||
/* Common signature for all zio compress functions. */
|
||||
typedef size_t zio_compress_func_t(void *src, void *dst,
|
||||
typedef size_t zio_compress_func_t(abd_t *src, abd_t *dst,
|
||||
size_t s_len, size_t d_len, int);
|
||||
/* Common signature for all zio decompress functions. */
|
||||
typedef int zio_decompress_func_t(void *src, void *dst,
|
||||
typedef int zio_decompress_func_t(abd_t *src, abd_t *dst,
|
||||
size_t s_len, size_t d_len, int);
|
||||
/* Common signature for all zio decompress and get level functions. */
|
||||
typedef int zio_decompresslevel_func_t(void *src, void *dst,
|
||||
typedef int zio_decompresslevel_func_t(abd_t *src, abd_t *dst,
|
||||
size_t s_len, size_t d_len, uint8_t *level);
|
||||
/* Common signature for all zio get-compression-level functions. */
|
||||
typedef int zio_getlevel_func_t(void *src, size_t s_len, uint8_t *level);
|
||||
|
||||
|
||||
/*
|
||||
* Common signature for all zio decompress functions using an ABD as input.
|
||||
* This is helpful if you have both compressed ARC and scatter ABDs enabled,
|
||||
* but is not a requirement for all compression algorithms.
|
||||
*/
|
||||
typedef int zio_decompress_abd_func_t(abd_t *src, void *dst,
|
||||
size_t s_len, size_t d_len, int);
|
||||
/*
|
||||
* Information about each compression function.
|
||||
*/
|
||||
@ -163,34 +153,66 @@ extern void lz4_fini(void);
|
||||
/*
|
||||
* Compression routines.
|
||||
*/
|
||||
extern size_t lzjb_compress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern int lzjb_decompress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern size_t gzip_compress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern int gzip_decompress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern size_t zle_compress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern int zle_decompress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern size_t lz4_compress_zfs(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern int lz4_decompress_zfs(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern size_t zfs_lzjb_compress(abd_t *src, abd_t *dst, size_t s_len,
|
||||
size_t d_len, int level);
|
||||
extern int zfs_lzjb_decompress(abd_t *src, abd_t *dst, size_t s_len,
|
||||
size_t d_len, int level);
|
||||
extern size_t zfs_gzip_compress(abd_t *src, abd_t *dst, size_t s_len,
|
||||
size_t d_len, int level);
|
||||
extern int zfs_gzip_decompress(abd_t *src, abd_t *dst, size_t s_len,
|
||||
size_t d_len, int level);
|
||||
extern size_t zfs_zle_compress(abd_t *src, abd_t *dst, size_t s_len,
|
||||
size_t d_len, int level);
|
||||
extern int zfs_zle_decompress(abd_t *src, abd_t *dst, size_t s_len,
|
||||
size_t d_len, int level);
|
||||
extern size_t zfs_lz4_compress(abd_t *src, abd_t *dst, size_t s_len,
|
||||
size_t d_len, int level);
|
||||
extern int zfs_lz4_decompress(abd_t *src, abd_t *dst, size_t s_len,
|
||||
size_t d_len, int level);
|
||||
|
||||
/*
|
||||
* Compress and decompress data if necessary.
|
||||
*/
|
||||
extern size_t zio_compress_data(enum zio_compress c, abd_t *src, void **dst,
|
||||
extern size_t zio_compress_data(enum zio_compress c, abd_t *src, abd_t **dst,
|
||||
size_t s_len, uint8_t level);
|
||||
extern int zio_decompress_data(enum zio_compress c, abd_t *src, void *dst,
|
||||
size_t s_len, size_t d_len, uint8_t *level);
|
||||
extern int zio_decompress_data_buf(enum zio_compress c, void *src, void *dst,
|
||||
extern int zio_decompress_data(enum zio_compress c, abd_t *src, abd_t *abd,
|
||||
size_t s_len, size_t d_len, uint8_t *level);
|
||||
extern int zio_compress_to_feature(enum zio_compress comp);
|
||||
|
||||
#define ZFS_COMPRESS_WRAP_DECL(name) \
|
||||
size_t \
|
||||
name(abd_t *src, abd_t *dst, size_t s_len, size_t d_len, int n) \
|
||||
{ \
|
||||
void *s_buf = abd_borrow_buf_copy(src, s_len); \
|
||||
void *d_buf = abd_borrow_buf(dst, d_len); \
|
||||
size_t c_len = name##_buf(s_buf, d_buf, s_len, d_len, n); \
|
||||
abd_return_buf(src, s_buf, s_len); \
|
||||
abd_return_buf_copy(dst, d_buf, d_len); \
|
||||
return (c_len); \
|
||||
}
|
||||
#define ZFS_DECOMPRESS_WRAP_DECL(name) \
|
||||
int \
|
||||
name(abd_t *src, abd_t *dst, size_t s_len, size_t d_len, int n) \
|
||||
{ \
|
||||
void *s_buf = abd_borrow_buf_copy(src, s_len); \
|
||||
void *d_buf = abd_borrow_buf(dst, d_len); \
|
||||
int err = name##_buf(s_buf, d_buf, s_len, d_len, n); \
|
||||
abd_return_buf(src, s_buf, s_len); \
|
||||
abd_return_buf_copy(dst, d_buf, d_len); \
|
||||
return (err); \
|
||||
}
|
||||
#define ZFS_DECOMPRESS_LEVEL_WRAP_DECL(name) \
|
||||
int \
|
||||
name(abd_t *src, abd_t *dst, size_t s_len, size_t d_len, uint8_t *n) \
|
||||
{ \
|
||||
void *s_buf = abd_borrow_buf_copy(src, s_len); \
|
||||
void *d_buf = abd_borrow_buf(dst, d_len); \
|
||||
int err = name##_buf(s_buf, d_buf, s_len, d_len, n); \
|
||||
abd_return_buf(src, s_buf, s_len); \
|
||||
abd_return_buf_copy(dst, d_buf, d_len); \
|
||||
return (err); \
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -120,6 +120,9 @@ extern "C" {
|
||||
|
||||
/*
|
||||
* zio pipeline stage definitions
|
||||
*
|
||||
* NOTE: PLEASE UPDATE THE BITFIELD STRINGS IN zfs_valstr.c IF YOU ADD ANOTHER
|
||||
* FLAG.
|
||||
*/
|
||||
enum zio_stage {
|
||||
ZIO_STAGE_OPEN = 1 << 0, /* RWFCXT */
|
||||
|
@ -22,6 +22,10 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* NOTE: PLEASE UPDATE THE ENUM STRINGS IN zfs_valstr.c IF YOU ADD ANOTHER
|
||||
* VALUE.
|
||||
*/
|
||||
typedef enum zio_priority {
|
||||
ZIO_PRIORITY_SYNC_READ,
|
||||
ZIO_PRIORITY_SYNC_WRITE, /* ZIL */
|
||||
|
@ -90,14 +90,12 @@ typedef struct zfs_zstd_meta {
|
||||
int zstd_init(void);
|
||||
void zstd_fini(void);
|
||||
|
||||
size_t zfs_zstd_compress(void *s_start, void *d_start, size_t s_len,
|
||||
size_t d_len, int level);
|
||||
size_t zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len,
|
||||
size_t zfs_zstd_compress(abd_t *src, abd_t *dst, size_t s_len,
|
||||
size_t d_len, int level);
|
||||
int zfs_zstd_get_level(void *s_start, size_t s_len, uint8_t *level);
|
||||
int zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
|
||||
int zfs_zstd_decompress_level(abd_t *src, abd_t *dst, size_t s_len,
|
||||
size_t d_len, uint8_t *level);
|
||||
int zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len,
|
||||
int zfs_zstd_decompress(abd_t *src, abd_t *dst, size_t s_len,
|
||||
size_t d_len, int n);
|
||||
void zfs_zstd_cache_reap_now(void);
|
||||
|
||||
|
@ -82,6 +82,7 @@ typedef enum spa_feature {
|
||||
SPA_FEATURE_AVZ_V2,
|
||||
SPA_FEATURE_REDACTION_LIST_SPILL,
|
||||
SPA_FEATURE_RAIDZ_EXPANSION,
|
||||
SPA_FEATURE_FAST_DEDUP,
|
||||
SPA_FEATURES
|
||||
} spa_feature_t;
|
||||
|
||||
|
84
sys/contrib/openzfs/include/zfs_valstr.h
Normal file
84
sys/contrib/openzfs/include/zfs_valstr.h
Normal file
@ -0,0 +1,84 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2024, Klara Inc.
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_VALSTR_H
|
||||
#define _ZFS_VALSTR_H extern __attribute__((visibility("default")))
|
||||
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* These macros create function prototypes for pretty-printing or stringifying
|
||||
* certain kinds of numeric types.
|
||||
*
|
||||
* _ZFS_VALSTR_DECLARE_BITFIELD(name) creates:
|
||||
*
|
||||
* size_t zfs_valstr_<name>_bits(uint64_t bits, char *out, size_t outlen);
|
||||
* expands single char for each set bit, and space for each clear bit
|
||||
*
|
||||
* size_t zfs_valstr_<name>_pairs(uint64_t bits, char *out, size_t outlen);
|
||||
* expands two-char mnemonic for each bit set in `bits`, separated by `|`
|
||||
*
|
||||
* size_t zfs_valstr_<name>(uint64_t bits, char *out, size_t outlen);
|
||||
* expands full name of each bit set in `bits`, separated by spaces
|
||||
*
|
||||
* _ZFS_VALSTR_DECLARE_ENUM(name) creates:
|
||||
*
|
||||
* size_t zfs_valstr_<name>(int v, char *out, size_t outlen);
|
||||
* expands full name of enum value
|
||||
*
|
||||
* Each _ZFS_VALSTR_DECLARE_xxx needs a corresponding _VALSTR_xxx_IMPL string
|
||||
* table in vfs_valstr.c.
|
||||
*/
|
||||
|
||||
#define _ZFS_VALSTR_DECLARE_BITFIELD(name) \
|
||||
_ZFS_VALSTR_H size_t zfs_valstr_ ## name ## _bits( \
|
||||
uint64_t bits, char *out, size_t outlen); \
|
||||
_ZFS_VALSTR_H size_t zfs_valstr_ ## name ## _pairs( \
|
||||
uint64_t bits, char *out, size_t outlen); \
|
||||
_ZFS_VALSTR_H size_t zfs_valstr_ ## name( \
|
||||
uint64_t bits, char *out, size_t outlen); \
|
||||
|
||||
#define _ZFS_VALSTR_DECLARE_ENUM(name) \
|
||||
_ZFS_VALSTR_H size_t zfs_valstr_ ## name( \
|
||||
int v, char *out, size_t outlen); \
|
||||
|
||||
_ZFS_VALSTR_DECLARE_BITFIELD(zio_flag)
|
||||
_ZFS_VALSTR_DECLARE_BITFIELD(zio_stage)
|
||||
|
||||
_ZFS_VALSTR_DECLARE_ENUM(zio_priority)
|
||||
|
||||
#undef _ZFS_VALSTR_DECLARE_BITFIELD
|
||||
#undef _ZFS_VALSTR_DECLARE_ENUM
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZFS_VALSTR_H */
|
@ -47,6 +47,7 @@ nodist_libzfs_la_SOURCES = \
|
||||
module/zcommon/zfs_fletcher_superscalar4.c \
|
||||
module/zcommon/zfs_namecheck.c \
|
||||
module/zcommon/zfs_prop.c \
|
||||
module/zcommon/zfs_valstr.c \
|
||||
module/zcommon/zpool_prop.c \
|
||||
module/zcommon/zprop_common.c
|
||||
|
||||
|
@ -183,8 +183,8 @@
|
||||
<elf-symbol name='fsleep' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='get_dataset_depth' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='get_system_hostid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='getexecname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='get_timestamp' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='getexecname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='getextmntent' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='getmntany' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='getprop_uint64' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
@ -454,6 +454,13 @@
|
||||
<elf-symbol name='zfs_userns' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_userspace' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_valid_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_valstr_zio_flag' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_valstr_zio_flag_bits' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_valstr_zio_flag_pairs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_valstr_zio_priority' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_valstr_zio_stage' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_valstr_zio_stage_bits' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_valstr_zio_stage_pairs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_version_kernel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_version_nvlist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_version_print' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
@ -466,7 +473,9 @@
|
||||
<elf-symbol name='zpool_clear' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_clear_label' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_close' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_collect_unsup_feat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_ddt_prune' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_default_search_paths' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_disable_datasets' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
@ -485,8 +494,8 @@
|
||||
<elf-symbol name='zpool_export_force' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_feature_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_find_config' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_find_vdev' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_find_parent_vdev' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_find_vdev' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_find_vdev_by_physpath' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_free_handles' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_get_all_vdev_props' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
@ -529,7 +538,6 @@
|
||||
<elf-symbol name='zpool_prefetch' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_prepare_and_label_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_prepare_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_collect_unsup_feat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_prop_align_right' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_prop_column_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_prop_default_numeric' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
@ -556,6 +564,7 @@
|
||||
<elf-symbol name='zpool_scan' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_search_import' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_set_bootenv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_set_guid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_set_prop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_set_vdev_prop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zpool_skip_pool' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
@ -616,7 +625,7 @@
|
||||
<elf-symbol name='fletcher_4_superscalar_ops' size='128' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='spa_feature_table' size='2296' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='spa_feature_table' size='2352' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
@ -5928,6 +5937,7 @@
|
||||
<enumerator name='ZFS_IOC_VDEV_SET_PROPS' value='23126'/>
|
||||
<enumerator name='ZFS_IOC_POOL_SCRUB' value='23127'/>
|
||||
<enumerator name='ZFS_IOC_POOL_PREFETCH' value='23128'/>
|
||||
<enumerator name='ZFS_IOC_DDT_PRUNE' value='23129'/>
|
||||
<enumerator name='ZFS_IOC_PLATFORM' value='23168'/>
|
||||
<enumerator name='ZFS_IOC_EVENTS_NEXT' value='23169'/>
|
||||
<enumerator name='ZFS_IOC_EVENTS_CLEAR' value='23170'/>
|
||||
@ -5962,6 +5972,13 @@
|
||||
<enumerator name='ZPOOL_PREFETCH_DDT' value='1'/>
|
||||
</enum-decl>
|
||||
<typedef-decl name='zpool_prefetch_type_t' type-id='0299ab50' id='e55ff6bc'/>
|
||||
<enum-decl name='zpool_ddt_prune_unit_t' naming-typedef-id='02e25ab0' id='509ae11c'>
|
||||
<underlying-type type-id='9cac1fee'/>
|
||||
<enumerator name='ZPOOL_DDT_PRUNE_NONE' value='0'/>
|
||||
<enumerator name='ZPOOL_DDT_PRUNE_AGE' value='1'/>
|
||||
<enumerator name='ZPOOL_DDT_PRUNE_PERCENTAGE' value='2'/>
|
||||
</enum-decl>
|
||||
<typedef-decl name='zpool_ddt_prune_unit_t' type-id='509ae11c' id='02e25ab0'/>
|
||||
<enum-decl name='spa_feature' id='33ecb627'>
|
||||
<underlying-type type-id='9cac1fee'/>
|
||||
<enumerator name='SPA_FEATURE_NONE' value='-1'/>
|
||||
@ -6006,7 +6023,8 @@
|
||||
<enumerator name='SPA_FEATURE_AVZ_V2' value='38'/>
|
||||
<enumerator name='SPA_FEATURE_REDACTION_LIST_SPILL' value='39'/>
|
||||
<enumerator name='SPA_FEATURE_RAIDZ_EXPANSION' value='40'/>
|
||||
<enumerator name='SPA_FEATURES' value='41'/>
|
||||
<enumerator name='SPA_FEATURE_FAST_DEDUP' value='41'/>
|
||||
<enumerator name='SPA_FEATURES' value='42'/>
|
||||
</enum-decl>
|
||||
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
|
||||
<qualified-type-def type-id='80f4b756' const='yes' id='b99c00c9'/>
|
||||
@ -6137,6 +6155,12 @@
|
||||
<parameter type-id='857bb57e'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-decl name='lzc_ddt_prune' visibility='default' binding='global' size-in-bits='64'>
|
||||
<parameter type-id='80f4b756'/>
|
||||
<parameter type-id='02e25ab0'/>
|
||||
<parameter type-id='9c313c2d'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-decl name='zfs_resolve_shortname' mangled-name='zfs_resolve_shortname' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_resolve_shortname'>
|
||||
<parameter type-id='80f4b756'/>
|
||||
<parameter type-id='26a90f95'/>
|
||||
@ -6638,6 +6662,11 @@
|
||||
<parameter type-id='9c313c2d' name='guid'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-decl name='zpool_set_guid' mangled-name='zpool_set_guid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_set_guid'>
|
||||
<parameter type-id='4c81de99' name='zhp'/>
|
||||
<parameter type-id='713a56f5' name='guid'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-decl name='zpool_reguid' mangled-name='zpool_reguid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_reguid'>
|
||||
<parameter type-id='4c81de99' name='zhp'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
@ -6791,6 +6820,12 @@
|
||||
<parameter type-id='80f4b756' name='propval'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-decl name='zpool_ddt_prune' mangled-name='zpool_ddt_prune' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_ddt_prune'>
|
||||
<parameter type-id='4c81de99' name='zhp'/>
|
||||
<parameter type-id='02e25ab0' name='unit'/>
|
||||
<parameter type-id='9c313c2d' name='amount'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
</abi-instr>
|
||||
<abi-instr address-size='64' path='lib/libzfs/libzfs_sendrecv.c' language='LANG_C99'>
|
||||
<array-type-def dimensions='1' type-id='8901473c' size-in-bits='576' id='f5da478b'>
|
||||
@ -7830,7 +7865,7 @@
|
||||
</data-member>
|
||||
</class-decl>
|
||||
<typedef-decl name='vdev_cbdata_t' type-id='b8006be8' id='a9679c94'/>
|
||||
<class-decl name='zprop_get_cbdata' size-in-bits='832' is-struct='yes' visibility='default' id='f3d3c319'>
|
||||
<class-decl name='zprop_get_cbdata' size-in-bits='960' is-struct='yes' visibility='default' id='f3d3c319'>
|
||||
<data-member access='public' layout-offset-in-bits='0'>
|
||||
<var-decl name='cb_sources' type-id='95e97e5e' visibility='default'/>
|
||||
</data-member>
|
||||
@ -7849,6 +7884,9 @@
|
||||
<data-member access='public' layout-offset-in-bits='448'>
|
||||
<var-decl name='cb_first' type-id='c19b74c3' visibility='default'/>
|
||||
</data-member>
|
||||
<data-member access='public' layout-offset-in-bits='480'>
|
||||
<var-decl name='cb_json' type-id='c19b74c3' visibility='default'/>
|
||||
</data-member>
|
||||
<data-member access='public' layout-offset-in-bits='512'>
|
||||
<var-decl name='cb_proplist' type-id='3a9b2288' visibility='default'/>
|
||||
</data-member>
|
||||
@ -7858,6 +7896,15 @@
|
||||
<data-member access='public' layout-offset-in-bits='640'>
|
||||
<var-decl name='cb_vdevs' type-id='a9679c94' visibility='default'/>
|
||||
</data-member>
|
||||
<data-member access='public' layout-offset-in-bits='832'>
|
||||
<var-decl name='cb_jsobj' type-id='5ce45b60' visibility='default'/>
|
||||
</data-member>
|
||||
<data-member access='public' layout-offset-in-bits='896'>
|
||||
<var-decl name='cb_json_as_int' type-id='c19b74c3' visibility='default'/>
|
||||
</data-member>
|
||||
<data-member access='public' layout-offset-in-bits='928'>
|
||||
<var-decl name='cb_json_pool_key_guid' type-id='c19b74c3' visibility='default'/>
|
||||
</data-member>
|
||||
</class-decl>
|
||||
<typedef-decl name='zprop_get_cbdata_t' type-id='f3d3c319' id='f3d87113'/>
|
||||
<typedef-decl name='zprop_func' type-id='2e711a2a' id='1ec3747a'/>
|
||||
@ -7961,6 +8008,11 @@
|
||||
<qualified-type-def type-id='d33f11cb' restrict='yes' id='5c53ba29'/>
|
||||
<pointer-type-def type-id='ffa52b96' size-in-bits='64' id='76c8174b'/>
|
||||
<pointer-type-def type-id='f3d87113' size-in-bits='64' id='0d2a0670'/>
|
||||
<function-decl name='nvlist_print_json' visibility='default' binding='global' size-in-bits='64'>
|
||||
<parameter type-id='822cd80b'/>
|
||||
<parameter type-id='5ce45b60'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-decl name='zpool_label_disk' mangled-name='zpool_label_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_label_disk'>
|
||||
<parameter type-id='b0382bb3'/>
|
||||
<parameter type-id='4c81de99'/>
|
||||
@ -8068,6 +8120,11 @@
|
||||
<parameter type-id='d33f11cb'/>
|
||||
<return type-id='48b5725f'/>
|
||||
</function-decl>
|
||||
<function-decl name='putc' visibility='default' binding='global' size-in-bits='64'>
|
||||
<parameter type-id='95e97e5e'/>
|
||||
<parameter type-id='822cd80b'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-decl name='puts' visibility='default' binding='global' size-in-bits='64'>
|
||||
<parameter type-id='80f4b756'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
@ -8086,6 +8143,11 @@
|
||||
<parameter type-id='95e97e5e'/>
|
||||
<return type-id='48b5725f'/>
|
||||
</function-decl>
|
||||
<function-decl name='strspn' visibility='default' binding='global' size-in-bits='64'>
|
||||
<parameter type-id='80f4b756'/>
|
||||
<parameter type-id='80f4b756'/>
|
||||
<return type-id='b59d7dce'/>
|
||||
</function-decl>
|
||||
<function-decl name='strnlen' visibility='default' binding='global' size-in-bits='64'>
|
||||
<parameter type-id='80f4b756'/>
|
||||
<parameter type-id='b59d7dce'/>
|
||||
@ -8285,12 +8347,12 @@
|
||||
<function-decl name='zfs_version_print' mangled-name='zfs_version_print' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_print'>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-decl name='use_color' mangled-name='use_color' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='use_color'>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-decl name='zfs_version_nvlist' mangled-name='zfs_version_nvlist' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_nvlist'>
|
||||
<return type-id='5ce45b60'/>
|
||||
</function-decl>
|
||||
<function-decl name='use_color' mangled-name='use_color' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='use_color'>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-decl name='printf_color' mangled-name='printf_color' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='printf_color'>
|
||||
<parameter type-id='80f4b756' name='color'/>
|
||||
<parameter type-id='80f4b756' name='format'/>
|
||||
@ -8795,11 +8857,6 @@
|
||||
<parameter type-id='78c01427'/>
|
||||
<return type-id='13956559'/>
|
||||
</function-decl>
|
||||
<function-decl name='strspn' visibility='default' binding='global' size-in-bits='64'>
|
||||
<parameter type-id='80f4b756'/>
|
||||
<parameter type-id='80f4b756'/>
|
||||
<return type-id='b59d7dce'/>
|
||||
</function-decl>
|
||||
<function-decl name='zfs_dirnamelen' mangled-name='zfs_dirnamelen' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_dirnamelen'>
|
||||
<parameter type-id='80f4b756' name='path'/>
|
||||
<return type-id='79a0948f'/>
|
||||
@ -9131,8 +9188,8 @@
|
||||
</function-decl>
|
||||
</abi-instr>
|
||||
<abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'>
|
||||
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='18368' id='b93e4d14'>
|
||||
<subrange length='41' type-id='7359adad' id='cb834f44'/>
|
||||
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='18816' id='b937914f'>
|
||||
<subrange length='42' type-id='7359adad' id='cb7c937f'/>
|
||||
</array-type-def>
|
||||
<enum-decl name='zfeature_flags' id='6db816a4'>
|
||||
<underlying-type type-id='9cac1fee'/>
|
||||
@ -9209,7 +9266,7 @@
|
||||
<pointer-type-def type-id='611586a1' size-in-bits='64' id='2e243169'/>
|
||||
<qualified-type-def type-id='eaa32e2f' const='yes' id='83be723c'/>
|
||||
<pointer-type-def type-id='83be723c' size-in-bits='64' id='7acd98a2'/>
|
||||
<var-decl name='spa_feature_table' type-id='b93e4d14' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/>
|
||||
<var-decl name='spa_feature_table' type-id='b937914f' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/>
|
||||
<var-decl name='zfeature_checks_disable' type-id='c19b74c3' mangled-name='zfeature_checks_disable' visibility='default' elf-symbol-id='zfeature_checks_disable'/>
|
||||
<function-decl name='opendir' visibility='default' binding='global' size-in-bits='64'>
|
||||
<parameter type-id='80f4b756'/>
|
||||
@ -9781,6 +9838,50 @@
|
||||
<return type-id='c19b74c3'/>
|
||||
</function-decl>
|
||||
</abi-instr>
|
||||
<abi-instr address-size='64' path='module/zcommon/zfs_valstr.c' language='LANG_C99'>
|
||||
<function-decl name='zfs_valstr_zio_flag' mangled-name='zfs_valstr_zio_flag' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_flag'>
|
||||
<parameter type-id='9c313c2d' name='bits'/>
|
||||
<parameter type-id='26a90f95' name='out'/>
|
||||
<parameter type-id='b59d7dce' name='outlen'/>
|
||||
<return type-id='b59d7dce'/>
|
||||
</function-decl>
|
||||
<function-decl name='zfs_valstr_zio_flag_bits' mangled-name='zfs_valstr_zio_flag_bits' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_flag_bits'>
|
||||
<parameter type-id='9c313c2d' name='bits'/>
|
||||
<parameter type-id='26a90f95' name='out'/>
|
||||
<parameter type-id='b59d7dce' name='outlen'/>
|
||||
<return type-id='b59d7dce'/>
|
||||
</function-decl>
|
||||
<function-decl name='zfs_valstr_zio_flag_pairs' mangled-name='zfs_valstr_zio_flag_pairs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_flag_pairs'>
|
||||
<parameter type-id='9c313c2d' name='bits'/>
|
||||
<parameter type-id='26a90f95' name='out'/>
|
||||
<parameter type-id='b59d7dce' name='outlen'/>
|
||||
<return type-id='b59d7dce'/>
|
||||
</function-decl>
|
||||
<function-decl name='zfs_valstr_zio_stage' mangled-name='zfs_valstr_zio_stage' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_stage'>
|
||||
<parameter type-id='9c313c2d' name='bits'/>
|
||||
<parameter type-id='26a90f95' name='out'/>
|
||||
<parameter type-id='b59d7dce' name='outlen'/>
|
||||
<return type-id='b59d7dce'/>
|
||||
</function-decl>
|
||||
<function-decl name='zfs_valstr_zio_stage_bits' mangled-name='zfs_valstr_zio_stage_bits' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_stage_bits'>
|
||||
<parameter type-id='9c313c2d' name='bits'/>
|
||||
<parameter type-id='26a90f95' name='out'/>
|
||||
<parameter type-id='b59d7dce' name='outlen'/>
|
||||
<return type-id='b59d7dce'/>
|
||||
</function-decl>
|
||||
<function-decl name='zfs_valstr_zio_stage_pairs' mangled-name='zfs_valstr_zio_stage_pairs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_stage_pairs'>
|
||||
<parameter type-id='9c313c2d' name='bits'/>
|
||||
<parameter type-id='26a90f95' name='out'/>
|
||||
<parameter type-id='b59d7dce' name='outlen'/>
|
||||
<return type-id='b59d7dce'/>
|
||||
</function-decl>
|
||||
<function-decl name='zfs_valstr_zio_priority' mangled-name='zfs_valstr_zio_priority' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_valstr_zio_priority'>
|
||||
<parameter type-id='95e97e5e' name='v'/>
|
||||
<parameter type-id='26a90f95' name='out'/>
|
||||
<parameter type-id='b59d7dce' name='outlen'/>
|
||||
<return type-id='b59d7dce'/>
|
||||
</function-decl>
|
||||
</abi-instr>
|
||||
<abi-instr address-size='64' path='module/zcommon/zpool_prop.c' language='LANG_C99'>
|
||||
<function-decl name='zpool_prop_string_to_index' mangled-name='zpool_prop_string_to_index' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_string_to_index'>
|
||||
<parameter type-id='5d0c23fb' name='prop'/>
|
||||
|
@ -3733,6 +3733,13 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
			(void) zpool_standard_error(hdl, errno, errbuf);
		}
		break;

	case ZFS_ERR_ASHIFT_MISMATCH:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "The new device cannot have a higher alignment requirement "
		    "than the top-level vdev."));
		(void) zfs_error(hdl, EZFS_BADTARGET, errbuf);
		break;
	default:
		(void) zpool_standard_error(hdl, errno, errbuf);
	}
@ -4303,22 +4310,55 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)

/*
 * Change the GUID for a pool.
 *
 * Similar to zpool_reguid(), but may take a GUID.
 *
 * If the guid argument is NULL, then no GUID is passed in the nvlist to the
 * ioctl().
 */
int
zpool_reguid(zpool_handle_t *zhp)
zpool_set_guid(zpool_handle_t *zhp, const uint64_t *guid)
{
	char errbuf[ERRBUFLEN];
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	nvlist_t *nvl = NULL;
	zfs_cmd_t zc = {"\0"};
	int error = -1;

	if (guid != NULL) {
		if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
			return (no_memory(hdl));

		if (nvlist_add_uint64(nvl, ZPOOL_REGUID_GUID, *guid) != 0) {
			nvlist_free(nvl);
			return (no_memory(hdl));
		}

		zcmd_write_src_nvlist(hdl, &zc, nvl);
	}

	(void) snprintf(errbuf, sizeof (errbuf),
	    dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
		return (0);
	error = zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc);
	if (error) {
		return (zpool_standard_error(hdl, errno, errbuf));
	}
	if (guid != NULL) {
		zcmd_free_nvlists(&zc);
		nvlist_free(nvl);
	}
	return (0);
}

	return (zpool_standard_error(hdl, errno, errbuf));
/*
 * Change the GUID for a pool.
 */
int
zpool_reguid(zpool_handle_t *zhp)
{
	return (zpool_set_guid(zhp, NULL));
}

/*
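For illustration only (this is not part of the diff; g_zfs, the pool name, and the error handling are assumptions), a libzfs consumer could use the new entry point like so:

/* Sketch: set an explicit GUID on a pool via the new libzfs call. */
uint64_t guid = 0x1234567890abcdefULL;	/* arbitrary example value */
zpool_handle_t *zhp = zpool_open(g_zfs, "tank");	/* "tank" is hypothetical */

if (zhp != NULL) {
	if (zpool_set_guid(zhp, &guid) != 0)
		(void) fprintf(stderr, "reguid failed\n");
	zpool_close(zhp);
}

Passing a NULL guid pointer preserves the old behaviour: no GUID is sent down and the kernel generates one, which is exactly how the retained zpool_reguid() wrapper above is implemented.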
@ -5609,3 +5649,31 @@ zpool_set_vdev_prop(zpool_handle_t *zhp, const char *vdevname,

	return (ret);
}

/*
 * Prune older entries from the DDT to reclaim space under the quota
 */
int
zpool_ddt_prune(zpool_handle_t *zhp, zpool_ddt_prune_unit_t unit,
    uint64_t amount)
{
	int error = lzc_ddt_prune(zhp->zpool_name, unit, amount);
	if (error != 0) {
		libzfs_handle_t *hdl = zhp->zpool_hdl;
		char errbuf[ERRBUFLEN];

		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
		    "cannot prune dedup table on '%s'"), zhp->zpool_name);

		if (error == EALREADY) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "a prune operation is already in progress"));
			(void) zfs_error(hdl, EZFS_BUSY, errbuf);
		} else {
			(void) zpool_standard_error(hdl, errno, errbuf);
		}
		return (-1);
	}

	return (0);
}
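As a hedged usage sketch (the open handle zhp and the percentage are assumptions; error reporting already happens inside the function), a caller might prune the oldest fifth of the unique entries:

/* Sketch: prune a target percentage of the unique (single-reference) DDT entries. */
if (zpool_ddt_prune(zhp, ZPOOL_DDT_PRUNE_PERCENTAGE, 20) != 0)
	return (1);
/* ZPOOL_DDT_PRUNE_AGE is the age-based alternative; its amount follows the ioctl's convention. */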
@ -162,6 +162,7 @@
|
||||
<elf-symbol name='lzc_channel_program_nosync' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='lzc_clone' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='lzc_create' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='lzc_ddt_prune' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='lzc_destroy' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='lzc_destroy_bookmarks' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='lzc_destroy_snaps' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
@ -1444,6 +1445,7 @@
|
||||
<enumerator name='ZFS_IOC_VDEV_SET_PROPS' value='23126'/>
|
||||
<enumerator name='ZFS_IOC_POOL_SCRUB' value='23127'/>
|
||||
<enumerator name='ZFS_IOC_POOL_PREFETCH' value='23128'/>
|
||||
<enumerator name='ZFS_IOC_DDT_PRUNE' value='23129'/>
|
||||
<enumerator name='ZFS_IOC_PLATFORM' value='23168'/>
|
||||
<enumerator name='ZFS_IOC_EVENTS_NEXT' value='23169'/>
|
||||
<enumerator name='ZFS_IOC_EVENTS_CLEAR' value='23170'/>
|
||||
@ -1484,6 +1486,13 @@
|
||||
<enumerator name='ZPOOL_PREFETCH_DDT' value='1'/>
|
||||
</enum-decl>
|
||||
<typedef-decl name='zpool_prefetch_type_t' type-id='0299ab50' id='e55ff6bc'/>
|
||||
<enum-decl name='zpool_ddt_prune_unit_t' naming-typedef-id='02e25ab0' id='509ae11c'>
|
||||
<underlying-type type-id='9cac1fee'/>
|
||||
<enumerator name='ZPOOL_DDT_PRUNE_NONE' value='0'/>
|
||||
<enumerator name='ZPOOL_DDT_PRUNE_AGE' value='1'/>
|
||||
<enumerator name='ZPOOL_DDT_PRUNE_PERCENTAGE' value='2'/>
|
||||
</enum-decl>
|
||||
<typedef-decl name='zpool_ddt_prune_unit_t' type-id='509ae11c' id='02e25ab0'/>
|
||||
<enum-decl name='data_type_t' naming-typedef-id='8d0687d2' id='aeeae136'>
|
||||
<underlying-type type-id='9cac1fee'/>
|
||||
<enumerator name='DATA_TYPE_DONTCARE' value='-1'/>
|
||||
@ -3015,6 +3024,12 @@
|
||||
<parameter type-id='857bb57e' name='outnvl'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-decl name='lzc_ddt_prune' mangled-name='lzc_ddt_prune' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzc_ddt_prune'>
|
||||
<parameter type-id='80f4b756' name='pool'/>
|
||||
<parameter type-id='02e25ab0' name='unit'/>
|
||||
<parameter type-id='9c313c2d' name='amount'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
<function-type size-in-bits='64' id='c70fa2e8'>
|
||||
<parameter type-id='95e97e5e'/>
|
||||
<parameter type-id='eaa32e2f'/>
|
||||
|
@ -1927,3 +1927,25 @@ lzc_get_bootenv(const char *pool, nvlist_t **outnvl)
{
	return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl));
}

/*
 * Prune the specified amount from the pool's dedup table.
 */
int
lzc_ddt_prune(const char *pool, zpool_ddt_prune_unit_t unit, uint64_t amount)
{
	int error;

	nvlist_t *result = NULL;
	nvlist_t *args = fnvlist_alloc();

	fnvlist_add_int32(args, DDT_PRUNE_UNIT, unit);
	fnvlist_add_uint64(args, DDT_PRUNE_AMOUNT, amount);

	error = lzc_ioctl(ZFS_IOC_DDT_PRUNE, pool, args, &result);

	fnvlist_free(args);
	fnvlist_free(result);

	return (error);
}

@ -1,7 +1,9 @@
include $(srcdir)/%D%/include/Makefile.am

libzpool_la_CFLAGS = $(AM_CFLAGS) $(KERNEL_CFLAGS) $(LIBRARY_CFLAGS)
libzpool_la_CFLAGS += $(ZLIB_CFLAGS)

libzpool_la_CPPFLAGS = $(AM_CPPFLAGS) $(FORCEDEBUG_CPPFLAGS)
libzpool_la_CPPFLAGS = $(AM_CPPFLAGS) $(LIBZPOOL_CPPFLAGS)
libzpool_la_CPPFLAGS += -I$(srcdir)/include/os/@ac_system_l@/zfs
libzpool_la_CPPFLAGS += -DLIB_ZPOOL_BUILD

@ -9,6 +11,7 @@ lib_LTLIBRARIES += libzpool.la
CPPCHECKTARGETS += libzpool.la

dist_libzpool_la_SOURCES = \
	%D%/abd_os.c \
	%D%/kernel.c \
	%D%/taskq.c \
	%D%/util.c
@ -39,7 +42,6 @@ nodist_libzpool_la_SOURCES = \
	module/lua/lvm.c \
	module/lua/lzio.c \
	\
	module/os/linux/zfs/abd_os.c \
	module/os/linux/zfs/arc_os.c \
	module/os/linux/zfs/trace.c \
	module/os/linux/zfs/vdev_file.c \
@ -62,6 +64,7 @@ nodist_libzpool_la_SOURCES = \
	module/zcommon/zfs_fletcher_superscalar4.c \
	module/zcommon/zfs_namecheck.c \
	module/zcommon/zfs_prop.c \
	module/zcommon/zfs_valstr.c \
	module/zcommon/zpool_prop.c \
	module/zcommon/zprop_common.c \
	\
@ -79,6 +82,7 @@ nodist_libzpool_la_SOURCES = \
	module/zfs/dbuf.c \
	module/zfs/dbuf_stats.c \
	module/zfs/ddt.c \
	module/zfs/ddt_log.c \
	module/zfs/ddt_stats.c \
	module/zfs/ddt_zap.c \
	module/zfs/dmu.c \

365 sys/contrib/openzfs/lib/libzpool/abd_os.c Normal file
@ -0,0 +1,365 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2019 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2023, 2024, Klara Inc.
|
||||
*/
|
||||
|
||||
#include <sys/abd_impl.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
|
||||
/*
|
||||
* We're simulating scatter/gather with 4K allocations, since that's more like
|
||||
* what a typical kernel does.
|
||||
*/
|
||||
#define ABD_PAGESIZE (4096)
|
||||
#define ABD_PAGESHIFT (12)
|
||||
#define ABD_PAGEMASK (ABD_PAGESIZE-1)
|
||||
|
||||
/*
|
||||
* See rationale in module/os/linux/zfs/abd_os.c, but in userspace this is
|
||||
* mostly useful to get a mix of linear and scatter ABDs for testing.
|
||||
*/
|
||||
#define ABD_SCATTER_MIN_SIZE (512 * 3)
|
||||
|
||||
abd_t *abd_zero_scatter = NULL;
|
||||
|
||||
static uint_t
abd_iovcnt_for_bytes(size_t size)
{
	/*
	 * Each iovec points to a 4K page. There's no real reason to do this
	 * in userspace, but our whole point here is to make it feel a bit
	 * more like a real paged memory model.
	 */
	return (P2ROUNDUP(size, ABD_PAGESIZE) / ABD_PAGESIZE);
}
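A quick worked example of the arithmetic above (numbers are illustrative): a 6 KiB request rounds up to two 4 KiB pages, so abd_iovcnt_for_bytes(6144) returns 2, while an exact 4096-byte request needs just 1 iovec.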
|
||||
|
||||
abd_t *
|
||||
abd_alloc_struct_impl(size_t size)
|
||||
{
|
||||
/*
|
||||
* Zero-sized means it will be used for a linear or gang abd, so just
|
||||
* allocate the abd itself and return.
|
||||
*/
|
||||
if (size == 0)
|
||||
return (umem_alloc(sizeof (abd_t), UMEM_NOFAIL));
|
||||
|
||||
/*
|
||||
* Allocating for a scatter abd, so compute how many ABD_PAGESIZE
|
||||
* iovecs we will need to hold this size. Append that allocation to the
|
||||
* end. Note that struct abd_scatter already includes abd_iov[1], so we
|
||||
* allocate one less iovec than we need.
|
||||
*
|
||||
* Note we're not allocating the pages proper, just the iovec pointers.
|
||||
* That's down in abd_alloc_chunks. We _could_ do it here in a single
|
||||
* allocation, but it's fiddly and harder to read for no real gain.
|
||||
*/
|
||||
uint_t n = abd_iovcnt_for_bytes(size);
|
||||
abd_t *abd = umem_alloc(sizeof (abd_t) + (n-1) * sizeof (struct iovec),
|
||||
UMEM_NOFAIL);
|
||||
ABD_SCATTER(abd).abd_offset = 0;
|
||||
ABD_SCATTER(abd).abd_iovcnt = n;
|
||||
return (abd);
|
||||
}
|
||||
|
||||
void
|
||||
abd_free_struct_impl(abd_t *abd)
|
||||
{
|
||||
/* For scatter, compute the extra amount we need to free */
|
||||
uint_t iovcnt =
|
||||
abd_is_linear(abd) || abd_is_gang(abd) ?
|
||||
0 : (ABD_SCATTER(abd).abd_iovcnt - 1);
|
||||
umem_free(abd, sizeof (abd_t) + iovcnt * sizeof (struct iovec));
|
||||
}
|
||||
|
||||
void
|
||||
abd_alloc_chunks(abd_t *abd, size_t size)
|
||||
{
|
||||
/*
|
||||
* We've already allocated the iovec array; ensure that the wanted size
|
||||
* actually matches, otherwise the caller has made a mistake somewhere.
|
||||
*/
|
||||
uint_t n = ABD_SCATTER(abd).abd_iovcnt;
|
||||
ASSERT3U(n, ==, abd_iovcnt_for_bytes(size));
|
||||
|
||||
/*
|
||||
* Allocate a ABD_PAGESIZE region for each iovec.
|
||||
*/
|
||||
struct iovec *iov = ABD_SCATTER(abd).abd_iov;
|
||||
for (int i = 0; i < n; i++) {
|
||||
iov[i].iov_base =
|
||||
umem_alloc_aligned(ABD_PAGESIZE, ABD_PAGESIZE, UMEM_NOFAIL);
|
||||
iov[i].iov_len = ABD_PAGESIZE;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
abd_free_chunks(abd_t *abd)
|
||||
{
|
||||
uint_t n = ABD_SCATTER(abd).abd_iovcnt;
|
||||
struct iovec *iov = ABD_SCATTER(abd).abd_iov;
|
||||
for (int i = 0; i < n; i++)
|
||||
umem_free_aligned(iov[i].iov_base, ABD_PAGESIZE);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
abd_size_alloc_linear(size_t size)
|
||||
{
|
||||
return (size < ABD_SCATTER_MIN_SIZE);
|
||||
}
|
||||
|
||||
void
|
||||
abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
|
||||
{
|
||||
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
||||
int waste = P2ROUNDUP(abd->abd_size, ABD_PAGESIZE) - abd->abd_size;
|
||||
if (op == ABDSTAT_INCR) {
|
||||
arc_space_consume(waste, ARC_SPACE_ABD_CHUNK_WASTE);
|
||||
} else {
|
||||
arc_space_return(waste, ARC_SPACE_ABD_CHUNK_WASTE);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
abd_update_linear_stats(abd_t *abd, abd_stats_op_t op)
|
||||
{
|
||||
(void) abd;
|
||||
(void) op;
|
||||
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
||||
}
|
||||
|
||||
void
|
||||
abd_verify_scatter(abd_t *abd)
|
||||
{
|
||||
#ifdef ZFS_DEBUG
|
||||
/*
|
||||
* scatter abds shall have:
|
||||
* - at least one iovec
|
||||
* - all iov_base point somewhere
|
||||
* - all iov_len are ABD_PAGESIZE
|
||||
* - offset set within the abd pages somewhere
|
||||
*/
|
||||
uint_t n = ABD_SCATTER(abd).abd_iovcnt;
|
||||
ASSERT3U(n, >, 0);
|
||||
|
||||
uint_t len = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
ASSERT3P(ABD_SCATTER(abd).abd_iov[i].iov_base, !=, NULL);
|
||||
ASSERT3U(ABD_SCATTER(abd).abd_iov[i].iov_len, ==, ABD_PAGESIZE);
|
||||
len += ABD_PAGESIZE;
|
||||
}
|
||||
|
||||
ASSERT3U(ABD_SCATTER(abd).abd_offset, <, len);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
abd_init(void)
|
||||
{
|
||||
/*
|
||||
* Create the "zero" scatter abd. This is always the size of the
|
||||
* largest possible block, but only actually has a single allocated
|
||||
* page, which all iovecs in the abd point to.
|
||||
*/
|
||||
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
|
||||
abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER;
|
||||
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
|
||||
|
||||
void *zero =
|
||||
umem_alloc_aligned(ABD_PAGESIZE, ABD_PAGESIZE, UMEM_NOFAIL);
|
||||
memset(zero, 0, ABD_PAGESIZE);
|
||||
|
||||
uint_t n = abd_iovcnt_for_bytes(SPA_MAXBLOCKSIZE);
|
||||
struct iovec *iov = ABD_SCATTER(abd_zero_scatter).abd_iov;
|
||||
for (int i = 0; i < n; i++) {
|
||||
iov[i].iov_base = zero;
|
||||
iov[i].iov_len = ABD_PAGESIZE;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
abd_fini(void)
|
||||
{
|
||||
umem_free_aligned(
|
||||
ABD_SCATTER(abd_zero_scatter).abd_iov[0].iov_base, ABD_PAGESIZE);
|
||||
abd_free_struct(abd_zero_scatter);
|
||||
abd_zero_scatter = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
abd_free_linear_page(abd_t *abd)
|
||||
{
|
||||
/*
|
||||
* LINEAR_PAGE is specific to the Linux kernel; we never set this
|
||||
* flag, so this will never be called.
|
||||
*/
|
||||
(void) abd;
|
||||
PANIC("unreachable");
|
||||
}
|
||||
|
||||
abd_t *
|
||||
abd_alloc_for_io(size_t size, boolean_t is_metadata)
|
||||
{
|
||||
return (abd_alloc(size, is_metadata));
|
||||
}
|
||||
|
||||
abd_t *
|
||||
abd_get_offset_scatter(abd_t *dabd, abd_t *sabd, size_t off, size_t size)
|
||||
{
|
||||
|
||||
/*
|
||||
* Create a new scatter dabd by borrowing data pages from sabd to cover
|
||||
* off+size.
|
||||
*
|
||||
* sabd is an existing scatter abd with a set of iovecs, each covering
|
||||
* an ABD_PAGESIZE (4K) allocation. Its "zero" is at abd_offset.
|
||||
*
|
||||
* [........][........][........][........]
|
||||
* ^- sabd_offset
|
||||
*
|
||||
* We want to produce a new abd, referencing those allocations at the
|
||||
* given offset.
|
||||
*
|
||||
* [........][........][........][........]
|
||||
* ^- dabd_offset = sabd_offset + off
|
||||
* ^- dabd_offset + size
|
||||
*
|
||||
* In this example, dabd needs three iovecs. The first iovec is offset
|
||||
* 0, so the final dabd_offset is masked back into the first iovec.
|
||||
*
|
||||
* [........][........][........]
|
||||
* ^- dabd_offset
|
||||
*/
|
||||
size_t soff = ABD_SCATTER(sabd).abd_offset + off;
|
||||
size_t doff = soff & ABD_PAGEMASK;
|
||||
size_t iovcnt = abd_iovcnt_for_bytes(doff + size);
|
||||
|
||||
/*
|
||||
* If the passed-in abd has enough allocated iovecs already, reuse it.
|
||||
* Otherwise, make a new one. The caller will free the original if the
|
||||
* one it gets back is not the same.
|
||||
*
|
||||
* Note that it's ok if we reuse an abd with more iovecs than we need.
|
||||
* abd_size has the usable amount of data, and the abd does not own the
|
||||
* pages referenced by the iovecs. At worst, they're holding dangling
|
||||
* pointers that we'll never use anyway.
|
||||
*/
|
||||
if (dabd == NULL || ABD_SCATTER(dabd).abd_iovcnt < iovcnt)
|
||||
dabd = abd_alloc_struct(iovcnt << ABD_PAGESHIFT);
|
||||
|
||||
/* Set offset into first page in view */
|
||||
ABD_SCATTER(dabd).abd_offset = doff;
|
||||
|
||||
/* Copy the wanted iovecs from the source to the dest */
|
||||
memcpy(&ABD_SCATTER(dabd).abd_iov[0],
|
||||
&ABD_SCATTER(sabd).abd_iov[soff >> ABD_PAGESHIFT],
|
||||
iovcnt * sizeof (struct iovec));
|
||||
|
||||
return (dabd);
|
||||
}
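To make the offset arithmetic concrete (values invented for illustration): if sabd has abd_offset = 512 and the caller asks for off = 9000 and size = 6000, then soff = 9512, the copy starts at iovec index 9512 >> ABD_PAGESHIFT = 2, doff = 9512 & ABD_PAGEMASK = 1320, and iovcnt = abd_iovcnt_for_bytes(1320 + 6000) = 2, since the 7320 bytes of interest fit within two 4 KiB pages.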
|
||||
|
||||
void
|
||||
abd_iter_init(struct abd_iter *aiter, abd_t *abd)
|
||||
{
|
||||
ASSERT(!abd_is_gang(abd));
|
||||
abd_verify(abd);
|
||||
memset(aiter, 0, sizeof (struct abd_iter));
|
||||
aiter->iter_abd = abd;
|
||||
}
|
||||
|
||||
boolean_t
|
||||
abd_iter_at_end(struct abd_iter *aiter)
|
||||
{
|
||||
ASSERT3U(aiter->iter_pos, <=, aiter->iter_abd->abd_size);
|
||||
return (aiter->iter_pos == aiter->iter_abd->abd_size);
|
||||
}
|
||||
|
||||
void
|
||||
abd_iter_advance(struct abd_iter *aiter, size_t amount)
|
||||
{
|
||||
ASSERT3P(aiter->iter_mapaddr, ==, NULL);
|
||||
ASSERT0(aiter->iter_mapsize);
|
||||
|
||||
if (abd_iter_at_end(aiter))
|
||||
return;
|
||||
|
||||
aiter->iter_pos += amount;
|
||||
ASSERT3U(aiter->iter_pos, <=, aiter->iter_abd->abd_size);
|
||||
}
|
||||
|
||||
void
|
||||
abd_iter_map(struct abd_iter *aiter)
|
||||
{
|
||||
ASSERT3P(aiter->iter_mapaddr, ==, NULL);
|
||||
ASSERT0(aiter->iter_mapsize);
|
||||
|
||||
if (abd_iter_at_end(aiter))
|
||||
return;
|
||||
|
||||
if (abd_is_linear(aiter->iter_abd)) {
|
||||
aiter->iter_mapaddr =
|
||||
ABD_LINEAR_BUF(aiter->iter_abd) + aiter->iter_pos;
|
||||
aiter->iter_mapsize =
|
||||
aiter->iter_abd->abd_size - aiter->iter_pos;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* For scatter, we index into the appropriate iovec, and return the
|
||||
* smaller of the amount requested, or up to the end of the page.
|
||||
*/
|
||||
size_t poff = aiter->iter_pos + ABD_SCATTER(aiter->iter_abd).abd_offset;
|
||||
|
||||
ASSERT3U(poff >> ABD_PAGESHIFT, <=,
|
||||
ABD_SCATTER(aiter->iter_abd).abd_iovcnt);
|
||||
struct iovec *iov = &ABD_SCATTER(aiter->iter_abd).
|
||||
abd_iov[poff >> ABD_PAGESHIFT];
|
||||
|
||||
aiter->iter_mapsize = MIN(ABD_PAGESIZE - (poff & ABD_PAGEMASK),
|
||||
aiter->iter_abd->abd_size - aiter->iter_pos);
|
||||
ASSERT3U(aiter->iter_mapsize, <=, ABD_PAGESIZE);
|
||||
|
||||
aiter->iter_mapaddr = iov->iov_base + (poff & ABD_PAGEMASK);
|
||||
}
|
||||
|
||||
void
|
||||
abd_iter_unmap(struct abd_iter *aiter)
|
||||
{
|
||||
if (abd_iter_at_end(aiter))
|
||||
return;
|
||||
|
||||
ASSERT3P(aiter->iter_mapaddr, !=, NULL);
|
||||
ASSERT3U(aiter->iter_mapsize, >, 0);
|
||||
|
||||
aiter->iter_mapaddr = NULL;
|
||||
aiter->iter_mapsize = 0;
|
||||
}
|
||||
|
||||
void
|
||||
abd_cache_reap_now(void)
|
||||
{
|
||||
}
|
4 sys/contrib/openzfs/lib/libzpool/include/Makefile.am Normal file
@ -0,0 +1,4 @@
libzpooldir = $(includedir)/libzpool
libzpool_HEADERS = \
	%D%/sys/abd_os.h \
	%D%/sys/abd_impl_os.h
41 sys/contrib/openzfs/lib/libzpool/include/sys/abd_impl_os.h Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2016, 2019 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2023, 2024, Klara Inc.
|
||||
*/
|
||||
|
||||
#ifndef _ABD_IMPL_OS_H
|
||||
#define _ABD_IMPL_OS_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define abd_enter_critical(flags) ((void)0)
|
||||
#define abd_exit_critical(flags) ((void)0)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ABD_IMPL_OS_H */
|
47 sys/contrib/openzfs/lib/libzpool/include/sys/abd_os.h Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2016, 2019 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ABD_OS_H
|
||||
#define _ABD_OS_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct abd_scatter {
|
||||
uint_t abd_offset;
|
||||
uint_t abd_iovcnt;
|
||||
struct iovec abd_iov[1]; /* actually variable-length */
|
||||
};
|
||||
|
||||
struct abd_linear {
|
||||
void *abd_buf;
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ABD_OS_H */
|
@ -72,6 +72,7 @@ dist_man_MANS = \
|
||||
%D%/man8/zpool-create.8 \
|
||||
%D%/man8/zpool-destroy.8 \
|
||||
%D%/man8/zpool-detach.8 \
|
||||
%D%/man8/zpool-ddtprune.8 \
|
||||
%D%/man8/zpool-events.8 \
|
||||
%D%/man8/zpool-export.8 \
|
||||
%D%/man8/zpool-get.8 \
|
||||
|
@ -175,17 +175,6 @@ Increasing this value will
|
||||
result in a slower thread creation rate which may be preferable for some
|
||||
configurations.
|
||||
.
|
||||
.It Sy spl_max_show_tasks Ns = Ns Sy 512 Pq uint
|
||||
The maximum number of tasks per pending list in each taskq shown in
|
||||
.Pa /proc/spl/taskq{,-all} .
|
||||
Write
|
||||
.Sy 0
|
||||
to turn off the limit.
|
||||
The proc file will walk the lists with lock held,
|
||||
reading it could cause a lock-up if the list grow too large
|
||||
without limiting the output.
|
||||
"(truncated)" will be shown if the list is larger than the limit.
|
||||
.
|
||||
.It Sy spl_taskq_thread_timeout_ms Ns = Ns Sy 5000 Pq uint
|
||||
Minimum idle threads exit interval for dynamic taskqs.
|
||||
Smaller values allow idle threads exit more often and potentially be
|
||||
|
@ -77,6 +77,17 @@ the array is dynamically sized based on total system memory.
|
||||
dnode slots allocated in a single operation as a power of 2.
|
||||
The default value minimizes lock contention for the bulk operation performed.
|
||||
.
|
||||
.It Sy dmu_ddt_copies Ns = Ns Sy 3 Pq uint
|
||||
Controls the number of copies stored for DeDup Table
|
||||
.Pq DDT
|
||||
objects.
|
||||
Reducing the number of copies to 1 from the previous default of 3
|
||||
can reduce the write inflation caused by deduplication.
|
||||
This assumes redundancy for this data is provided by the vdev layer.
|
||||
If the DDT is damaged, space may be leaked
|
||||
.Pq not freed
|
||||
when the DDT can not report the correct reference count.
|
||||
.
|
||||
.It Sy dmu_prefetch_max Ns = Ns Sy 134217728 Ns B Po 128 MiB Pc Pq uint
|
||||
Limit the amount we can prefetch with one call to this amount in bytes.
|
||||
This helps to limit the amount of memory that can be used by prefetching.
|
||||
@ -121,20 +132,26 @@ Controls whether buffers present on special vdevs are eligible for caching
|
||||
into L2ARC.
|
||||
If set to 1, exclude dbufs on special vdevs from being cached to L2ARC.
|
||||
.
|
||||
.It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Pq int
|
||||
.It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Ns | Ns 2 Pq int
|
||||
Controls whether only MFU metadata and data are cached from ARC into L2ARC.
|
||||
This may be desired to avoid wasting space on L2ARC when reading/writing large
|
||||
amounts of data that are not expected to be accessed more than once.
|
||||
.Pp
|
||||
The default is off,
|
||||
The default is 0,
|
||||
meaning both MRU and MFU data and metadata are cached.
|
||||
When turning off this feature, some MRU buffers will still be present
|
||||
in ARC and eventually cached on L2ARC.
|
||||
When turning off this feature (setting it to 0), some MRU buffers will
|
||||
still be present in ARC and eventually cached on L2ARC.
|
||||
.No If Sy l2arc_noprefetch Ns = Ns Sy 0 ,
|
||||
some prefetched buffers will be cached to L2ARC, and those might later
|
||||
transition to MRU, in which case the
|
||||
.Sy l2arc_mru_asize No arcstat will not be Sy 0 .
|
||||
.Pp
|
||||
Setting it to 1 means to L2 cache only MFU data and metadata.
|
||||
.Pp
|
||||
Setting it to 2 means to L2 cache all metadata (MRU+MFU) but
only MFU data (i.e., MRU data are not cached).
This can be the right setting to cache as much metadata as possible
even when data turnover is high.
|
||||
.Pp
|
||||
Regardless of
|
||||
.Sy l2arc_noprefetch ,
|
||||
some MFU buffers might be evicted from ARC,
|
||||
@ -821,6 +838,7 @@ This is a limit on how many pages the ARC shrinker makes available for
|
||||
eviction in response to one page allocation attempt.
|
||||
Note that in practice, the kernel's shrinker can ask us to evict
|
||||
up to about four times this for one allocation attempt.
|
||||
To reduce OOM risk, this limit is applied for kswapd reclaims only.
|
||||
.Pp
|
||||
The default limit of
|
||||
.Sy 10000 Pq in practice, Em 160 MiB No per allocation attempt with 4 KiB pages
|
||||
@ -974,6 +992,88 @@ milliseconds until the operation completes.
|
||||
.It Sy zfs_dedup_prefetch Ns = Ns Sy 0 Ns | Ns 1 Pq int
|
||||
Enable prefetching dedup-ed blocks which are going to be freed.
|
||||
.
|
||||
.It Sy zfs_dedup_log_flush_passes_max Ns = Ns Sy 8 Ns Pq uint
|
||||
Maximum number of dedup log flush passes (iterations) each transaction.
|
||||
.Pp
|
||||
At the start of each transaction, OpenZFS will estimate how many entries it
|
||||
needs to flush out to keep up with the change rate, taking the amount and time
|
||||
taken to flush on previous txgs into account (see
|
||||
.Sy zfs_dedup_log_flush_flow_rate_txgs ) .
|
||||
It will spread this amount into a number of passes.
|
||||
At each pass, it will use the amount already flushed and the total time taken
|
||||
by flushing and by other IO to recompute how much it should do for the remainder
|
||||
of the txg.
|
||||
.Pp
|
||||
Reducing the max number of passes will make flushing more aggressive, flushing
|
||||
out more entries on each pass.
|
||||
This can be faster, but also more likely to compete with other IO.
|
||||
Increasing the max number of passes will put fewer entries onto each pass,
|
||||
keeping the overhead of dedup changes to a minimum but possibly causing a large
|
||||
number of changes to be dumped on the last pass, which can blow out the txg
|
||||
sync time beyond
|
||||
.Sy zfs_txg_timeout .
|
||||
.
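As an illustrative reading of the mechanism (numbers invented): with an estimate of 8000 entries for the txg and the default of 8 passes, the first pass aims at roughly 1000 entries; after each pass the amount already flushed and the time spent so far are folded back in, so the remaining passes shrink or grow their share to still finish within the txg.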
|
||||
.It Sy zfs_dedup_log_flush_min_time_ms Ns = Ns Sy 1000 Ns Pq uint
|
||||
Minimum time to spend on dedup log flush each transaction.
|
||||
.Pp
|
||||
At least this long will be spent flushing dedup log entries each transaction,
|
||||
up to
|
||||
.Sy zfs_txg_timeout .
|
||||
This occurs even if doing so would delay the transaction, that is, other IO
|
||||
completes under this time.
|
||||
.
|
||||
.It Sy zfs_dedup_log_flush_entries_min Ns = Ns Sy 1000 Ns Pq uint
|
||||
Flush at least this many entries each transaction.
|
||||
.Pp
|
||||
OpenZFS will estimate how many entries it needs to flush each transaction to
|
||||
keep up with the ingest rate (see
|
||||
.Sy zfs_dedup_log_flush_flow_rate_txgs ) .
|
||||
This sets the minimum for that estimate.
|
||||
Raising it can force OpenZFS to flush more aggressively, keeping the log small
|
||||
and so reducing pool import times, but can make it less able to back off if
|
||||
log flushing would compete with other IO too much.
|
||||
.
|
||||
.It Sy zfs_dedup_log_flush_flow_rate_txgs Ns = Ns Sy 10 Ns Pq uint
|
||||
Number of transactions to use to compute the flow rate.
|
||||
.Pp
|
||||
OpenZFS will estimate how many entries it needs to flush each transaction by
|
||||
monitoring the number of entries changed (ingest rate), number of entries
|
||||
flushed (flush rate) and time spent flushing (flush time rate) and combining
|
||||
these into an overall "flow rate".
|
||||
It will use an exponential weighted moving average over some number of recent
|
||||
transactions to compute these rates.
|
||||
This sets the number of transactions to compute these averages over.
|
||||
Setting it higher can help to smooth out the flow rate in the face of spiky
|
||||
workloads, but will take longer for the flow rate to adjust to a sustained
|
||||
change in the ingress rate.
|
||||
.
|
||||
.It Sy zfs_dedup_log_txg_max Ns = Ns Sy 8 Ns Pq uint
|
||||
Max transactions to accumulate before starting to flush dedup logs.
|
||||
.Pp
|
||||
OpenZFS maintains two dedup logs, one receiving new changes, one flushing.
|
||||
If there is nothing to flush, it will accumulate changes for no more than this
|
||||
many transactions before switching the logs and starting to flush entries out.
|
||||
.
|
||||
.It Sy zfs_dedup_log_mem_max Ns = Ns Sy 0 Ns Pq u64
|
||||
Max memory to use for dedup logs.
|
||||
.Pp
|
||||
OpenZFS will spend no more than this much memory on maintaining the in-memory
|
||||
dedup log.
|
||||
Flushing will begin when around half this amount is being spent on logs.
|
||||
The default value of
|
||||
.Sy 0
|
||||
will cause it to be set by
|
||||
.Sy zfs_dedup_log_mem_max_percent
|
||||
instead.
|
||||
.
|
||||
.It Sy zfs_dedup_log_mem_max_percent Ns = Ns Sy 1 Ns % Pq uint
|
||||
Max memory to use for dedup logs, as a percentage of total memory.
|
||||
.Pp
|
||||
If
|
||||
.Sy zfs_dedup_log_mem_max
|
||||
is not set, it will be initialised as a percentage of the total memory in the
|
||||
system.
|
||||
.
|
||||
.It Sy zfs_delay_min_dirty_percent Ns = Ns Sy 60 Ns % Pq uint
|
||||
Start to delay each transaction once there is this amount of dirty data,
|
||||
expressed as a percentage of
|
||||
|
@ -17,8 +17,9 @@
|
||||
.\" Copyright (c) 2019, Klara Inc.
|
||||
.\" Copyright (c) 2019, Allan Jude
|
||||
.\" Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
|
||||
.\" Copyright (c) 2023, Klara Inc.
|
||||
.\"
|
||||
.Dd June 23, 2022
|
||||
.Dd February 14, 2024
|
||||
.Dt ZPOOL-FEATURES 7
|
||||
.Os
|
||||
.
|
||||
@ -550,6 +551,20 @@ when an encrypted dataset is created and will be returned to the
.Sy enabled
state when all datasets that use this feature are destroyed.
.
.feature com.klarasystems fast_dedup yes
This feature allows more advanced deduplication features to be enabled on new
dedup tables.
.Pp
This feature will be
.Sy active
when the first deduplicated block is written after a new dedup table is created
(ie after a new pool creation, or new checksum used on a dataset with
.Sy dedup
enabled).
It will be returned to the
.Sy enabled
state when all deduplicated blocks using it are freed.
.
.feature com.delphix extensible_dataset no
This feature allows more flexible use of internal ZFS data structures,
and exists for other features to depend on.
48
sys/contrib/openzfs/man/man8/zpool-ddtprune.8
Normal file
@ -0,0 +1,48 @@
.\"
.\" CDDL HEADER START
.\"
.\" The contents of this file are subject to the terms of the
.\" Common Development and Distribution License (the "License").
.\" You may not use this file except in compliance with the License.
.\"
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
.\" or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions
.\" and limitations under the License.
.\"
.\" When distributing Covered Code, include this CDDL HEADER in each
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" CDDL HEADER END
.\"
.\"
.\" Copyright (c) 2024, Klara Inc.
.\"
.Dd June 17, 2024
.Dt ZPOOL-DDTPRUNE 8
.Os
.
.Sh NAME
.Nm zpool-ddtprune
.Nd Prunes the oldest entries from the single reference dedup table(s)
.Sh SYNOPSIS
.Nm zpool
.Cm ddtprune
.Fl d Ar days | Fl p Ar percentage
.Ar pool
.Sh DESCRIPTION
This command prunes older unique entries from the dedup table.
As a complement to the dedup quota feature,
.Sy ddtprune
allows removal of older non-duplicate entries to make room for
newer duplicate entries.
.Pp
The amount to prune can be based on a target percentage of the unique entries
or based on the age (i.e., every unique entry older than N days).
.
.Sh SEE ALSO
.Xr zdb 8 ,
.Xr zpool-status 8
@ -25,8 +25,10 @@
.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\" Copyright (c) 2024, Klara Inc.
.\" Copyright (c) 2024, Mateusz Piotrowski
.\"
.Dd May 31, 2021
.Dd June 21, 2023
.Dt ZPOOL-REGUID 8
.Os
.
@ -36,6 +38,7 @@
.Sh SYNOPSIS
.Nm zpool
.Cm reguid
.Op Fl g Ar guid
.Ar pool
.
.Sh DESCRIPTION
@ -43,6 +46,15 @@ Generates a new unique identifier for the pool.
You must ensure that all devices in this pool are online and healthy before
performing this action.
.
.Bl -tag -width Ds
.It Fl g Ar guid
Set the pool GUID to the provided value.
The GUID can be any 64-bit value accepted by
.Xr strtoull 3
in base 10.
.Nm
will return an error if the provided GUID is already in use.
.El
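Since the page only requires a value that strtoull(3) accepts in base 10,
caller-side validation could look like the sketch below; this is an
illustration, not the zpool(8) source, and the helper name is made up.

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

/*
 * Parse a pool GUID as zpool-reguid(8) describes it: any 64-bit value
 * accepted by strtoull(3) in base 10.  Returns 0 on success, -1 on a
 * malformed or out-of-range string.
 */
static int
parse_guid(const char *arg, uint64_t *guidp)
{
	char *end;

	errno = 0;
	unsigned long long v = strtoull(arg, &end, 10);
	if (errno != 0 || end == arg || *end != '\0')
		return (-1);
	*guidp = (uint64_t)v;
	return (0);
}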
.Sh SEE ALSO
.Xr zpool-export 8 ,
.Xr zpool-import 8
@ -592,6 +592,7 @@ don't wait.
.Xr zpool-checkpoint 8 ,
.Xr zpool-clear 8 ,
.Xr zpool-create 8 ,
.Xr zpool-ddtprune 8 ,
.Xr zpool-destroy 8 ,
.Xr zpool-detach 8 ,
.Xr zpool-events 8 ,
@ -16,8 +16,8 @@ src = @abs_srcdir@
|
||||
obj = @abs_builddir@
|
||||
else
|
||||
zfs_include = $(srctree)/include/zfs
|
||||
icp_include = $(srctree)/$(src)/icp/include
|
||||
zstd_include = $(srctree)/$(src)/zstd/include
|
||||
icp_include = $(src)/icp/include
|
||||
zstd_include = $(src)/zstd/include
|
||||
ZFS_MODULE_CFLAGS += -include $(zfs_include)/zfs_config.h
|
||||
endif
|
||||
|
||||
@ -240,6 +240,7 @@ ZCOMMON_OBJS := \
|
||||
zfs_fletcher_superscalar4.o \
|
||||
zfs_namecheck.o \
|
||||
zfs_prop.o \
|
||||
zfs_valstr.o \
|
||||
zpool_prop.o \
|
||||
zprop_common.o
|
||||
|
||||
@ -322,6 +323,7 @@ ZFS_OBJS := \
|
||||
dbuf.o \
|
||||
dbuf_stats.o \
|
||||
ddt.o \
|
||||
ddt_log.o \
|
||||
ddt_stats.o \
|
||||
ddt_zap.o \
|
||||
dmu.o \
|
||||
|
@ -233,6 +233,7 @@ SRCS+= cityhash.c \
|
||||
zfs_fletcher_superscalar.c \
|
||||
zfs_namecheck.c \
|
||||
zfs_prop.c \
|
||||
zfs_valstr.c \
|
||||
zpool_prop.c \
|
||||
zprop_common.c
|
||||
|
||||
@ -252,6 +253,7 @@ SRCS+= abd.c \
|
||||
dbuf.c \
|
||||
dbuf_stats.c \
|
||||
ddt.c \
|
||||
ddt_log.c \
|
||||
ddt_stats.c \
|
||||
ddt_zap.c \
|
||||
dmu.c \
|
||||
@ -426,6 +428,7 @@ CFLAGS.gcc+= -Wno-pointer-to-int-cast
|
||||
|
||||
CFLAGS.abd.c= -Wno-cast-qual
|
||||
CFLAGS.ddt.c= -Wno-cast-qual
|
||||
CFLAGS.ddt_log.c= -Wno-cast-qual -Wno-pointer-arith
|
||||
CFLAGS.ddt_zap.c= -Wno-cast-qual
|
||||
CFLAGS.dmu.c= -Wno-cast-qual
|
||||
CFLAGS.dmu_traverse.c= -Wno-cast-qual
|
||||
|
@ -95,14 +95,12 @@ struct {
|
||||
*/
|
||||
static size_t zfs_abd_scatter_min_size = PAGE_SIZE + 1;
|
||||
|
||||
#if defined(_KERNEL)
|
||||
SYSCTL_DECL(_vfs_zfs);
|
||||
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN,
|
||||
&zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers");
|
||||
SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_scatter_min_size, CTLFLAG_RWTUN,
|
||||
&zfs_abd_scatter_min_size, 0, "Minimum size of scatter allocations.");
|
||||
#endif
|
||||
|
||||
kmem_cache_t *abd_chunk_cache;
|
||||
static kstat_t *abd_ksp;
|
||||
@ -250,7 +248,7 @@ abd_alloc_zero_scatter(void)
|
||||
|
||||
n = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
|
||||
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
|
||||
abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_ZEROS;
|
||||
abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER;
|
||||
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
|
||||
|
||||
ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
|
||||
|
@ -124,7 +124,6 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
|
||||
|
||||
SYSCTL_NODE(_vfs_zfs_livelist, OID_AUTO, condense, CTLFLAG_RW, 0,
|
||||
"ZFS livelist condense");
|
||||
SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, cache, CTLFLAG_RW, 0, "ZFS VDEV Cache");
|
||||
SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, file, CTLFLAG_RW, 0, "ZFS VDEV file");
|
||||
SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, mirror, CTLFLAG_RD, 0,
|
||||
"ZFS VDEV mirror");
|
||||
|
@ -868,16 +868,16 @@ spl_init(void)
|
||||
if ((rc = spl_tsd_init()))
|
||||
goto out2;
|
||||
|
||||
if ((rc = spl_taskq_init()))
|
||||
if ((rc = spl_proc_init()))
|
||||
goto out3;
|
||||
|
||||
if ((rc = spl_kmem_cache_init()))
|
||||
if ((rc = spl_kstat_init()))
|
||||
goto out4;
|
||||
|
||||
if ((rc = spl_proc_init()))
|
||||
if ((rc = spl_taskq_init()))
|
||||
goto out5;
|
||||
|
||||
if ((rc = spl_kstat_init()))
|
||||
if ((rc = spl_kmem_cache_init()))
|
||||
goto out6;
|
||||
|
||||
if ((rc = spl_zlib_init()))
|
||||
@ -891,13 +891,13 @@ spl_init(void)
|
||||
out8:
|
||||
spl_zlib_fini();
|
||||
out7:
|
||||
spl_kstat_fini();
|
||||
out6:
|
||||
spl_proc_fini();
|
||||
out5:
|
||||
spl_kmem_cache_fini();
|
||||
out4:
|
||||
out6:
|
||||
spl_taskq_fini();
|
||||
out5:
|
||||
spl_kstat_fini();
|
||||
out4:
|
||||
spl_proc_fini();
|
||||
out3:
|
||||
spl_tsd_fini();
|
||||
out2:
|
||||
@ -913,10 +913,10 @@ spl_fini(void)
|
||||
{
|
||||
spl_zone_fini();
|
||||
spl_zlib_fini();
|
||||
spl_kstat_fini();
|
||||
spl_proc_fini();
|
||||
spl_kmem_cache_fini();
|
||||
spl_taskq_fini();
|
||||
spl_kstat_fini();
|
||||
spl_proc_fini();
|
||||
spl_tsd_fini();
|
||||
spl_kvmem_fini();
|
||||
spl_random_fini();
|
||||
|
@ -31,7 +31,6 @@
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/kmem_cache.h>
|
||||
#include <sys/vmem.h>
|
||||
#include <sys/taskq.h>
|
||||
#include <sys/proc.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/kmod.h>
|
||||
@ -63,8 +62,6 @@ static struct ctl_table_header *spl_kstat = NULL;
|
||||
static struct proc_dir_entry *proc_spl = NULL;
|
||||
static struct proc_dir_entry *proc_spl_kmem = NULL;
|
||||
static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
|
||||
static struct proc_dir_entry *proc_spl_taskq_all = NULL;
|
||||
static struct proc_dir_entry *proc_spl_taskq = NULL;
|
||||
struct proc_dir_entry *proc_spl_kstat = NULL;
|
||||
|
||||
#ifdef DEBUG_KMEM
|
||||
@ -177,195 +174,6 @@ proc_dohostid(CONST_CTL_TABLE *table, int write,
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
taskq_seq_show_headers(struct seq_file *f)
|
||||
{
|
||||
seq_printf(f, "%-25s %5s %5s %5s %5s %5s %5s %12s %5s %10s\n",
|
||||
"taskq", "act", "nthr", "spwn", "maxt", "pri",
|
||||
"mina", "maxa", "cura", "flags");
|
||||
}
|
||||
|
||||
/* indices into the lheads array below */
|
||||
#define LHEAD_PEND 0
|
||||
#define LHEAD_PRIO 1
|
||||
#define LHEAD_DELAY 2
|
||||
#define LHEAD_WAIT 3
|
||||
#define LHEAD_ACTIVE 4
|
||||
#define LHEAD_SIZE 5
|
||||
|
||||
static unsigned int spl_max_show_tasks = 512;
|
||||
/* CSTYLED */
|
||||
module_param(spl_max_show_tasks, uint, 0644);
|
||||
MODULE_PARM_DESC(spl_max_show_tasks, "Max number of tasks shown in taskq proc");
|
||||
|
||||
static int
|
||||
taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag)
|
||||
{
|
||||
taskq_t *tq = p;
|
||||
taskq_thread_t *tqt = NULL;
|
||||
spl_wait_queue_entry_t *wq;
|
||||
struct task_struct *tsk;
|
||||
taskq_ent_t *tqe;
|
||||
char name[100];
|
||||
struct list_head *lheads[LHEAD_SIZE], *lh;
|
||||
static char *list_names[LHEAD_SIZE] =
|
||||
{"pend", "prio", "delay", "wait", "active" };
|
||||
int i, j, have_lheads = 0;
|
||||
unsigned long wflags, flags;
|
||||
|
||||
spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
|
||||
spin_lock_irqsave(&tq->tq_wait_waitq.lock, wflags);
|
||||
|
||||
/* get the various lists and check whether they're empty */
|
||||
lheads[LHEAD_PEND] = &tq->tq_pend_list;
|
||||
lheads[LHEAD_PRIO] = &tq->tq_prio_list;
|
||||
lheads[LHEAD_DELAY] = &tq->tq_delay_list;
|
||||
#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
|
||||
lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.head;
|
||||
#else
|
||||
lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.task_list;
|
||||
#endif
|
||||
lheads[LHEAD_ACTIVE] = &tq->tq_active_list;
|
||||
|
||||
for (i = 0; i < LHEAD_SIZE; ++i) {
|
||||
if (list_empty(lheads[i]))
|
||||
lheads[i] = NULL;
|
||||
else
|
||||
++have_lheads;
|
||||
}
|
||||
|
||||
/* early return in non-"all" mode if lists are all empty */
|
||||
if (!allflag && !have_lheads) {
|
||||
spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
|
||||
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* unlock the waitq quickly */
|
||||
if (!lheads[LHEAD_WAIT])
|
||||
spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
|
||||
|
||||
/* show the base taskq contents */
|
||||
snprintf(name, sizeof (name), "%s/%d", tq->tq_name, tq->tq_instance);
|
||||
seq_printf(f, "%-25s ", name);
|
||||
seq_printf(f, "%5d %5d %5d %5d %5d %5d %12d %5d %10x\n",
|
||||
tq->tq_nactive, tq->tq_nthreads, tq->tq_nspawn,
|
||||
tq->tq_maxthreads, tq->tq_pri, tq->tq_minalloc, tq->tq_maxalloc,
|
||||
tq->tq_nalloc, tq->tq_flags);
|
||||
|
||||
/* show the active list */
|
||||
if (lheads[LHEAD_ACTIVE]) {
|
||||
j = 0;
|
||||
list_for_each_entry(tqt, &tq->tq_active_list, tqt_active_list) {
|
||||
if (j == 0)
|
||||
seq_printf(f, "\t%s:",
|
||||
list_names[LHEAD_ACTIVE]);
|
||||
else if (j == 2) {
|
||||
seq_printf(f, "\n\t ");
|
||||
j = 0;
|
||||
}
|
||||
seq_printf(f, " [%d]%pf(%ps)",
|
||||
tqt->tqt_thread->pid,
|
||||
tqt->tqt_task->tqent_func,
|
||||
tqt->tqt_task->tqent_arg);
|
||||
++j;
|
||||
}
|
||||
seq_printf(f, "\n");
|
||||
}
|
||||
|
||||
for (i = LHEAD_PEND; i <= LHEAD_WAIT; ++i)
|
||||
if (lheads[i]) {
|
||||
j = 0;
|
||||
list_for_each(lh, lheads[i]) {
|
||||
if (spl_max_show_tasks != 0 &&
|
||||
j >= spl_max_show_tasks) {
|
||||
seq_printf(f, "\n\t(truncated)");
|
||||
break;
|
||||
}
|
||||
/* show the wait waitq list */
|
||||
if (i == LHEAD_WAIT) {
|
||||
#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
|
||||
wq = list_entry(lh,
|
||||
spl_wait_queue_entry_t, entry);
|
||||
#else
|
||||
wq = list_entry(lh,
|
||||
spl_wait_queue_entry_t, task_list);
|
||||
#endif
|
||||
if (j == 0)
|
||||
seq_printf(f, "\t%s:",
|
||||
list_names[i]);
|
||||
else if (j % 8 == 0)
|
||||
seq_printf(f, "\n\t ");
|
||||
|
||||
tsk = wq->private;
|
||||
seq_printf(f, " %d", tsk->pid);
|
||||
/* pend, prio and delay lists */
|
||||
} else {
|
||||
tqe = list_entry(lh, taskq_ent_t,
|
||||
tqent_list);
|
||||
if (j == 0)
|
||||
seq_printf(f, "\t%s:",
|
||||
list_names[i]);
|
||||
else if (j % 2 == 0)
|
||||
seq_printf(f, "\n\t ");
|
||||
|
||||
seq_printf(f, " %pf(%ps)",
|
||||
tqe->tqent_func,
|
||||
tqe->tqent_arg);
|
||||
}
|
||||
++j;
|
||||
}
|
||||
seq_printf(f, "\n");
|
||||
}
|
||||
if (lheads[LHEAD_WAIT])
|
||||
spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
|
||||
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
taskq_all_seq_show(struct seq_file *f, void *p)
|
||||
{
|
||||
return (taskq_seq_show_impl(f, p, B_TRUE));
|
||||
}
|
||||
|
||||
static int
|
||||
taskq_seq_show(struct seq_file *f, void *p)
|
||||
{
|
||||
return (taskq_seq_show_impl(f, p, B_FALSE));
|
||||
}
|
||||
|
||||
static void *
|
||||
taskq_seq_start(struct seq_file *f, loff_t *pos)
|
||||
{
|
||||
struct list_head *p;
|
||||
loff_t n = *pos;
|
||||
|
||||
down_read(&tq_list_sem);
|
||||
if (!n)
|
||||
taskq_seq_show_headers(f);
|
||||
|
||||
p = tq_list.next;
|
||||
while (n--) {
|
||||
p = p->next;
|
||||
if (p == &tq_list)
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
return (list_entry(p, taskq_t, tq_taskqs));
|
||||
}
|
||||
|
||||
static void *
|
||||
taskq_seq_next(struct seq_file *f, void *p, loff_t *pos)
|
||||
{
|
||||
taskq_t *tq = p;
|
||||
|
||||
++*pos;
|
||||
return ((tq->tq_taskqs.next == &tq_list) ?
|
||||
NULL : list_entry(tq->tq_taskqs.next, taskq_t, tq_taskqs));
|
||||
}
|
||||
|
||||
static void
|
||||
slab_seq_show_headers(struct seq_file *f)
|
||||
{
|
||||
@ -501,66 +309,6 @@ static const kstat_proc_op_t proc_slab_operations = {
|
||||
#endif
|
||||
};
|
||||
|
||||
static void
|
||||
taskq_seq_stop(struct seq_file *f, void *v)
|
||||
{
|
||||
up_read(&tq_list_sem);
|
||||
}
|
||||
|
||||
static const struct seq_operations taskq_all_seq_ops = {
|
||||
.show = taskq_all_seq_show,
|
||||
.start = taskq_seq_start,
|
||||
.next = taskq_seq_next,
|
||||
.stop = taskq_seq_stop,
|
||||
};
|
||||
|
||||
static const struct seq_operations taskq_seq_ops = {
|
||||
.show = taskq_seq_show,
|
||||
.start = taskq_seq_start,
|
||||
.next = taskq_seq_next,
|
||||
.stop = taskq_seq_stop,
|
||||
};
|
||||
|
||||
static int
|
||||
proc_taskq_all_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return (seq_open(filp, &taskq_all_seq_ops));
|
||||
}
|
||||
|
||||
static int
|
||||
proc_taskq_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return (seq_open(filp, &taskq_seq_ops));
|
||||
}
|
||||
|
||||
static const kstat_proc_op_t proc_taskq_all_operations = {
|
||||
#ifdef HAVE_PROC_OPS_STRUCT
|
||||
.proc_open = proc_taskq_all_open,
|
||||
.proc_read = seq_read,
|
||||
.proc_lseek = seq_lseek,
|
||||
.proc_release = seq_release,
|
||||
#else
|
||||
.open = proc_taskq_all_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
#endif
|
||||
};
|
||||
|
||||
static const kstat_proc_op_t proc_taskq_operations = {
|
||||
#ifdef HAVE_PROC_OPS_STRUCT
|
||||
.proc_open = proc_taskq_open,
|
||||
.proc_read = seq_read,
|
||||
.proc_lseek = seq_lseek,
|
||||
.proc_release = seq_release,
|
||||
#else
|
||||
.open = proc_taskq_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
#endif
|
||||
};
|
||||
|
||||
static struct ctl_table spl_kmem_table[] = {
|
||||
#ifdef DEBUG_KMEM
|
||||
{
|
||||
@ -677,8 +425,6 @@ static void spl_proc_cleanup(void)
|
||||
remove_proc_entry("kstat", proc_spl);
|
||||
remove_proc_entry("slab", proc_spl_kmem);
|
||||
remove_proc_entry("kmem", proc_spl);
|
||||
remove_proc_entry("taskq-all", proc_spl);
|
||||
remove_proc_entry("taskq", proc_spl);
|
||||
remove_proc_entry("spl", NULL);
|
||||
|
||||
#ifndef HAVE_REGISTER_SYSCTL_TABLE
|
||||
@ -761,20 +507,6 @@ spl_proc_init(void)
|
||||
goto out;
|
||||
}
|
||||
|
||||
proc_spl_taskq_all = proc_create_data("taskq-all", 0444, proc_spl,
|
||||
&proc_taskq_all_operations, NULL);
|
||||
if (proc_spl_taskq_all == NULL) {
|
||||
rc = -EUNATCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
proc_spl_taskq = proc_create_data("taskq", 0444, proc_spl,
|
||||
&proc_taskq_operations, NULL);
|
||||
if (proc_spl_taskq == NULL) {
|
||||
rc = -EUNATCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
proc_spl_kmem = proc_mkdir("kmem", proc_spl);
|
||||
if (proc_spl_kmem == NULL) {
|
||||
rc = -EUNATCH;
|
||||
|
@ -22,16 +22,98 @@
|
||||
*
|
||||
* Solaris Porting Layer (SPL) Task Queue Implementation.
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2024, Klara Inc.
|
||||
* Copyright (c) 2024, Syneto
|
||||
*/
|
||||
|
||||
#include <sys/timer.h>
|
||||
#include <sys/taskq.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/tsd.h>
|
||||
#include <sys/trace_spl.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/atomic.h>
|
||||
#include <sys/kstat.h>
|
||||
#ifdef HAVE_CPU_HOTPLUG
|
||||
#include <linux/cpuhotplug.h>
|
||||
#endif
|
||||
|
||||
typedef struct taskq_kstats {
|
||||
/* static values, for completeness */
|
||||
kstat_named_t tqks_threads_max;
|
||||
kstat_named_t tqks_entry_pool_min;
|
||||
kstat_named_t tqks_entry_pool_max;
|
||||
|
||||
/* gauges (inc/dec counters, current value) */
|
||||
kstat_named_t tqks_threads_active;
|
||||
kstat_named_t tqks_threads_idle;
|
||||
kstat_named_t tqks_threads_total;
|
||||
kstat_named_t tqks_tasks_pending;
|
||||
kstat_named_t tqks_tasks_priority;
|
||||
kstat_named_t tqks_tasks_total;
|
||||
kstat_named_t tqks_tasks_delayed;
|
||||
kstat_named_t tqks_entries_free;
|
||||
|
||||
/* counters (inc only, since taskq creation) */
|
||||
kstat_named_t tqks_threads_created;
|
||||
kstat_named_t tqks_threads_destroyed;
|
||||
kstat_named_t tqks_tasks_dispatched;
|
||||
kstat_named_t tqks_tasks_dispatched_delayed;
|
||||
kstat_named_t tqks_tasks_executed_normal;
|
||||
kstat_named_t tqks_tasks_executed_priority;
|
||||
kstat_named_t tqks_tasks_executed;
|
||||
kstat_named_t tqks_tasks_delayed_requeued;
|
||||
kstat_named_t tqks_tasks_cancelled;
|
||||
kstat_named_t tqks_thread_wakeups;
|
||||
kstat_named_t tqks_thread_wakeups_nowork;
|
||||
kstat_named_t tqks_thread_sleeps;
|
||||
} taskq_kstats_t;
|
||||
|
||||
static taskq_kstats_t taskq_kstats_template = {
|
||||
{ "threads_max", KSTAT_DATA_UINT64 },
|
||||
{ "entry_pool_min", KSTAT_DATA_UINT64 },
|
||||
{ "entry_pool_max", KSTAT_DATA_UINT64 },
|
||||
{ "threads_active", KSTAT_DATA_UINT64 },
|
||||
{ "threads_idle", KSTAT_DATA_UINT64 },
|
||||
{ "threads_total", KSTAT_DATA_UINT64 },
|
||||
{ "tasks_pending", KSTAT_DATA_UINT64 },
|
||||
{ "tasks_priority", KSTAT_DATA_UINT64 },
|
||||
{ "tasks_total", KSTAT_DATA_UINT64 },
|
||||
{ "tasks_delayed", KSTAT_DATA_UINT64 },
|
||||
{ "entries_free", KSTAT_DATA_UINT64 },
|
||||
|
||||
{ "threads_created", KSTAT_DATA_UINT64 },
|
||||
{ "threads_destroyed", KSTAT_DATA_UINT64 },
|
||||
{ "tasks_dispatched", KSTAT_DATA_UINT64 },
|
||||
{ "tasks_dispatched_delayed", KSTAT_DATA_UINT64 },
|
||||
{ "tasks_executed_normal", KSTAT_DATA_UINT64 },
|
||||
{ "tasks_executed_priority", KSTAT_DATA_UINT64 },
|
||||
{ "tasks_executed", KSTAT_DATA_UINT64 },
|
||||
{ "tasks_delayed_requeued", KSTAT_DATA_UINT64 },
|
||||
{ "tasks_cancelled", KSTAT_DATA_UINT64 },
|
||||
{ "thread_wakeups", KSTAT_DATA_UINT64 },
|
||||
{ "thread_wakeups_nowork", KSTAT_DATA_UINT64 },
|
||||
{ "thread_sleeps", KSTAT_DATA_UINT64 },
|
||||
};
|
||||
|
||||
#define TQSTAT_INC(tq, stat) wmsum_add(&tq->tq_sums.tqs_##stat, 1)
|
||||
#define TQSTAT_DEC(tq, stat) wmsum_add(&tq->tq_sums.tqs_##stat, -1)
|
||||
|
||||
#define _TQSTAT_MOD_LIST(mod, tq, t) do { \
|
||||
switch (t->tqent_flags & TQENT_LIST_MASK) { \
|
||||
case TQENT_LIST_NONE: ASSERT(list_empty(&t->tqent_list)); break;\
|
||||
case TQENT_LIST_PENDING: mod(tq, tasks_pending); break; \
|
||||
case TQENT_LIST_PRIORITY: mod(tq, tasks_priority); break; \
|
||||
case TQENT_LIST_DELAY: mod(tq, tasks_delayed); break; \
|
||||
} \
|
||||
} while (0)
|
||||
#define TQSTAT_INC_LIST(tq, t) _TQSTAT_MOD_LIST(TQSTAT_INC, tq, t)
|
||||
#define TQSTAT_DEC_LIST(tq, t) _TQSTAT_MOD_LIST(TQSTAT_DEC, tq, t)
|
||||
|
||||
#define TQENT_SET_LIST(t, l) \
|
||||
t->tqent_flags = (t->tqent_flags & ~TQENT_LIST_MASK) | l;
|
||||
|
||||
static int spl_taskq_thread_bind = 0;
|
||||
module_param(spl_taskq_thread_bind, int, 0644);
|
||||
MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default");
|
||||
@ -134,6 +216,7 @@ retry:
|
||||
ASSERT(!timer_pending(&t->tqent_timer));
|
||||
|
||||
list_del_init(&t->tqent_list);
|
||||
TQSTAT_DEC(tq, entries_free);
|
||||
return (t);
|
||||
}
|
||||
|
||||
@ -204,12 +287,11 @@ task_done(taskq_t *tq, taskq_ent_t *t)
|
||||
{
|
||||
ASSERT(tq);
|
||||
ASSERT(t);
|
||||
ASSERT(list_empty(&t->tqent_list));
|
||||
|
||||
/* Wake tasks blocked in taskq_wait_id() */
|
||||
wake_up_all(&t->tqent_waitq);
|
||||
|
||||
list_del_init(&t->tqent_list);
|
||||
|
||||
if (tq->tq_nalloc <= tq->tq_minalloc) {
|
||||
t->tqent_id = TASKQID_INVALID;
|
||||
t->tqent_func = NULL;
|
||||
@ -217,6 +299,7 @@ task_done(taskq_t *tq, taskq_ent_t *t)
|
||||
t->tqent_flags = 0;
|
||||
|
||||
list_add_tail(&t->tqent_list, &tq->tq_free_list);
|
||||
TQSTAT_INC(tq, entries_free);
|
||||
} else {
|
||||
task_free(tq, t);
|
||||
}
|
||||
@ -263,6 +346,8 @@ task_expire_impl(taskq_ent_t *t)
|
||||
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
||||
|
||||
wake_up(&tq->tq_work_waitq);
|
||||
|
||||
TQSTAT_INC(tq, tasks_delayed_requeued);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -534,7 +619,11 @@ taskq_cancel_id(taskq_t *tq, taskqid_t id)
|
||||
t = taskq_find(tq, id);
|
||||
if (t && t != ERR_PTR(-EBUSY)) {
|
||||
list_del_init(&t->tqent_list);
|
||||
TQSTAT_DEC_LIST(tq, t);
|
||||
TQSTAT_DEC(tq, tasks_total);
|
||||
|
||||
t->tqent_flags |= TQENT_FLAG_CANCEL;
|
||||
TQSTAT_INC(tq, tasks_cancelled);
|
||||
|
||||
/*
|
||||
* When canceling the lowest outstanding task id we
|
||||
@ -604,13 +693,19 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
|
||||
spin_lock(&t->tqent_lock);
|
||||
|
||||
/* Queue to the front of the list to enforce TQ_NOQUEUE semantics */
|
||||
if (flags & TQ_NOQUEUE)
|
||||
if (flags & TQ_NOQUEUE) {
|
||||
TQENT_SET_LIST(t, TQENT_LIST_PRIORITY);
|
||||
list_add(&t->tqent_list, &tq->tq_prio_list);
|
||||
/* Queue to the priority list instead of the pending list */
|
||||
else if (flags & TQ_FRONT)
|
||||
} else if (flags & TQ_FRONT) {
|
||||
TQENT_SET_LIST(t, TQENT_LIST_PRIORITY);
|
||||
list_add_tail(&t->tqent_list, &tq->tq_prio_list);
|
||||
else
|
||||
} else {
|
||||
TQENT_SET_LIST(t, TQENT_LIST_PENDING);
|
||||
list_add_tail(&t->tqent_list, &tq->tq_pend_list);
|
||||
}
|
||||
TQSTAT_INC_LIST(tq, t);
|
||||
TQSTAT_INC(tq, tasks_total);
|
||||
|
||||
t->tqent_id = rc = tq->tq_next_id;
|
||||
tq->tq_next_id++;
|
||||
@ -629,6 +724,8 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
|
||||
|
||||
wake_up(&tq->tq_work_waitq);
|
||||
|
||||
TQSTAT_INC(tq, tasks_dispatched);
|
||||
|
||||
/* Spawn additional taskq threads if required. */
|
||||
if (!(flags & TQ_NOQUEUE) && tq->tq_nactive == tq->tq_nthreads)
|
||||
(void) taskq_thread_spawn(tq);
|
||||
@ -662,6 +759,9 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
|
||||
|
||||
/* Queue to the delay list for subsequent execution */
|
||||
list_add_tail(&t->tqent_list, &tq->tq_delay_list);
|
||||
TQENT_SET_LIST(t, TQENT_LIST_DELAY);
|
||||
TQSTAT_INC_LIST(tq, t);
|
||||
TQSTAT_INC(tq, tasks_total);
|
||||
|
||||
t->tqent_id = rc = tq->tq_next_id;
|
||||
tq->tq_next_id++;
|
||||
@ -676,6 +776,8 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
|
||||
|
||||
spin_unlock(&t->tqent_lock);
|
||||
|
||||
TQSTAT_INC(tq, tasks_dispatched_delayed);
|
||||
|
||||
/* Spawn additional taskq threads if required. */
|
||||
if (tq->tq_nactive == tq->tq_nthreads)
|
||||
(void) taskq_thread_spawn(tq);
|
||||
@ -724,10 +826,15 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
|
||||
t->tqent_flags |= TQENT_FLAG_PREALLOC;
|
||||
|
||||
/* Queue to the priority list instead of the pending list */
|
||||
if (flags & TQ_FRONT)
|
||||
if (flags & TQ_FRONT) {
|
||||
TQENT_SET_LIST(t, TQENT_LIST_PRIORITY);
|
||||
list_add_tail(&t->tqent_list, &tq->tq_prio_list);
|
||||
else
|
||||
} else {
|
||||
TQENT_SET_LIST(t, TQENT_LIST_PENDING);
|
||||
list_add_tail(&t->tqent_list, &tq->tq_pend_list);
|
||||
}
|
||||
TQSTAT_INC_LIST(tq, t);
|
||||
TQSTAT_INC(tq, tasks_total);
|
||||
|
||||
t->tqent_id = tq->tq_next_id;
|
||||
tq->tq_next_id++;
|
||||
@ -742,6 +849,8 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
|
||||
|
||||
wake_up(&tq->tq_work_waitq);
|
||||
|
||||
TQSTAT_INC(tq, tasks_dispatched);
|
||||
|
||||
/* Spawn additional taskq threads if required. */
|
||||
if (tq->tq_nactive == tq->tq_nthreads)
|
||||
(void) taskq_thread_spawn(tq);
|
||||
@ -908,6 +1017,8 @@ taskq_thread(void *args)
|
||||
wake_up(&tq->tq_wait_waitq);
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
TQSTAT_INC(tq, threads_total);
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
|
||||
if (list_empty(&tq->tq_pend_list) &&
|
||||
@ -919,9 +1030,15 @@ taskq_thread(void *args)
|
||||
add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
|
||||
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
||||
|
||||
TQSTAT_INC(tq, thread_sleeps);
|
||||
TQSTAT_INC(tq, threads_idle);
|
||||
|
||||
schedule();
|
||||
seq_tasks = 0;
|
||||
|
||||
TQSTAT_DEC(tq, threads_idle);
|
||||
TQSTAT_INC(tq, thread_wakeups);
|
||||
|
||||
spin_lock_irqsave_nested(&tq->tq_lock, flags,
|
||||
tq->tq_lock_class);
|
||||
remove_wait_queue(&tq->tq_work_waitq, &wait);
|
||||
@ -931,6 +1048,8 @@ taskq_thread(void *args)
|
||||
|
||||
if ((t = taskq_next_ent(tq)) != NULL) {
|
||||
list_del_init(&t->tqent_list);
|
||||
TQSTAT_DEC_LIST(tq, t);
|
||||
TQSTAT_DEC(tq, tasks_total);
|
||||
|
||||
/*
|
||||
* A TQENT_FLAG_PREALLOC task may be reused or freed
|
||||
@ -955,6 +1074,7 @@ taskq_thread(void *args)
|
||||
tq->tq_nactive++;
|
||||
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
||||
|
||||
TQSTAT_INC(tq, threads_active);
|
||||
DTRACE_PROBE1(taskq_ent__start, taskq_ent_t *, t);
|
||||
|
||||
/* Perform the requested task */
|
||||
@ -962,8 +1082,17 @@ taskq_thread(void *args)
|
||||
|
||||
DTRACE_PROBE1(taskq_ent__finish, taskq_ent_t *, t);
|
||||
|
||||
TQSTAT_DEC(tq, threads_active);
|
||||
if ((t->tqent_flags & TQENT_LIST_MASK) ==
|
||||
TQENT_LIST_PENDING)
|
||||
TQSTAT_INC(tq, tasks_executed_normal);
|
||||
else
|
||||
TQSTAT_INC(tq, tasks_executed_priority);
|
||||
TQSTAT_INC(tq, tasks_executed);
|
||||
|
||||
spin_lock_irqsave_nested(&tq->tq_lock, flags,
|
||||
tq->tq_lock_class);
|
||||
|
||||
tq->tq_nactive--;
|
||||
list_del_init(&tqt->tqt_active_list);
|
||||
tqt->tqt_task = NULL;
|
||||
@ -989,7 +1118,8 @@ taskq_thread(void *args)
|
||||
tqt->tqt_id = TASKQID_INVALID;
|
||||
tqt->tqt_flags = 0;
|
||||
wake_up_all(&tq->tq_wait_waitq);
|
||||
}
|
||||
} else
|
||||
TQSTAT_INC(tq, thread_wakeups_nowork);
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
@ -998,6 +1128,10 @@ taskq_thread(void *args)
|
||||
__set_current_state(TASK_RUNNING);
|
||||
tq->tq_nthreads--;
|
||||
list_del_init(&tqt->tqt_thread_list);
|
||||
|
||||
TQSTAT_DEC(tq, threads_total);
|
||||
TQSTAT_INC(tq, threads_destroyed);
|
||||
|
||||
error:
|
||||
kmem_free(tqt, sizeof (taskq_thread_t));
|
||||
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
||||
@ -1037,9 +1171,156 @@ taskq_thread_create(taskq_t *tq)
|
||||
|
||||
wake_up_process(tqt->tqt_thread);
|
||||
|
||||
TQSTAT_INC(tq, threads_created);
|
||||
|
||||
return (tqt);
|
||||
}
|
||||
|
||||
static void
|
||||
taskq_stats_init(taskq_t *tq)
|
||||
{
|
||||
taskq_sums_t *tqs = &tq->tq_sums;
|
||||
wmsum_init(&tqs->tqs_threads_active, 0);
|
||||
wmsum_init(&tqs->tqs_threads_idle, 0);
|
||||
wmsum_init(&tqs->tqs_threads_total, 0);
|
||||
wmsum_init(&tqs->tqs_tasks_pending, 0);
|
||||
wmsum_init(&tqs->tqs_tasks_priority, 0);
|
||||
wmsum_init(&tqs->tqs_tasks_total, 0);
|
||||
wmsum_init(&tqs->tqs_tasks_delayed, 0);
|
||||
wmsum_init(&tqs->tqs_entries_free, 0);
|
||||
wmsum_init(&tqs->tqs_threads_created, 0);
|
||||
wmsum_init(&tqs->tqs_threads_destroyed, 0);
|
||||
wmsum_init(&tqs->tqs_tasks_dispatched, 0);
|
||||
wmsum_init(&tqs->tqs_tasks_dispatched_delayed, 0);
|
||||
wmsum_init(&tqs->tqs_tasks_executed_normal, 0);
|
||||
wmsum_init(&tqs->tqs_tasks_executed_priority, 0);
|
||||
wmsum_init(&tqs->tqs_tasks_executed, 0);
|
||||
wmsum_init(&tqs->tqs_tasks_delayed_requeued, 0);
|
||||
wmsum_init(&tqs->tqs_tasks_cancelled, 0);
|
||||
wmsum_init(&tqs->tqs_thread_wakeups, 0);
|
||||
wmsum_init(&tqs->tqs_thread_wakeups_nowork, 0);
|
||||
wmsum_init(&tqs->tqs_thread_sleeps, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
taskq_stats_fini(taskq_t *tq)
|
||||
{
|
||||
taskq_sums_t *tqs = &tq->tq_sums;
|
||||
wmsum_fini(&tqs->tqs_threads_active);
|
||||
wmsum_fini(&tqs->tqs_threads_idle);
|
||||
wmsum_fini(&tqs->tqs_threads_total);
|
||||
wmsum_fini(&tqs->tqs_tasks_pending);
|
||||
wmsum_fini(&tqs->tqs_tasks_priority);
|
||||
wmsum_fini(&tqs->tqs_tasks_total);
|
||||
wmsum_fini(&tqs->tqs_tasks_delayed);
|
||||
wmsum_fini(&tqs->tqs_entries_free);
|
||||
wmsum_fini(&tqs->tqs_threads_created);
|
||||
wmsum_fini(&tqs->tqs_threads_destroyed);
|
||||
wmsum_fini(&tqs->tqs_tasks_dispatched);
|
||||
wmsum_fini(&tqs->tqs_tasks_dispatched_delayed);
|
||||
wmsum_fini(&tqs->tqs_tasks_executed_normal);
|
||||
wmsum_fini(&tqs->tqs_tasks_executed_priority);
|
||||
wmsum_fini(&tqs->tqs_tasks_executed);
|
||||
wmsum_fini(&tqs->tqs_tasks_delayed_requeued);
|
||||
wmsum_fini(&tqs->tqs_tasks_cancelled);
|
||||
wmsum_fini(&tqs->tqs_thread_wakeups);
|
||||
wmsum_fini(&tqs->tqs_thread_wakeups_nowork);
|
||||
wmsum_fini(&tqs->tqs_thread_sleeps);
|
||||
}
|
||||
|
||||
static int
|
||||
taskq_kstats_update(kstat_t *ksp, int rw)
|
||||
{
|
||||
if (rw == KSTAT_WRITE)
|
||||
return (EACCES);
|
||||
|
||||
taskq_t *tq = ksp->ks_private;
|
||||
taskq_kstats_t *tqks = ksp->ks_data;
|
||||
|
||||
tqks->tqks_threads_max.value.ui64 = tq->tq_maxthreads;
|
||||
tqks->tqks_entry_pool_min.value.ui64 = tq->tq_minalloc;
|
||||
tqks->tqks_entry_pool_max.value.ui64 = tq->tq_maxalloc;
|
||||
|
||||
taskq_sums_t *tqs = &tq->tq_sums;
|
||||
|
||||
tqks->tqks_threads_active.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_threads_active);
|
||||
tqks->tqks_threads_idle.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_threads_idle);
|
||||
tqks->tqks_threads_total.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_threads_total);
|
||||
tqks->tqks_tasks_pending.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_tasks_pending);
|
||||
tqks->tqks_tasks_priority.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_tasks_priority);
|
||||
tqks->tqks_tasks_total.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_tasks_total);
|
||||
tqks->tqks_tasks_delayed.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_tasks_delayed);
|
||||
tqks->tqks_entries_free.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_entries_free);
|
||||
tqks->tqks_threads_created.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_threads_created);
|
||||
tqks->tqks_threads_destroyed.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_threads_destroyed);
|
||||
tqks->tqks_tasks_dispatched.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_tasks_dispatched);
|
||||
tqks->tqks_tasks_dispatched_delayed.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_tasks_dispatched_delayed);
|
||||
tqks->tqks_tasks_executed_normal.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_tasks_executed_normal);
|
||||
tqks->tqks_tasks_executed_priority.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_tasks_executed_priority);
|
||||
tqks->tqks_tasks_executed.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_tasks_executed);
|
||||
tqks->tqks_tasks_delayed_requeued.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_tasks_delayed_requeued);
|
||||
tqks->tqks_tasks_cancelled.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_tasks_cancelled);
|
||||
tqks->tqks_thread_wakeups.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_thread_wakeups);
|
||||
tqks->tqks_thread_wakeups_nowork.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_thread_wakeups_nowork);
|
||||
tqks->tqks_thread_sleeps.value.ui64 =
|
||||
wmsum_value(&tqs->tqs_thread_sleeps);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
taskq_kstats_init(taskq_t *tq)
|
||||
{
|
||||
char name[TASKQ_NAMELEN+5]; /* 5 for dot, 3x instance digits, null */
|
||||
snprintf(name, sizeof (name), "%s.%d", tq->tq_name, tq->tq_instance);
|
||||
|
||||
kstat_t *ksp = kstat_create("taskq", 0, name, "misc",
|
||||
KSTAT_TYPE_NAMED, sizeof (taskq_kstats_t) / sizeof (kstat_named_t),
|
||||
KSTAT_FLAG_VIRTUAL);
|
||||
|
||||
if (ksp == NULL)
|
||||
return;
|
||||
|
||||
ksp->ks_private = tq;
|
||||
ksp->ks_update = taskq_kstats_update;
|
||||
ksp->ks_data = kmem_alloc(sizeof (taskq_kstats_t), KM_SLEEP);
|
||||
memcpy(ksp->ks_data, &taskq_kstats_template, sizeof (taskq_kstats_t));
|
||||
kstat_install(ksp);
|
||||
|
||||
tq->tq_ksp = ksp;
|
||||
}
|
||||
|
||||
static void
|
||||
taskq_kstats_fini(taskq_t *tq)
|
||||
{
|
||||
if (tq->tq_ksp == NULL)
|
||||
return;
|
||||
|
||||
kmem_free(tq->tq_ksp->ks_data, sizeof (taskq_kstats_t));
|
||||
kstat_delete(tq->tq_ksp);
|
||||
|
||||
tq->tq_ksp = NULL;
|
||||
}
|
||||
|
||||
taskq_t *
|
||||
taskq_create(const char *name, int threads_arg, pri_t pri,
|
||||
int minalloc, int maxalloc, uint_t flags)
|
||||
@ -1104,6 +1385,7 @@ taskq_create(const char *name, int threads_arg, pri_t pri,
|
||||
init_waitqueue_head(&tq->tq_wait_waitq);
|
||||
tq->tq_lock_class = TQ_LOCK_GENERAL;
|
||||
INIT_LIST_HEAD(&tq->tq_taskqs);
|
||||
taskq_stats_init(tq);
|
||||
|
||||
if (flags & TASKQ_PREPOPULATE) {
|
||||
spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
|
||||
@ -1137,14 +1419,17 @@ taskq_create(const char *name, int threads_arg, pri_t pri,
|
||||
|
||||
if (rc) {
|
||||
taskq_destroy(tq);
|
||||
tq = NULL;
|
||||
} else {
|
||||
down_write(&tq_list_sem);
|
||||
tq->tq_instance = taskq_find_by_name(name) + 1;
|
||||
list_add_tail(&tq->tq_taskqs, &tq_list);
|
||||
up_write(&tq_list_sem);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
down_write(&tq_list_sem);
|
||||
tq->tq_instance = taskq_find_by_name(name) + 1;
|
||||
list_add_tail(&tq->tq_taskqs, &tq_list);
|
||||
up_write(&tq_list_sem);
|
||||
|
||||
/* Install kstats late, because the name includes tq_instance */
|
||||
taskq_kstats_init(tq);
|
||||
|
||||
return (tq);
|
||||
}
|
||||
EXPORT_SYMBOL(taskq_create);
|
||||
@ -1177,6 +1462,8 @@ taskq_destroy(taskq_t *tq)
|
||||
|
||||
taskq_wait(tq);
|
||||
|
||||
taskq_kstats_fini(tq);
|
||||
|
||||
/* remove taskq from global list used by the kstats */
|
||||
down_write(&tq_list_sem);
|
||||
list_del(&tq->tq_taskqs);
|
||||
@ -1230,6 +1517,7 @@ taskq_destroy(taskq_t *tq)
|
||||
|
||||
spin_unlock_irqrestore(&tq->tq_lock, flags);
|
||||
|
||||
taskq_stats_fini(tq);
|
||||
kmem_strfree(tq->tq_name);
|
||||
kmem_free(tq, sizeof (taskq_t));
|
||||
}
|
||||
@ -1271,6 +1559,100 @@ taskq_create_synced(const char *name, int nthreads, pri_t pri,
|
||||
}
|
||||
EXPORT_SYMBOL(taskq_create_synced);
|
||||
|
||||
static kstat_t *taskq_summary_ksp = NULL;
|
||||
|
||||
static int
|
||||
spl_taskq_kstat_headers(char *buf, size_t size)
|
||||
{
|
||||
size_t n = snprintf(buf, size,
|
||||
"%-20s | %-17s | %-23s\n"
|
||||
"%-20s | %-17s | %-23s\n"
|
||||
"%-20s | %-17s | %-23s\n",
|
||||
"", "threads", "tasks on queue",
|
||||
"taskq name", "tot [act idl] max", " pend [ norm high] dly",
|
||||
"--------------------", "-----------------",
|
||||
"-----------------------");
|
||||
return (n >= size ? ENOMEM : 0);
|
||||
}
|
||||
|
||||
static int
|
||||
spl_taskq_kstat_data(char *buf, size_t size, void *data)
|
||||
{
|
||||
struct list_head *tql = NULL;
|
||||
taskq_t *tq;
|
||||
char name[TASKQ_NAMELEN+5]; /* 5 for dot, 3x instance digits, null */
|
||||
char threads[25];
|
||||
char tasks[30];
|
||||
size_t n;
|
||||
int err = 0;
|
||||
|
||||
down_read(&tq_list_sem);
|
||||
list_for_each_prev(tql, &tq_list) {
|
||||
tq = list_entry(tql, taskq_t, tq_taskqs);
|
||||
|
||||
mutex_enter(tq->tq_ksp->ks_lock);
|
||||
taskq_kstats_update(tq->tq_ksp, KSTAT_READ);
|
||||
taskq_kstats_t *tqks = tq->tq_ksp->ks_data;
|
||||
|
||||
snprintf(name, sizeof (name), "%s.%d", tq->tq_name,
|
||||
tq->tq_instance);
|
||||
snprintf(threads, sizeof (threads), "%3llu [%3llu %3llu] %3llu",
|
||||
tqks->tqks_threads_total.value.ui64,
|
||||
tqks->tqks_threads_active.value.ui64,
|
||||
tqks->tqks_threads_idle.value.ui64,
|
||||
tqks->tqks_threads_max.value.ui64);
|
||||
snprintf(tasks, sizeof (tasks), "%5llu [%5llu %5llu] %3llu",
|
||||
tqks->tqks_tasks_total.value.ui64,
|
||||
tqks->tqks_tasks_pending.value.ui64,
|
||||
tqks->tqks_tasks_priority.value.ui64,
|
||||
tqks->tqks_tasks_delayed.value.ui64);
|
||||
|
||||
mutex_exit(tq->tq_ksp->ks_lock);
|
||||
|
||||
n = snprintf(buf, size, "%-20s | %-17s | %-23s\n",
|
||||
name, threads, tasks);
|
||||
if (n >= size) {
|
||||
err = ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
buf = &buf[n];
|
||||
size -= n;
|
||||
}
|
||||
|
||||
up_read(&tq_list_sem);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
static void
|
||||
spl_taskq_kstat_init(void)
|
||||
{
|
||||
kstat_t *ksp = kstat_create("taskq", 0, "summary", "misc",
|
||||
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
|
||||
|
||||
if (ksp == NULL)
|
||||
return;
|
||||
|
||||
ksp->ks_data = (void *)(uintptr_t)1;
|
||||
ksp->ks_ndata = 1;
|
||||
kstat_set_raw_ops(ksp, spl_taskq_kstat_headers,
|
||||
spl_taskq_kstat_data, NULL);
|
||||
kstat_install(ksp);
|
||||
|
||||
taskq_summary_ksp = ksp;
|
||||
}
|
||||
|
||||
static void
|
||||
spl_taskq_kstat_fini(void)
|
||||
{
|
||||
if (taskq_summary_ksp == NULL)
|
||||
return;
|
||||
|
||||
kstat_delete(taskq_summary_ksp);
|
||||
taskq_summary_ksp = NULL;
|
||||
}
|
||||
|
||||
static unsigned int spl_taskq_kick = 0;
|
||||
|
||||
/*
|
||||
@ -1451,12 +1833,16 @@ spl_taskq_init(void)
|
||||
*/
|
||||
dynamic_taskq->tq_lock_class = TQ_LOCK_DYNAMIC;
|
||||
|
||||
spl_taskq_kstat_init();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
spl_taskq_fini(void)
|
||||
{
|
||||
spl_taskq_kstat_fini();
|
||||
|
||||
taskq_destroy(dynamic_taskq);
|
||||
dynamic_taskq = NULL;
|
||||
|
||||
|
@ -186,6 +186,13 @@ issig(void)
|
||||
|
||||
schedule();
|
||||
#endif
|
||||
/*
|
||||
* Dequeued SIGSTOP/SIGTSTP.
|
||||
* Check if the process has another signal pending.
|
||||
*/
|
||||
if (signal_pending(current))
|
||||
return (1);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -58,22 +58,16 @@
|
||||
#include <sys/arc.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#ifdef _KERNEL
|
||||
#include <linux/kmap_compat.h>
|
||||
#include <linux/mm_compat.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/version.h>
|
||||
#endif
|
||||
|
||||
#ifdef _KERNEL
|
||||
#if defined(MAX_ORDER)
|
||||
#define ABD_MAX_ORDER (MAX_ORDER)
|
||||
#elif defined(MAX_PAGE_ORDER)
|
||||
#define ABD_MAX_ORDER (MAX_PAGE_ORDER)
|
||||
#endif
|
||||
#else
|
||||
#define ABD_MAX_ORDER (1)
|
||||
#endif
|
||||
|
||||
typedef struct abd_stats {
|
||||
kstat_named_t abdstat_struct_size;
|
||||
@ -193,11 +187,9 @@ abd_t *abd_zero_scatter = NULL;
|
||||
|
||||
struct page;
|
||||
/*
|
||||
* _KERNEL - Will point to ZERO_PAGE if it is available or it will be
|
||||
* an allocated zero'd PAGESIZE buffer.
|
||||
* Userspace - Will be an allocated zero'ed PAGESIZE buffer.
|
||||
*
|
||||
* abd_zero_page is assigned to each of the pages of abd_zero_scatter.
|
||||
* abd_zero_page is assigned to each of the pages of abd_zero_scatter. It will
|
||||
* point to ZERO_PAGE if it is available or it will be an allocated zero'd
|
||||
* PAGESIZE buffer.
|
||||
*/
|
||||
static struct page *abd_zero_page = NULL;
|
||||
|
||||
@ -232,7 +224,6 @@ abd_free_struct_impl(abd_t *abd)
|
||||
ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t));
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
static unsigned zfs_abd_scatter_max_order = ABD_MAX_ORDER - 1;
|
||||
|
||||
/*
|
||||
@ -509,7 +500,7 @@ abd_alloc_zero_scatter(void)
|
||||
ABD_SCATTER(abd_zero_scatter).abd_sgl = table.sgl;
|
||||
ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages;
|
||||
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
|
||||
abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS;
|
||||
abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK;
|
||||
|
||||
abd_for_each_sg(abd_zero_scatter, sg, nr_pages, i) {
|
||||
sg_set_page(sg, abd_zero_page, PAGESIZE, 0);
|
||||
@ -520,134 +511,6 @@ abd_alloc_zero_scatter(void)
|
||||
ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
|
||||
}
|
||||
|
||||
#else /* _KERNEL */
|
||||
|
||||
#ifndef PAGE_SHIFT
|
||||
#define PAGE_SHIFT (highbit64(PAGESIZE)-1)
|
||||
#endif
|
||||
|
||||
#define zfs_kmap_local(chunk) ((void *)chunk)
|
||||
#define zfs_kunmap_local(addr) do { (void)(addr); } while (0)
|
||||
#define local_irq_save(flags) do { (void)(flags); } while (0)
|
||||
#define local_irq_restore(flags) do { (void)(flags); } while (0)
|
||||
#define nth_page(pg, i) \
|
||||
((struct page *)((void *)(pg) + (i) * PAGESIZE))
|
||||
|
||||
struct scatterlist {
|
||||
struct page *page;
|
||||
int length;
|
||||
int end;
|
||||
};
|
||||
|
||||
static void
|
||||
sg_init_table(struct scatterlist *sg, int nr)
|
||||
{
|
||||
memset(sg, 0, nr * sizeof (struct scatterlist));
|
||||
sg[nr - 1].end = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* This must be called if any of the sg_table allocation functions
|
||||
* are called.
|
||||
*/
|
||||
static void
|
||||
abd_free_sg_table(abd_t *abd)
|
||||
{
|
||||
int nents = ABD_SCATTER(abd).abd_nents;
|
||||
vmem_free(ABD_SCATTER(abd).abd_sgl,
|
||||
nents * sizeof (struct scatterlist));
|
||||
}
|
||||
|
||||
#define for_each_sg(sgl, sg, nr, i) \
|
||||
for ((i) = 0, (sg) = (sgl); (i) < (nr); (i)++, (sg) = sg_next(sg))
|
||||
|
||||
static inline void
|
||||
sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len,
|
||||
unsigned int offset)
|
||||
{
|
||||
/* currently we don't use offset */
|
||||
ASSERT(offset == 0);
|
||||
sg->page = page;
|
||||
sg->length = len;
|
||||
}
|
||||
|
||||
static inline struct page *
|
||||
sg_page(struct scatterlist *sg)
|
||||
{
|
||||
return (sg->page);
|
||||
}
|
||||
|
||||
static inline struct scatterlist *
|
||||
sg_next(struct scatterlist *sg)
|
||||
{
|
||||
if (sg->end)
|
||||
return (NULL);
|
||||
|
||||
return (sg + 1);
|
||||
}
|
||||
|
||||
void
|
||||
abd_alloc_chunks(abd_t *abd, size_t size)
|
||||
{
|
||||
unsigned nr_pages = abd_chunkcnt_for_bytes(size);
|
||||
struct scatterlist *sg;
|
||||
int i;
|
||||
|
||||
ABD_SCATTER(abd).abd_sgl = vmem_alloc(nr_pages *
|
||||
sizeof (struct scatterlist), KM_SLEEP);
|
||||
sg_init_table(ABD_SCATTER(abd).abd_sgl, nr_pages);
|
||||
|
||||
abd_for_each_sg(abd, sg, nr_pages, i) {
|
||||
struct page *p = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP);
|
||||
sg_set_page(sg, p, PAGESIZE, 0);
|
||||
}
|
||||
ABD_SCATTER(abd).abd_nents = nr_pages;
|
||||
}
|
||||
|
||||
void
|
||||
abd_free_chunks(abd_t *abd)
|
||||
{
|
||||
int i, n = ABD_SCATTER(abd).abd_nents;
|
||||
struct scatterlist *sg;
|
||||
|
||||
abd_for_each_sg(abd, sg, n, i) {
|
||||
struct page *p = nth_page(sg_page(sg), 0);
|
||||
umem_free_aligned(p, PAGESIZE);
|
||||
}
|
||||
abd_free_sg_table(abd);
|
||||
}
|
||||
|
||||
static void
|
||||
abd_alloc_zero_scatter(void)
|
||||
{
|
||||
unsigned nr_pages = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
|
||||
struct scatterlist *sg;
|
||||
int i;
|
||||
|
||||
abd_zero_page = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP);
|
||||
memset(abd_zero_page, 0, PAGESIZE);
|
||||
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
|
||||
abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER;
|
||||
abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS;
|
||||
ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
|
||||
ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages;
|
||||
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
|
||||
ABD_SCATTER(abd_zero_scatter).abd_sgl = vmem_alloc(nr_pages *
|
||||
sizeof (struct scatterlist), KM_SLEEP);
|
||||
|
||||
sg_init_table(ABD_SCATTER(abd_zero_scatter).abd_sgl, nr_pages);
|
||||
|
||||
abd_for_each_sg(abd_zero_scatter, sg, nr_pages, i) {
|
||||
sg_set_page(sg, abd_zero_page, PAGESIZE, 0);
|
||||
}
|
||||
|
||||
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, PAGESIZE);
|
||||
ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
|
||||
}
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
boolean_t
|
||||
abd_size_alloc_linear(size_t size)
|
||||
{
|
||||
@ -712,14 +575,10 @@ abd_free_zero_scatter(void)
|
||||
abd_free_struct(abd_zero_scatter);
|
||||
abd_zero_scatter = NULL;
|
||||
ASSERT3P(abd_zero_page, !=, NULL);
|
||||
#if defined(_KERNEL)
|
||||
#if defined(HAVE_ZERO_PAGE_GPL_ONLY)
|
||||
abd_unmark_zfs_page(abd_zero_page);
|
||||
__free_page(abd_zero_page);
|
||||
#endif /* HAVE_ZERO_PAGE_GPL_ONLY */
|
||||
#else
|
||||
umem_free_aligned(abd_zero_page, PAGESIZE);
|
||||
#endif /* _KERNEL */
|
||||
}
|
||||
|
||||
static int
|
||||
@ -1014,8 +873,6 @@ abd_cache_reap_now(void)
|
||||
{
|
||||
}
|
||||
|
||||
#if defined(_KERNEL)
|
||||
|
||||
/*
|
||||
* This is abd_iter_page(), the function underneath abd_iterate_page_func().
|
||||
* It yields the next page struct and data offset and size within it, without
|
||||
@ -1297,5 +1154,3 @@ MODULE_PARM_DESC(zfs_abd_scatter_min_size,
|
||||
module_param(zfs_abd_scatter_max_order, uint, 0644);
|
||||
MODULE_PARM_DESC(zfs_abd_scatter_max_order,
|
||||
"Maximum order allocation used for a scatter ABD.");
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
@ -201,9 +201,9 @@ arc_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
|
||||
* See also the comment above zfs_arc_shrinker_limit.
|
||||
*/
|
||||
int64_t can_free = btop(arc_evictable_memory());
|
||||
int64_t limit = zfs_arc_shrinker_limit != 0 ?
|
||||
zfs_arc_shrinker_limit : INT64_MAX;
|
||||
return (MIN(can_free, limit));
|
||||
if (current_is_kswapd() && zfs_arc_shrinker_limit)
|
||||
can_free = MIN(can_free, zfs_arc_shrinker_limit);
|
||||
return (can_free);
|
||||
}
|
||||
|
||||
static unsigned long
|
||||
|
@ -1101,8 +1101,8 @@ zfsctl_snapshot_mount(struct path *path, int flags)
|
||||
zfsvfs_t *snap_zfsvfs;
|
||||
zfs_snapentry_t *se;
|
||||
char *full_name, *full_path;
|
||||
char *argv[] = { "/usr/bin/env", "mount", "-t", "zfs", "-n", NULL, NULL,
|
||||
NULL };
|
||||
char *argv[] = { "/usr/bin/env", "mount", "-i", "-t", "zfs", "-n",
|
||||
NULL, NULL, NULL };
|
||||
char *envp[] = { NULL };
|
||||
int error;
|
||||
struct path spath;
|
||||
@ -1153,8 +1153,8 @@ zfsctl_snapshot_mount(struct path *path, int flags)
|
||||
* value from call_usermodehelper() will be (exitcode << 8 + signal).
|
||||
*/
|
||||
dprintf("mount; name=%s path=%s\n", full_name, full_path);
|
||||
argv[5] = full_name;
|
||||
argv[6] = full_path;
|
||||
argv[6] = full_name;
|
||||
argv[7] = full_path;
|
||||
error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
|
||||
if (error) {
|
||||
if (!(error & MOUNT_BUSY << 8)) {
|
||||
|
@ -292,6 +292,7 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
|
||||
{
|
||||
struct super_block *s;
|
||||
objset_t *os;
|
||||
boolean_t issnap = B_FALSE;
|
||||
int err;
|
||||
|
||||
err = dmu_objset_hold(zm->mnt_osname, FTAG, &os);
|
||||
@ -323,6 +324,7 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
|
||||
if (zpl_enter(zfsvfs, FTAG) == 0) {
|
||||
if (os != zfsvfs->z_os)
|
||||
err = -SET_ERROR(EBUSY);
|
||||
issnap = zfsvfs->z_issnap;
|
||||
zpl_exit(zfsvfs, FTAG);
|
||||
} else {
|
||||
err = -SET_ERROR(EBUSY);
|
||||
@ -346,7 +348,11 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
|
||||
return (ERR_PTR(err));
|
||||
}
|
||||
s->s_flags |= SB_ACTIVE;
|
||||
} else if ((flags ^ s->s_flags) & SB_RDONLY) {
|
||||
} else if (!issnap && ((flags ^ s->s_flags) & SB_RDONLY)) {
|
||||
/*
|
||||
* Skip ro check for snap since snap is always ro regardless
|
||||
* ro flag is passed by mount or not.
|
||||
*/
|
||||
deactivate_locked_super(s);
|
||||
return (ERR_PTR(-EBUSY));
|
||||
}
|
||||
|
@ -1213,6 +1213,7 @@ zvol_queue_limits_convert(zvol_queue_limits_t *limits,
|
||||
qlimits->io_opt = limits->zql_io_opt;
|
||||
qlimits->physical_block_size = limits->zql_physical_block_size;
|
||||
qlimits->max_discard_sectors = limits->zql_max_discard_sectors;
|
||||
qlimits->max_hw_discard_sectors = limits->zql_max_discard_sectors;
|
||||
qlimits->discard_granularity = limits->zql_discard_granularity;
|
||||
#ifdef HAVE_BLKDEV_QUEUE_LIMITS_FEATURES
|
||||
qlimits->features =
|
||||
@ -1251,7 +1252,6 @@ zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits)
|
||||
|
||||
zso->zvo_disk->minors = ZVOL_MINORS;
|
||||
zso->zvo_queue = zso->zvo_disk->queue;
|
||||
zvol_queue_limits_apply(limits, zso->zvo_queue);
|
||||
#elif defined(HAVE_BLK_ALLOC_DISK_2ARG)
|
||||
struct queue_limits qlimits;
|
||||
zvol_queue_limits_convert(limits, &qlimits);
|
||||
@ -1261,13 +1261,10 @@ zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits)
|
||||
return (1);
|
||||
}
|
||||
|
||||
#ifndef HAVE_BLKDEV_QUEUE_LIMITS_FEATURES
|
||||
blk_queue_set_write_cache(zso->zvo_queue, B_TRUE);
|
||||
#endif
|
||||
|
||||
zso->zvo_disk = disk;
|
||||
zso->zvo_disk->minors = ZVOL_MINORS;
|
||||
zso->zvo_queue = zso->zvo_disk->queue;
|
||||
|
||||
#else
|
||||
zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (zso->zvo_queue == NULL)
|
||||
@ -1361,7 +1358,7 @@ zvol_alloc_blk_mq(zvol_state_t *zv, zvol_queue_limits_t *limits)
|
||||
* request queue and generic disk structures for the block device.
|
||||
*/
|
||||
static zvol_state_t *
|
||||
zvol_alloc(dev_t dev, const char *name)
|
||||
zvol_alloc(dev_t dev, const char *name, uint64_t volblocksize)
|
||||
{
|
||||
zvol_state_t *zv;
|
||||
struct zvol_state_os *zso;
|
||||
@ -1381,6 +1378,7 @@ zvol_alloc(dev_t dev, const char *name)
|
||||
zso = kmem_zalloc(sizeof (struct zvol_state_os), KM_SLEEP);
|
||||
zv->zv_zso = zso;
|
||||
zv->zv_volmode = volmode;
|
||||
zv->zv_volblocksize = volblocksize;
|
||||
|
||||
list_link_init(&zv->zv_next);
|
||||
mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
@ -1670,7 +1668,8 @@ zvol_os_create_minor(const char *name)
|
||||
if (error)
|
||||
goto out_dmu_objset_disown;
|
||||
|
||||
zv = zvol_alloc(MKDEV(zvol_major, minor), name);
|
||||
zv = zvol_alloc(MKDEV(zvol_major, minor), name,
|
||||
doi->doi_data_block_size);
|
||||
if (zv == NULL) {
|
||||
error = SET_ERROR(EAGAIN);
|
||||
goto out_dmu_objset_disown;
|
||||
@ -1680,7 +1679,6 @@ zvol_os_create_minor(const char *name)
|
||||
if (dmu_objset_is_snapshot(os))
|
||||
zv->zv_flags |= ZVOL_RDONLY;
|
||||
|
||||
zv->zv_volblocksize = doi->doi_data_block_size;
|
||||
zv->zv_volsize = volsize;
|
||||
zv->zv_objset = os;
|
||||
|
||||
|
@ -754,6 +754,12 @@ zpool_feature_init(void)
|
||||
"Support for raidz expansion",
|
||||
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures);
|
||||
|
||||
zfeature_register(SPA_FEATURE_FAST_DEDUP,
|
||||
"com.klarasystems:fast_dedup", "fast_dedup",
|
||||
"Support for advanced deduplication",
|
||||
ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL,
|
||||
sfeatures);
|
||||
|
||||
zfs_mod_list_supported_free(sfeatures);
|
||||
}
|
||||
|
||||
|
277
sys/contrib/openzfs/module/zcommon/zfs_valstr.c
Normal file
@ -0,0 +1,277 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2024, Klara Inc.
|
||||
*/
|
||||
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/string.h>
|
||||
#include <sys/debug.h>
|
||||
#include "zfs_valstr.h"
|
||||
|
||||
/*
|
||||
* Each bit in a bitfield has three possible string representations:
|
||||
* - single char
|
||||
* - two-char pair
|
||||
* - full name
|
||||
*/
|
||||
typedef struct {
|
||||
const char vb_bit;
|
||||
const char vb_pair[2];
|
||||
const char *vb_name;
|
||||
} valstr_bit_t;
|
||||
|
||||
/*
|
||||
* Emits a character for each bit in `bits`, up to the number of elements
|
||||
* in the table. Set bits get the character in vb_bit, clear bits get a
|
||||
* space. This results in all strings having the same width, for easier
|
||||
* visual comparison.
|
||||
*/
|
||||
static size_t
|
||||
valstr_bitfield_bits(const valstr_bit_t *table, const size_t nelems,
|
||||
uint64_t bits, char *out, size_t outlen)
|
||||
{
|
||||
ASSERT(out);
|
||||
size_t n = 0;
|
||||
for (int b = 0; b < nelems; b++) {
|
||||
if (n == outlen)
|
||||
break;
|
||||
uint64_t mask = (1ULL << b);
|
||||
out[n++] = (bits & mask) ? table[b].vb_bit : ' ';
|
||||
}
|
||||
if (n < outlen)
|
||||
out[n++] = '\0';
|
||||
return (n);
|
||||
}
|
||||
|
||||
/*
|
||||
* Emits a two-char pair for each bit set in `bits`, taken from vb_pair, and
|
||||
* separated by a `|` character. This gives a concise representation of the
|
||||
* whole value.
|
||||
*/
|
||||
static size_t
|
||||
valstr_bitfield_pairs(const valstr_bit_t *table, const size_t nelems,
|
||||
uint64_t bits, char *out, size_t outlen)
|
||||
{
|
||||
ASSERT(out);
|
||||
size_t n = 0;
|
||||
for (int b = 0; b < nelems; b++) {
|
||||
ASSERT3U(n, <=, outlen);
|
||||
if (n == outlen)
|
||||
break;
|
||||
uint64_t mask = (1ULL << b);
|
||||
if (bits & mask) {
|
||||
size_t len = (n > 0) ? 3 : 2;
|
||||
if (n > outlen-len)
|
||||
break;
|
||||
if (n > 0)
|
||||
out[n++] = '|';
|
||||
out[n++] = table[b].vb_pair[0];
|
||||
out[n++] = table[b].vb_pair[1];
|
||||
}
|
||||
}
|
||||
if (n < outlen)
|
||||
out[n++] = '\0';
|
||||
return (n);
|
||||
}
|
||||
|
||||
/*
|
||||
* Emits the full name for each bit set in `bits`, taken from vb_name, and
|
||||
* separated by a space. This unambiguously shows the entire set of bits, but
|
||||
* can get very long.
|
||||
*/
|
||||
static size_t
|
||||
valstr_bitfield_str(const valstr_bit_t *table, const size_t nelems,
|
||||
uint64_t bits, char *out, size_t outlen)
|
||||
{
|
||||
ASSERT(out);
|
||||
size_t n = 0;
|
||||
for (int b = 0; b < nelems; b++) {
|
||||
ASSERT3U(n, <=, outlen);
|
||||
if (n == outlen)
|
||||
break;
|
||||
uint64_t mask = (1ULL << b);
|
||||
if (bits & mask) {
|
||||
size_t len = strlen(table[b].vb_name);
|
||||
if (n > 0)
|
||||
len++;
|
||||
if (n > outlen-len)
|
||||
break;
|
||||
if (n > 0) {
|
||||
out[n++] = ' ';
|
||||
len--;
|
||||
}
|
||||
memcpy(&out[n], table[b].vb_name, len);
|
||||
n += len;
|
||||
}
|
||||
}
|
||||
if (n < outlen)
|
||||
out[n++] = '\0';
|
||||
return (n);
|
||||
}
|
||||
|
||||
/*
|
||||
* Emits the name of the given enum value in the table.
|
||||
*/
|
||||
static size_t
|
||||
valstr_enum_str(const char **table, const size_t nelems,
|
||||
int v, char *out, size_t outlen)
|
||||
{
|
||||
ASSERT(out);
|
||||
ASSERT3U(v, <, nelems);
|
||||
if (v >= nelems)
|
||||
return (0);
|
||||
return (MIN(strlcpy(out, table[v], outlen), outlen));
|
||||
}
|
||||
|
||||
/*
 * These macros create the string tables for the given name, and implement
 * the public functions described in zfs_valstr.h.
 */
|
||||
#define _VALSTR_BITFIELD_IMPL(name, ...) \
|
||||
static const valstr_bit_t valstr_ ## name ## _table[] = { __VA_ARGS__ };\
|
||||
size_t \
|
||||
zfs_valstr_ ## name ## _bits(uint64_t bits, char *out, size_t outlen) \
|
||||
{ \
|
||||
return (valstr_bitfield_bits(valstr_ ## name ## _table, \
|
||||
ARRAY_SIZE(valstr_ ## name ## _table), bits, out, outlen)); \
|
||||
} \
|
||||
\
|
||||
size_t \
|
||||
zfs_valstr_ ## name ## _pairs(uint64_t bits, char *out, size_t outlen) \
|
||||
{ \
|
||||
return (valstr_bitfield_pairs(valstr_ ## name ## _table, \
|
||||
ARRAY_SIZE(valstr_ ## name ## _table), bits, out, outlen)); \
|
||||
} \
|
||||
\
|
||||
size_t \
|
||||
zfs_valstr_ ## name(uint64_t bits, char *out, size_t outlen) \
|
||||
{ \
|
||||
return (valstr_bitfield_str(valstr_ ## name ## _table, \
|
||||
ARRAY_SIZE(valstr_ ## name ## _table), bits, out, outlen)); \
|
||||
} \
|
||||
|
||||
#define _VALSTR_ENUM_IMPL(name, ...) \
|
||||
static const char *valstr_ ## name ## _table[] = { __VA_ARGS__ }; \
|
||||
size_t \
|
||||
zfs_valstr_ ## name(int v, char *out, size_t outlen) \
|
||||
{ \
|
||||
return (valstr_enum_str(valstr_ ## name ## _table, \
|
||||
ARRAY_SIZE(valstr_ ## name ## _table), v, out, outlen)); \
|
||||
} \
|
||||
|
||||
|
||||
/* String tables */
|
||||
|
||||
/* ZIO flags: zio_flag_t, typically zio->io_flags */
|
||||
/* BEGIN CSTYLED */
|
||||
_VALSTR_BITFIELD_IMPL(zio_flag,
|
||||
{ '.', "DA", "DONT_AGGREGATE" },
|
||||
{ '.', "RP", "IO_REPAIR" },
|
||||
{ '.', "SH", "SELF_HEAL" },
|
||||
{ '.', "RS", "RESILVER" },
|
||||
{ '.', "SC", "SCRUB" },
|
||||
{ '.', "ST", "SCAN_THREAD" },
|
||||
{ '.', "PH", "PHYSICAL" },
|
||||
{ '.', "CF", "CANFAIL" },
|
||||
{ '.', "SP", "SPECULATIVE" },
|
||||
{ '.', "CW", "CONFIG_WRITER" },
|
||||
{ '.', "DR", "DONT_RETRY" },
|
||||
{ '?', "??", "[UNUSED 11]" },
|
||||
{ '.', "ND", "NODATA" },
|
||||
{ '.', "ID", "INDUCE_DAMAGE" },
|
||||
{ '.', "AL", "IO_ALLOCATING" },
|
||||
{ '.', "RE", "IO_RETRY" },
|
||||
{ '.', "PR", "PROBE" },
|
||||
{ '.', "TH", "TRYHARD" },
|
||||
{ '.', "OP", "OPTIONAL" },
|
||||
{ '.', "DQ", "DONT_QUEUE" },
|
||||
{ '.', "DP", "DONT_PROPAGATE" },
|
||||
{ '.', "BY", "IO_BYPASS" },
|
||||
{ '.', "RW", "IO_REWRITE" },
|
||||
{ '.', "CM", "RAW_COMPRESS" },
|
||||
{ '.', "EN", "RAW_ENCRYPT" },
|
||||
{ '.', "GG", "GANG_CHILD" },
|
||||
{ '.', "DD", "DDT_CHILD" },
|
||||
{ '.', "GF", "GODFATHER" },
|
||||
{ '.', "NP", "NOPWRITE" },
|
||||
{ '.', "EX", "REEXECUTED" },
|
||||
{ '.', "DG", "DELEGATED" },
|
||||
)
|
||||
/* END CSTYLED */
|
||||
|
||||
/*
|
||||
* ZIO pipeline stage(s): enum zio_stage, typically zio->io_stage or
|
||||
* zio->io_pipeline.
|
||||
*/
|
||||
/* BEGIN CSTYLED */
|
||||
_VALSTR_BITFIELD_IMPL(zio_stage,
|
||||
{ 'O', "O ", "OPEN" },
|
||||
{ 'I', "RI", "READ_BP_INIT" },
|
||||
{ 'I', "WI", "WRITE_BP_INIT" },
|
||||
{ 'I', "FI", "FREE_BP_INIT" },
|
||||
{ 'A', "IA", "ISSUE_ASYNC" },
|
||||
{ 'W', "WC", "WRITE_COMPRESS" },
|
||||
{ 'E', "EN", "ENCRYPT" },
|
||||
{ 'C', "CG", "CHECKSUM_GENERATE" },
|
||||
{ 'N', "NW", "NOP_WRITE" },
|
||||
{ 'B', "BF", "BRT_FREE" },
|
||||
{ 'd', "dS", "DDT_READ_START" },
|
||||
{ 'd', "dD", "DDT_READ_DONE" },
|
||||
{ 'd', "dW", "DDT_WRITE" },
|
||||
{ 'd', "dF", "DDT_FREE" },
|
||||
{ 'G', "GA", "GANG_ASSEMBLE" },
|
||||
{ 'G', "GI", "GANG_ISSUE" },
|
||||
{ 'D', "DT", "DVA_THROTTLE" },
|
||||
{ 'D', "DA", "DVA_ALLOCATE" },
|
||||
{ 'D', "DF", "DVA_FREE" },
|
||||
{ 'D', "DC", "DVA_CLAIM" },
|
||||
{ 'R', "R ", "READY" },
|
||||
{ 'V', "VS", "VDEV_IO_START" },
|
||||
{ 'V', "VD", "VDEV_IO_DONE" },
|
||||
{ 'V', "VA", "VDEV_IO_ASSESS" },
|
||||
{ 'C', "CV", "CHECKSUM_VERIFY" },
|
||||
{ 'X', "X ", "DONE" },
|
||||
)
|
||||
/* END CSTYLED */
|
||||
|
||||
/* ZIO priority: zio_priority_t, typically zio->io_priority */
|
||||
/* BEGIN CSTYLED */
|
||||
_VALSTR_ENUM_IMPL(zio_priority,
|
||||
"SYNC_READ",
|
||||
"SYNC_WRITE",
|
||||
"ASYNC_READ",
|
||||
"ASYNC_WRITE",
|
||||
"SCRUB",
|
||||
"REMOVAL",
|
||||
"INITIALIZING",
|
||||
"TRIM",
|
||||
"REBUILD",
|
||||
"[NUM_QUEUEABLE]",
|
||||
"NOW",
|
||||
)
|
||||
/* END CSTYLED */
|
||||
|
||||
#undef _VALSTR_BITFIELD_IMPL
|
||||
#undef _VALSTR_ENUM_IMPL
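A brief usage sketch of the generated helpers (editorial illustration, not part of this commit; it assumes a zio_t in scope and generously sized buffers):

static void
zfs_valstr_debug_zio(const zio_t *zio)
{
	char flags[128], stage[32];

	/* Two-char pair for each set flag, e.g. "RP|SC|RE". */
	zfs_valstr_zio_flag_pairs(zio->io_flags, flags, sizeof (flags));
	/* Fixed-width single-char view of the pipeline stage bits. */
	zfs_valstr_zio_stage_bits(zio->io_stage, stage, sizeof (stage));
	zfs_dbgmsg("flags=[%s] stage=[%s]", flags, stage);
}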
|
@ -113,7 +113,7 @@ abd_verify(abd_t *abd)
|
||||
ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR |
|
||||
ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE |
|
||||
ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE | ABD_FLAG_GANG |
|
||||
ABD_FLAG_GANG_FREE | ABD_FLAG_ZEROS | ABD_FLAG_ALLOCD));
|
||||
ABD_FLAG_GANG_FREE | ABD_FLAG_ALLOCD));
|
||||
IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER));
|
||||
IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER);
|
||||
if (abd_is_linear(abd)) {
|
||||
@ -603,13 +603,11 @@ abd_get_zeros(size_t size)
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a linear ABD structure for buf.
|
||||
* Create a linear ABD for an existing buf.
|
||||
*/
|
||||
abd_t *
|
||||
abd_get_from_buf(void *buf, size_t size)
|
||||
static abd_t *
|
||||
abd_get_from_buf_impl(abd_t *abd, void *buf, size_t size)
|
||||
{
|
||||
abd_t *abd = abd_alloc_struct(0);
|
||||
|
||||
VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
|
||||
|
||||
/*
|
||||
@ -625,6 +623,20 @@ abd_get_from_buf(void *buf, size_t size)
|
||||
return (abd);
|
||||
}
|
||||
|
||||
abd_t *
|
||||
abd_get_from_buf(void *buf, size_t size)
|
||||
{
|
||||
abd_t *abd = abd_alloc_struct(0);
|
||||
return (abd_get_from_buf_impl(abd, buf, size));
|
||||
}
|
||||
|
||||
abd_t *
|
||||
abd_get_from_buf_struct(abd_t *abd, void *buf, size_t size)
|
||||
{
|
||||
abd_init_struct(abd);
|
||||
return (abd_get_from_buf_impl(abd, buf, size));
|
||||
}
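/*
 * Editorial sketch (not part of this change): abd_get_from_buf_struct()
 * lets a caller wrap an existing buffer in a stack-allocated abd_t, which
 * later hunks use as a zio_decompress_data() target, e.g.:
 *
 *	abd_t dabd;
 *	abd_get_from_buf_struct(&dabd, buf, buflen);
 *	VERIFY0(zio_decompress_data(comp, src_abd, &dabd, psize, buflen, NULL));
 *	abd_free(&dabd);
 *
 * The buffer and length names here are placeholders.
 */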
|
||||
|
||||
/*
|
||||
* Get the raw buffer associated with a linear ABD.
|
||||
*/
|
||||
|
@ -1767,12 +1767,12 @@ arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj)
|
||||
uint64_t csize;
|
||||
uint64_t lsize = HDR_GET_LSIZE(hdr);
|
||||
uint64_t psize = HDR_GET_PSIZE(hdr);
|
||||
void *tmpbuf = NULL;
|
||||
abd_t *abd = hdr->b_l1hdr.b_pabd;
|
||||
boolean_t free_abd = B_FALSE;
|
||||
|
||||
ASSERT(HDR_EMPTY_OR_LOCKED(hdr));
|
||||
ASSERT(HDR_AUTHENTICATED(hdr));
|
||||
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
|
||||
ASSERT3P(abd, !=, NULL);
|
||||
|
||||
/*
|
||||
* The MAC is calculated on the compressed data that is stored on disk.
|
||||
@ -1784,14 +1784,13 @@ arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj)
|
||||
*/
|
||||
if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
|
||||
!HDR_COMPRESSION_ENABLED(hdr)) {
|
||||
|
||||
abd = NULL;
|
||||
csize = zio_compress_data(HDR_GET_COMPRESS(hdr),
|
||||
hdr->b_l1hdr.b_pabd, &tmpbuf, lsize, hdr->b_complevel);
|
||||
ASSERT3P(tmpbuf, !=, NULL);
|
||||
hdr->b_l1hdr.b_pabd, &abd, lsize, hdr->b_complevel);
|
||||
ASSERT3P(abd, !=, NULL);
|
||||
ASSERT3U(csize, <=, psize);
|
||||
abd = abd_get_from_buf(tmpbuf, lsize);
|
||||
abd_take_ownership_of_buf(abd, B_TRUE);
|
||||
abd_zero_off(abd, csize, psize - csize);
|
||||
free_abd = B_TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1810,16 +1809,10 @@ arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj)
|
||||
|
||||
if (ret == 0)
|
||||
arc_hdr_clear_flags(hdr, ARC_FLAG_NOAUTH);
|
||||
else if (ret != ENOENT)
|
||||
goto error;
|
||||
else if (ret == ENOENT)
|
||||
ret = 0;
|
||||
|
||||
if (tmpbuf != NULL)
|
||||
abd_free(abd);
|
||||
|
||||
return (0);
|
||||
|
||||
error:
|
||||
if (tmpbuf != NULL)
|
||||
if (free_abd)
|
||||
abd_free(abd);
|
||||
|
||||
return (ret);
|
||||
@ -1836,7 +1829,6 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
|
||||
{
|
||||
int ret;
|
||||
abd_t *cabd = NULL;
|
||||
void *tmp = NULL;
|
||||
boolean_t no_crypt = B_FALSE;
|
||||
boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS);
|
||||
|
||||
@ -1871,17 +1863,14 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
|
||||
* linear buffer and wrapping it in an abd later.
|
||||
*/
|
||||
cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr, 0);
|
||||
tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
|
||||
|
||||
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
|
||||
hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr),
|
||||
hdr->b_l1hdr.b_pabd, cabd, HDR_GET_PSIZE(hdr),
|
||||
HDR_GET_LSIZE(hdr), &hdr->b_complevel);
|
||||
if (ret != 0) {
|
||||
abd_return_buf(cabd, tmp, arc_hdr_size(hdr));
|
||||
goto error;
|
||||
}
|
||||
|
||||
abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr));
|
||||
arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd,
|
||||
arc_hdr_size(hdr), hdr);
|
||||
hdr->b_l1hdr.b_pabd = cabd;
|
||||
@ -2123,10 +2112,14 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
|
||||
/* Skip byteswapping and checksumming (already done) */
|
||||
return (0);
|
||||
} else {
|
||||
abd_t dabd;
|
||||
abd_get_from_buf_struct(&dabd, buf->b_data,
|
||||
HDR_GET_LSIZE(hdr));
|
||||
error = zio_decompress_data(HDR_GET_COMPRESS(hdr),
|
||||
hdr->b_l1hdr.b_pabd, buf->b_data,
|
||||
hdr->b_l1hdr.b_pabd, &dabd,
|
||||
HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr),
|
||||
&hdr->b_complevel);
|
||||
abd_free(&dabd);
|
||||
|
||||
/*
|
||||
* Absent hardware errors or software bugs, this should
|
||||
@ -8531,18 +8524,15 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
|
||||
!HDR_COMPRESSION_ENABLED(hdr)) {
|
||||
abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
|
||||
ARC_HDR_USE_RESERVE);
|
||||
void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
|
||||
|
||||
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
|
||||
hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr),
|
||||
hdr->b_l1hdr.b_pabd, cabd, HDR_GET_PSIZE(hdr),
|
||||
HDR_GET_LSIZE(hdr), &hdr->b_complevel);
|
||||
if (ret != 0) {
|
||||
abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr));
|
||||
arc_free_data_abd(hdr, cabd, arc_hdr_size(hdr), hdr);
|
||||
goto error;
|
||||
}
|
||||
|
||||
abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr));
|
||||
arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd,
|
||||
arc_hdr_size(hdr), hdr);
|
||||
hdr->b_l1hdr.b_pabd = cabd;
|
||||
@ -9037,9 +9027,8 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize,
|
||||
}
|
||||
|
||||
if (compress != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) {
|
||||
size_t bufsize = MAX(size, asize);
|
||||
void *buf = zio_buf_alloc(bufsize);
|
||||
uint64_t csize = zio_compress_data(compress, to_write, &buf,
|
||||
cabd = abd_alloc_for_io(MAX(size, asize), ismd);
|
||||
uint64_t csize = zio_compress_data(compress, to_write, &cabd,
|
||||
size, hdr->b_complevel);
|
||||
if (csize > psize) {
|
||||
/*
|
||||
@ -9047,13 +9036,12 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize,
|
||||
* psize. Even if it fits into asize, it does not
|
||||
* matter, since checksum will never match on read.
|
||||
*/
|
||||
zio_buf_free(buf, bufsize);
|
||||
abd_free(cabd);
|
||||
return (SET_ERROR(EIO));
|
||||
}
|
||||
if (asize > csize)
|
||||
memset((char *)buf + csize, 0, asize - csize);
|
||||
to_write = cabd = abd_get_from_buf(buf, bufsize);
|
||||
abd_take_ownership_of_buf(cabd, B_TRUE);
|
||||
abd_zero_off(cabd, csize, asize - csize);
|
||||
to_write = cabd;
|
||||
}
|
||||
|
||||
if (HDR_ENCRYPTED(hdr)) {
|
||||
@ -9158,12 +9146,17 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
||||
*/
|
||||
for (int pass = 0; pass < L2ARC_FEED_TYPES; pass++) {
|
||||
/*
|
||||
* If pass == 1 or 3, we cache MRU metadata and data
|
||||
* respectively.
|
||||
* pass == 0: MFU meta
|
||||
* pass == 1: MRU meta
|
||||
* pass == 2: MFU data
|
||||
* pass == 3: MRU data
|
||||
*/
|
||||
if (l2arc_mfuonly) {
|
||||
if (l2arc_mfuonly == 1) {
|
||||
if (pass == 1 || pass == 3)
|
||||
continue;
|
||||
} else if (l2arc_mfuonly > 1) {
|
||||
if (pass == 3)
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t passed_sz = 0;
|
||||
@ -10179,7 +10172,6 @@ l2arc_log_blk_read(l2arc_dev_t *dev,
|
||||
{
|
||||
int err = 0;
|
||||
zio_cksum_t cksum;
|
||||
abd_t *abd = NULL;
|
||||
uint64_t asize;
|
||||
|
||||
ASSERT(this_lbp != NULL && next_lbp != NULL);
|
||||
@ -10241,16 +10233,22 @@ l2arc_log_blk_read(l2arc_dev_t *dev,
|
||||
switch (L2BLK_GET_COMPRESS((this_lbp)->lbp_prop)) {
|
||||
case ZIO_COMPRESS_OFF:
|
||||
break;
|
||||
case ZIO_COMPRESS_LZ4:
|
||||
abd = abd_alloc_for_io(asize, B_TRUE);
|
||||
case ZIO_COMPRESS_LZ4: {
|
||||
abd_t *abd = abd_alloc_linear(asize, B_TRUE);
|
||||
abd_copy_from_buf_off(abd, this_lb, 0, asize);
|
||||
if ((err = zio_decompress_data(
|
||||
abd_t dabd;
|
||||
abd_get_from_buf_struct(&dabd, this_lb, sizeof (*this_lb));
|
||||
err = zio_decompress_data(
|
||||
L2BLK_GET_COMPRESS((this_lbp)->lbp_prop),
|
||||
abd, this_lb, asize, sizeof (*this_lb), NULL)) != 0) {
|
||||
abd, &dabd, asize, sizeof (*this_lb), NULL);
|
||||
abd_free(&dabd);
|
||||
abd_free(abd);
|
||||
if (err != 0) {
|
||||
err = SET_ERROR(EINVAL);
|
||||
goto cleanup;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
err = SET_ERROR(EINVAL);
|
||||
goto cleanup;
|
||||
@ -10267,8 +10265,6 @@ cleanup:
|
||||
l2arc_log_blk_fetch_abort(*next_io);
|
||||
*next_io = NULL;
|
||||
}
|
||||
if (abd != NULL)
|
||||
abd_free(abd);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -10504,7 +10500,7 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
|
||||
uint64_t psize, asize;
|
||||
zio_t *wzio;
|
||||
l2arc_lb_abd_buf_t *abd_buf;
|
||||
uint8_t *tmpbuf = NULL;
|
||||
abd_t *abd = NULL;
|
||||
l2arc_lb_ptr_buf_t *lb_ptr_buf;
|
||||
|
||||
VERIFY3S(dev->l2ad_log_ent_idx, ==, dev->l2ad_log_entries);
|
||||
@ -10527,7 +10523,7 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
|
||||
|
||||
/* try to compress the buffer */
|
||||
psize = zio_compress_data(ZIO_COMPRESS_LZ4,
|
||||
abd_buf->abd, (void **) &tmpbuf, sizeof (*lb), 0);
|
||||
abd_buf->abd, &abd, sizeof (*lb), 0);
|
||||
|
||||
/* a log block is never entirely zero */
|
||||
ASSERT(psize != 0);
|
||||
@ -10553,27 +10549,26 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
|
||||
ZIO_CHECKSUM_FLETCHER_4);
|
||||
if (asize < sizeof (*lb)) {
|
||||
/* compression succeeded */
|
||||
memset(tmpbuf + psize, 0, asize - psize);
|
||||
abd_zero_off(abd, psize, asize - psize);
|
||||
L2BLK_SET_COMPRESS(
|
||||
(&l2dhdr->dh_start_lbps[0])->lbp_prop,
|
||||
ZIO_COMPRESS_LZ4);
|
||||
} else {
|
||||
/* compression failed */
|
||||
memcpy(tmpbuf, lb, sizeof (*lb));
|
||||
abd_copy_from_buf_off(abd, lb, 0, sizeof (*lb));
|
||||
L2BLK_SET_COMPRESS(
|
||||
(&l2dhdr->dh_start_lbps[0])->lbp_prop,
|
||||
ZIO_COMPRESS_OFF);
|
||||
}
|
||||
|
||||
/* checksum what we're about to write */
|
||||
fletcher_4_native(tmpbuf, asize, NULL,
|
||||
abd_fletcher_4_native(abd, asize, NULL,
|
||||
&l2dhdr->dh_start_lbps[0].lbp_cksum);
|
||||
|
||||
abd_free(abd_buf->abd);
|
||||
|
||||
/* perform the write itself */
|
||||
abd_buf->abd = abd_get_from_buf(tmpbuf, sizeof (*lb));
|
||||
abd_take_ownership_of_buf(abd_buf->abd, B_TRUE);
|
||||
abd_buf->abd = abd;
|
||||
wzio = zio_write_phys(pio, dev->l2ad_vdev, dev->l2ad_hand,
|
||||
asize, abd_buf->abd, ZIO_CHECKSUM_OFF, NULL, NULL,
|
||||
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_CANFAIL, B_FALSE);
|
||||
|
@ -142,8 +142,13 @@ decode_embedded_bp(const blkptr_t *bp, void *buf, int buflen)
|
||||
if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
|
||||
uint8_t dstbuf[BPE_PAYLOAD_SIZE];
|
||||
decode_embedded_bp_compressed(bp, dstbuf);
|
||||
VERIFY0(zio_decompress_data_buf(BP_GET_COMPRESS(bp),
|
||||
dstbuf, buf, psize, buflen, NULL));
|
||||
abd_t cabd, dabd;
|
||||
abd_get_from_buf_struct(&cabd, dstbuf, psize);
|
||||
abd_get_from_buf_struct(&dabd, buf, buflen);
|
||||
VERIFY0(zio_decompress_data(BP_GET_COMPRESS(bp), &cabd,
|
||||
&dabd, psize, buflen, NULL));
|
||||
abd_free(&dabd);
|
||||
abd_free(&cabd);
|
||||
} else {
|
||||
ASSERT3U(lsize, ==, psize);
|
||||
decode_embedded_bp_compressed(bp, buf);
|
||||
|
@ -204,6 +204,9 @@ dataset_kstats_destroy(dataset_kstats_t *dk)
|
||||
void
|
||||
dataset_kstats_rename(dataset_kstats_t *dk, const char *name)
|
||||
{
|
||||
if (dk->dk_kstats == NULL)
|
||||
return;
|
||||
|
||||
dataset_kstat_values_t *dkv = dk->dk_kstats->ks_data;
|
||||
char *ds_name;
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
778
sys/contrib/openzfs/module/zfs/ddt_log.c
Normal file
778
sys/contrib/openzfs/module/zfs/ddt_log.c
Normal file
@ -0,0 +1,778 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2023, Klara Inc.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/ddt.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/ddt_impl.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/dbuf.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
|
||||
/*
|
||||
* No more than this many txgs before swapping logs.
|
||||
*/
|
||||
uint_t zfs_dedup_log_txg_max = 8;
|
||||
|
||||
/*
|
||||
* Max memory for the log AVL trees. If zfs_dedup_log_mem_max is zero at module
|
||||
* load, it will be set to zfs_dedup_log_mem_max_percent% of total memory.
|
||||
*/
|
||||
uint64_t zfs_dedup_log_mem_max = 0;
|
||||
uint_t zfs_dedup_log_mem_max_percent = 1;
|
||||
|
||||
|
||||
static kmem_cache_t *ddt_log_entry_flat_cache;
|
||||
static kmem_cache_t *ddt_log_entry_trad_cache;
|
||||
|
||||
#define DDT_LOG_ENTRY_FLAT_SIZE \
|
||||
(sizeof (ddt_log_entry_t) + DDT_FLAT_PHYS_SIZE)
|
||||
#define DDT_LOG_ENTRY_TRAD_SIZE \
|
||||
(sizeof (ddt_log_entry_t) + DDT_TRAD_PHYS_SIZE)
|
||||
|
||||
#define DDT_LOG_ENTRY_SIZE(ddt) \
|
||||
_DDT_PHYS_SWITCH(ddt, DDT_LOG_ENTRY_FLAT_SIZE, DDT_LOG_ENTRY_TRAD_SIZE)
|
||||
|
||||
void
|
||||
ddt_log_init(void)
|
||||
{
|
||||
ddt_log_entry_flat_cache = kmem_cache_create("ddt_log_entry_flat_cache",
|
||||
DDT_LOG_ENTRY_FLAT_SIZE, 0, NULL, NULL, NULL, NULL, NULL, 0);
|
||||
ddt_log_entry_trad_cache = kmem_cache_create("ddt_log_entry_trad_cache",
|
||||
DDT_LOG_ENTRY_TRAD_SIZE, 0, NULL, NULL, NULL, NULL, NULL, 0);
|
||||
|
||||
/*
|
||||
* Max memory for log AVL entries. At least 1M, because we need
|
||||
* something (that's ~3800 entries per tree). They can say 100% if they
|
||||
* want; it just means they're at the mercy of the txg flush limit.
|
||||
*/
|
||||
if (zfs_dedup_log_mem_max == 0) {
|
||||
zfs_dedup_log_mem_max_percent =
|
||||
MIN(zfs_dedup_log_mem_max_percent, 100);
|
||||
zfs_dedup_log_mem_max = (physmem * PAGESIZE) *
|
||||
zfs_dedup_log_mem_max_percent / 100;
|
||||
}
|
||||
zfs_dedup_log_mem_max = MAX(zfs_dedup_log_mem_max, 1*1024*1024);
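/*
 * Worked example (editorial, not from the source): with 64 GiB of
 * physical memory and the default zfs_dedup_log_mem_max_percent of 1,
 * the cap computed above comes to roughly 655 MiB, i.e. about 327 MiB
 * of entries per log tree once it is halved in ddt_log_swap().
 */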
|
||||
}
|
||||
|
||||
void
|
||||
ddt_log_fini(void)
|
||||
{
|
||||
kmem_cache_destroy(ddt_log_entry_trad_cache);
|
||||
kmem_cache_destroy(ddt_log_entry_flat_cache);
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_log_name(ddt_t *ddt, char *name, uint_t n)
|
||||
{
|
||||
snprintf(name, DDT_NAMELEN, DMU_POOL_DDT_LOG,
|
||||
zio_checksum_table[ddt->ddt_checksum].ci_name, n);
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_log_update_header(ddt_t *ddt, ddt_log_t *ddl, dmu_tx_t *tx)
|
||||
{
|
||||
dmu_buf_t *db;
|
||||
VERIFY0(dmu_bonus_hold(ddt->ddt_os, ddl->ddl_object, FTAG, &db));
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
|
||||
ddt_log_header_t *hdr = (ddt_log_header_t *)db->db_data;
|
||||
DLH_SET_VERSION(hdr, 1);
|
||||
DLH_SET_FLAGS(hdr, ddl->ddl_flags);
|
||||
hdr->dlh_length = ddl->ddl_length;
|
||||
hdr->dlh_first_txg = ddl->ddl_first_txg;
|
||||
hdr->dlh_checkpoint = ddl->ddl_checkpoint;
|
||||
|
||||
dmu_buf_rele(db, FTAG);
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_log_create_one(ddt_t *ddt, ddt_log_t *ddl, uint_t n, dmu_tx_t *tx)
|
||||
{
|
||||
ASSERT3U(ddt->ddt_dir_object, >, 0);
|
||||
ASSERT3U(ddl->ddl_object, ==, 0);
|
||||
|
||||
char name[DDT_NAMELEN];
|
||||
ddt_log_name(ddt, name, n);
|
||||
|
||||
ddl->ddl_object = dmu_object_alloc(ddt->ddt_os,
|
||||
DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
|
||||
DMU_OTN_UINT64_METADATA, sizeof (ddt_log_header_t), tx);
|
||||
VERIFY0(zap_add(ddt->ddt_os, ddt->ddt_dir_object, name,
|
||||
sizeof (uint64_t), 1, &ddl->ddl_object, tx));
|
||||
ddl->ddl_length = 0;
|
||||
ddl->ddl_first_txg = tx->tx_txg;
|
||||
ddt_log_update_header(ddt, ddl, tx);
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_log_create(ddt_t *ddt, dmu_tx_t *tx)
|
||||
{
|
||||
ddt_log_create_one(ddt, ddt->ddt_log_active, 0, tx);
|
||||
ddt_log_create_one(ddt, ddt->ddt_log_flushing, 1, tx);
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_log_destroy_one(ddt_t *ddt, ddt_log_t *ddl, uint_t n, dmu_tx_t *tx)
|
||||
{
|
||||
ASSERT3U(ddt->ddt_dir_object, >, 0);
|
||||
|
||||
if (ddl->ddl_object == 0)
|
||||
return;
|
||||
|
||||
ASSERT0(ddl->ddl_length);
|
||||
|
||||
char name[DDT_NAMELEN];
|
||||
ddt_log_name(ddt, name, n);
|
||||
|
||||
VERIFY0(zap_remove(ddt->ddt_os, ddt->ddt_dir_object, name, tx));
|
||||
VERIFY0(dmu_object_free(ddt->ddt_os, ddl->ddl_object, tx));
|
||||
|
||||
ddl->ddl_object = 0;
|
||||
}
|
||||
|
||||
void
|
||||
ddt_log_destroy(ddt_t *ddt, dmu_tx_t *tx)
|
||||
{
|
||||
ddt_log_destroy_one(ddt, ddt->ddt_log_active, 0, tx);
|
||||
ddt_log_destroy_one(ddt, ddt->ddt_log_flushing, 1, tx);
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_log_update_stats(ddt_t *ddt)
|
||||
{
|
||||
/*
|
||||
* Log object stats. We count the number of live entries in the log
|
||||
* tree, even if there are more than are on disk, and even if the same
|
||||
* entry is on both append and flush trees, because that's more what
|
||||
* the user expects to see. This does mean the on-disk size is not
|
||||
* really correlated with the number of entries, but I don't think
|
||||
* that's reasonable to expect anyway.
|
||||
*/
|
||||
dmu_object_info_t doi;
|
||||
uint64_t nblocks;
|
||||
dmu_object_info(ddt->ddt_os, ddt->ddt_log_active->ddl_object, &doi);
|
||||
nblocks = doi.doi_physical_blocks_512;
|
||||
dmu_object_info(ddt->ddt_os, ddt->ddt_log_flushing->ddl_object, &doi);
|
||||
nblocks += doi.doi_physical_blocks_512;
|
||||
|
||||
ddt_object_t *ddo = &ddt->ddt_log_stats;
|
||||
ddo->ddo_count =
|
||||
avl_numnodes(&ddt->ddt_log_active->ddl_tree) +
|
||||
avl_numnodes(&ddt->ddt_log_flushing->ddl_tree);
|
||||
ddo->ddo_mspace = ddo->ddo_count * DDT_LOG_ENTRY_SIZE(ddt);
|
||||
ddo->ddo_dspace = nblocks << 9;
|
||||
}
|
||||
|
||||
void
|
||||
ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx, ddt_log_update_t *dlu)
|
||||
{
|
||||
ASSERT3U(nentries, >, 0);
|
||||
ASSERT3P(dlu->dlu_dbp, ==, NULL);
|
||||
|
||||
if (ddt->ddt_log_active->ddl_object == 0)
|
||||
ddt_log_create(ddt, tx);
|
||||
|
||||
/*
|
||||
* We want to store as many entries as we can in a block, but never
|
||||
* split an entry across block boundaries.
|
||||
*/
|
||||
size_t reclen = P2ALIGN_TYPED(
|
||||
sizeof (ddt_log_record_t) + sizeof (ddt_log_record_entry_t) +
|
||||
DDT_PHYS_SIZE(ddt), sizeof (uint64_t), size_t);
|
||||
ASSERT3U(reclen, <=, UINT16_MAX);
|
||||
dlu->dlu_reclen = reclen;
|
||||
|
||||
VERIFY0(dnode_hold(ddt->ddt_os, ddt->ddt_log_active->ddl_object, FTAG,
|
||||
&dlu->dlu_dn));
|
||||
dnode_set_storage_type(dlu->dlu_dn, DMU_OT_DDT_ZAP);
|
||||
|
||||
uint64_t nblocks = howmany(nentries,
|
||||
dlu->dlu_dn->dn_datablksz / dlu->dlu_reclen);
|
||||
uint64_t offset = ddt->ddt_log_active->ddl_length;
|
||||
uint64_t length = nblocks * dlu->dlu_dn->dn_datablksz;
|
||||
|
||||
VERIFY0(dmu_buf_hold_array_by_dnode(dlu->dlu_dn, offset, length,
|
||||
B_FALSE, FTAG, &dlu->dlu_ndbp, &dlu->dlu_dbp,
|
||||
DMU_READ_NO_PREFETCH));
|
||||
|
||||
dlu->dlu_tx = tx;
|
||||
dlu->dlu_block = dlu->dlu_offset = 0;
|
||||
}
|
||||
|
||||
static ddt_log_entry_t *
|
||||
ddt_log_alloc_entry(ddt_t *ddt)
|
||||
{
|
||||
ddt_log_entry_t *ddle;
|
||||
|
||||
if (ddt->ddt_flags & DDT_FLAG_FLAT) {
|
||||
ddle = kmem_cache_alloc(ddt_log_entry_flat_cache, KM_SLEEP);
|
||||
memset(ddle, 0, DDT_LOG_ENTRY_FLAT_SIZE);
|
||||
} else {
|
||||
ddle = kmem_cache_alloc(ddt_log_entry_trad_cache, KM_SLEEP);
|
||||
memset(ddle, 0, DDT_LOG_ENTRY_TRAD_SIZE);
|
||||
}
|
||||
|
||||
return (ddle);
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_log_update_entry(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe)
|
||||
{
|
||||
/* Create the log tree entry from a live or stored entry */
|
||||
avl_index_t where;
|
||||
ddt_log_entry_t *ddle =
|
||||
avl_find(&ddl->ddl_tree, &ddlwe->ddlwe_key, &where);
|
||||
if (ddle == NULL) {
|
||||
ddle = ddt_log_alloc_entry(ddt);
|
||||
ddle->ddle_key = ddlwe->ddlwe_key;
|
||||
avl_insert(&ddl->ddl_tree, ddle, where);
|
||||
}
|
||||
ddle->ddle_type = ddlwe->ddlwe_type;
|
||||
ddle->ddle_class = ddlwe->ddlwe_class;
|
||||
memcpy(ddle->ddle_phys, &ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt));
|
||||
}
|
||||
|
||||
void
|
||||
ddt_log_entry(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe, ddt_log_update_t *dlu)
|
||||
{
|
||||
ASSERT3U(dlu->dlu_dbp, !=, NULL);
|
||||
|
||||
ddt_log_update_entry(ddt, ddt->ddt_log_active, ddlwe);
|
||||
ddt_histogram_add_entry(ddt, &ddt->ddt_log_histogram, ddlwe);
|
||||
|
||||
/* Get our block */
|
||||
ASSERT3U(dlu->dlu_block, <, dlu->dlu_ndbp);
|
||||
dmu_buf_t *db = dlu->dlu_dbp[dlu->dlu_block];
|
||||
|
||||
/*
|
||||
* If this would take us past the end of the block, finish it and
|
||||
* move to the next one.
|
||||
*/
|
||||
if (db->db_size < (dlu->dlu_offset + dlu->dlu_reclen)) {
|
||||
ASSERT3U(dlu->dlu_offset, >, 0);
|
||||
dmu_buf_fill_done(db, dlu->dlu_tx, B_FALSE);
|
||||
dlu->dlu_block++;
|
||||
dlu->dlu_offset = 0;
|
||||
ASSERT3U(dlu->dlu_block, <, dlu->dlu_ndbp);
|
||||
db = dlu->dlu_dbp[dlu->dlu_block];
|
||||
}
|
||||
|
||||
/*
|
||||
* If this is the first time touching the block, inform the DMU that
|
||||
* we will fill it, and zero it out.
|
||||
*/
|
||||
if (dlu->dlu_offset == 0) {
|
||||
dmu_buf_will_fill(db, dlu->dlu_tx, B_FALSE);
|
||||
memset(db->db_data, 0, db->db_size);
|
||||
}
|
||||
|
||||
/* Create the log record directly in the buffer */
|
||||
ddt_log_record_t *dlr = (db->db_data + dlu->dlu_offset);
|
||||
DLR_SET_TYPE(dlr, DLR_ENTRY);
|
||||
DLR_SET_RECLEN(dlr, dlu->dlu_reclen);
|
||||
DLR_SET_ENTRY_TYPE(dlr, ddlwe->ddlwe_type);
|
||||
DLR_SET_ENTRY_CLASS(dlr, ddlwe->ddlwe_class);
|
||||
|
||||
ddt_log_record_entry_t *dlre =
|
||||
(ddt_log_record_entry_t *)&dlr->dlr_payload;
|
||||
dlre->dlre_key = ddlwe->ddlwe_key;
|
||||
memcpy(dlre->dlre_phys, &ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt));
|
||||
|
||||
/* Advance offset for next record. */
|
||||
dlu->dlu_offset += dlu->dlu_reclen;
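	/*
	 * Editorial sketch of the resulting on-disk layout (field names from
	 * this file; sizes left symbolic): each log block is a run of
	 * 8-byte-aligned records,
	 *
	 *   [ ddt_log_record_t: type | reclen | entry type/class ]
	 *   [ ddt_log_record_entry_t: dlre_key | dlre_phys (DDT_PHYS_SIZE(ddt)) ]
	 *
	 * repeated until a record would cross the block boundary; the unused
	 * tail of the block stays zeroed and reads back as DLR_INVALID.
	 */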
|
||||
}
|
||||
|
||||
void
|
||||
ddt_log_commit(ddt_t *ddt, ddt_log_update_t *dlu)
|
||||
{
|
||||
ASSERT3U(dlu->dlu_dbp, !=, NULL);
|
||||
ASSERT3U(dlu->dlu_block+1, ==, dlu->dlu_ndbp);
|
||||
ASSERT3U(dlu->dlu_offset, >, 0);
|
||||
|
||||
/*
|
||||
* Close out the last block. Whatever we haven't used will be zeroed,
|
||||
* which matches DLR_INVALID, so we can detect this during load.
|
||||
*/
|
||||
dmu_buf_fill_done(dlu->dlu_dbp[dlu->dlu_block], dlu->dlu_tx, B_FALSE);
|
||||
|
||||
dmu_buf_rele_array(dlu->dlu_dbp, dlu->dlu_ndbp, FTAG);
|
||||
|
||||
ddt->ddt_log_active->ddl_length +=
|
||||
dlu->dlu_ndbp * (uint64_t)dlu->dlu_dn->dn_datablksz;
|
||||
dnode_rele(dlu->dlu_dn, FTAG);
|
||||
|
||||
ddt_log_update_header(ddt, ddt->ddt_log_active, dlu->dlu_tx);
|
||||
|
||||
memset(dlu, 0, sizeof (ddt_log_update_t));
|
||||
|
||||
ddt_log_update_stats(ddt);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
ddt_log_take_first(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe)
|
||||
{
|
||||
ddt_log_entry_t *ddle = avl_first(&ddl->ddl_tree);
|
||||
if (ddle == NULL)
|
||||
return (B_FALSE);
|
||||
|
||||
DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, ddlwe);
|
||||
|
||||
ddt_histogram_sub_entry(ddt, &ddt->ddt_log_histogram, ddlwe);
|
||||
|
||||
avl_remove(&ddl->ddl_tree, ddle);
|
||||
kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ?
|
||||
ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle);
|
||||
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl, const ddt_key_t *ddk)
|
||||
{
|
||||
ddt_log_entry_t *ddle = avl_find(&ddl->ddl_tree, ddk, NULL);
|
||||
if (ddle == NULL)
|
||||
return (B_FALSE);
|
||||
|
||||
ddt_lightweight_entry_t ddlwe;
|
||||
DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, &ddlwe);
|
||||
ddt_histogram_sub_entry(ddt, &ddt->ddt_log_histogram, &ddlwe);
|
||||
|
||||
avl_remove(&ddl->ddl_tree, ddle);
|
||||
kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ?
|
||||
ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle);
|
||||
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
ddt_log_find_key(ddt_t *ddt, const ddt_key_t *ddk,
|
||||
ddt_lightweight_entry_t *ddlwe)
|
||||
{
|
||||
ddt_log_entry_t *ddle =
|
||||
avl_find(&ddt->ddt_log_active->ddl_tree, ddk, NULL);
|
||||
if (!ddle)
|
||||
ddle = avl_find(&ddt->ddt_log_flushing->ddl_tree, ddk, NULL);
|
||||
if (!ddle)
|
||||
return (B_FALSE);
|
||||
if (ddlwe)
|
||||
DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, ddlwe);
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
void
|
||||
ddt_log_checkpoint(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe, dmu_tx_t *tx)
|
||||
{
|
||||
ddt_log_t *ddl = ddt->ddt_log_flushing;
|
||||
|
||||
ASSERT3U(ddl->ddl_object, !=, 0);
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
/*
|
||||
* There should not be any entries on the log tree before the given
|
||||
* checkpoint. Assert that this is the case.
|
||||
*/
|
||||
ddt_log_entry_t *ddle = avl_first(&ddl->ddl_tree);
|
||||
if (ddle != NULL)
|
||||
VERIFY3U(ddt_key_compare(&ddle->ddle_key, &ddlwe->ddlwe_key),
|
||||
>, 0);
|
||||
#endif
|
||||
|
||||
ddl->ddl_flags |= DDL_FLAG_CHECKPOINT;
|
||||
ddl->ddl_checkpoint = ddlwe->ddlwe_key;
|
||||
ddt_log_update_header(ddt, ddl, tx);
|
||||
|
||||
ddt_log_update_stats(ddt);
|
||||
}
|
||||
|
||||
void
|
||||
ddt_log_truncate(ddt_t *ddt, dmu_tx_t *tx)
|
||||
{
|
||||
ddt_log_t *ddl = ddt->ddt_log_flushing;
|
||||
|
||||
if (ddl->ddl_object == 0)
|
||||
return;
|
||||
|
||||
ASSERT(avl_is_empty(&ddl->ddl_tree));
|
||||
|
||||
/* Eject the entire object */
|
||||
dmu_free_range(ddt->ddt_os, ddl->ddl_object, 0, DMU_OBJECT_END, tx);
|
||||
|
||||
ddl->ddl_length = 0;
|
||||
ddl->ddl_flags &= ~DDL_FLAG_CHECKPOINT;
|
||||
memset(&ddl->ddl_checkpoint, 0, sizeof (ddt_key_t));
|
||||
ddt_log_update_header(ddt, ddl, tx);
|
||||
|
||||
ddt_log_update_stats(ddt);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
ddt_log_swap(ddt_t *ddt, dmu_tx_t *tx)
|
||||
{
|
||||
/* Swap the logs. The old flushing one must be empty */
|
||||
VERIFY(avl_is_empty(&ddt->ddt_log_flushing->ddl_tree));
|
||||
|
||||
/*
|
||||
* If there are still blocks on the flushing log, truncate it first.
|
||||
* This can happen if there were entries on the flushing log that were
|
||||
* removed in memory via ddt_lookup(); their vestigial remains are
|
||||
* on disk.
|
||||
*/
|
||||
if (ddt->ddt_log_flushing->ddl_length > 0)
|
||||
ddt_log_truncate(ddt, tx);
|
||||
|
||||
/*
|
||||
* Swap policy. We swap the logs (and so begin flushing) when the
|
||||
* active tree grows too large, or when we haven't swapped it in
|
||||
* some amount of time, or if something has requested the logs be
|
||||
* flushed ASAP (see ddt_walk_init()).
|
||||
*/
|
||||
|
||||
/*
|
||||
* The log tree is too large if the memory usage of its entries is over
|
||||
* half of the memory limit. This effectively gives each log tree half
|
||||
* the available memory.
|
||||
*/
|
||||
const boolean_t too_large =
|
||||
(avl_numnodes(&ddt->ddt_log_active->ddl_tree) *
|
||||
DDT_LOG_ENTRY_SIZE(ddt)) >= (zfs_dedup_log_mem_max >> 1);
|
||||
|
||||
const boolean_t too_old =
|
||||
tx->tx_txg >=
|
||||
(ddt->ddt_log_active->ddl_first_txg +
|
||||
MAX(1, zfs_dedup_log_txg_max));
|
||||
|
||||
const boolean_t force =
|
||||
ddt->ddt_log_active->ddl_first_txg <= ddt->ddt_flush_force_txg;
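	/*
	 * Editorial note: with the conditions above, the logs swap either
	 * when the active tree holds about half of zfs_dedup_log_mem_max
	 * worth of entries, or after zfs_dedup_log_txg_max txgs, or as soon
	 * as ddt_walk_init() has requested a flush, whichever happens first.
	 */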
|
||||
|
||||
if (!(too_large || too_old || force))
|
||||
return (B_FALSE);
|
||||
|
||||
ddt_log_t *swap = ddt->ddt_log_active;
|
||||
ddt->ddt_log_active = ddt->ddt_log_flushing;
|
||||
ddt->ddt_log_flushing = swap;
|
||||
|
||||
ASSERT(ddt->ddt_log_active->ddl_flags & DDL_FLAG_FLUSHING);
|
||||
ddt->ddt_log_active->ddl_flags &=
|
||||
~(DDL_FLAG_FLUSHING | DDL_FLAG_CHECKPOINT);
|
||||
|
||||
ASSERT(!(ddt->ddt_log_flushing->ddl_flags & DDL_FLAG_FLUSHING));
|
||||
ddt->ddt_log_flushing->ddl_flags |= DDL_FLAG_FLUSHING;
|
||||
|
||||
ddt->ddt_log_active->ddl_first_txg = tx->tx_txg;
|
||||
|
||||
ddt_log_update_header(ddt, ddt->ddt_log_active, tx);
|
||||
ddt_log_update_header(ddt, ddt->ddt_log_flushing, tx);
|
||||
|
||||
ddt_log_update_stats(ddt);
|
||||
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
static inline void
|
||||
ddt_log_load_entry(ddt_t *ddt, ddt_log_t *ddl, ddt_log_record_t *dlr,
|
||||
const ddt_key_t *checkpoint)
|
||||
{
|
||||
ASSERT3U(DLR_GET_TYPE(dlr), ==, DLR_ENTRY);
|
||||
|
||||
ddt_log_record_entry_t *dlre =
|
||||
(ddt_log_record_entry_t *)dlr->dlr_payload;
|
||||
if (checkpoint != NULL &&
|
||||
ddt_key_compare(&dlre->dlre_key, checkpoint) <= 0) {
|
||||
/* Skip pre-checkpoint entries; they're already flushed. */
|
||||
return;
|
||||
}
|
||||
|
||||
ddt_lightweight_entry_t ddlwe;
|
||||
ddlwe.ddlwe_type = DLR_GET_ENTRY_TYPE(dlr);
|
||||
ddlwe.ddlwe_class = DLR_GET_ENTRY_CLASS(dlr);
|
||||
|
||||
ddlwe.ddlwe_key = dlre->dlre_key;
|
||||
memcpy(&ddlwe.ddlwe_phys, dlre->dlre_phys, DDT_PHYS_SIZE(ddt));
|
||||
|
||||
ddt_log_update_entry(ddt, ddl, &ddlwe);
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_log_empty(ddt_t *ddt, ddt_log_t *ddl)
|
||||
{
|
||||
void *cookie = NULL;
|
||||
ddt_log_entry_t *ddle;
|
||||
IMPLY(ddt->ddt_version == UINT64_MAX, avl_is_empty(&ddl->ddl_tree));
|
||||
while ((ddle =
|
||||
avl_destroy_nodes(&ddl->ddl_tree, &cookie)) != NULL) {
|
||||
kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ?
|
||||
ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle);
|
||||
}
|
||||
ASSERT(avl_is_empty(&ddl->ddl_tree));
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_log_load_one(ddt_t *ddt, uint_t n)
|
||||
{
|
||||
ASSERT3U(n, <, 2);
|
||||
|
||||
ddt_log_t *ddl = &ddt->ddt_log[n];
|
||||
|
||||
char name[DDT_NAMELEN];
|
||||
ddt_log_name(ddt, name, n);
|
||||
|
||||
uint64_t obj;
|
||||
int err = zap_lookup(ddt->ddt_os, ddt->ddt_dir_object, name,
|
||||
sizeof (uint64_t), 1, &obj);
|
||||
if (err == ENOENT)
|
||||
return (0);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
||||
dnode_t *dn;
|
||||
err = dnode_hold(ddt->ddt_os, obj, FTAG, &dn);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
||||
ddt_log_header_t hdr;
|
||||
dmu_buf_t *db;
|
||||
err = dmu_bonus_hold_by_dnode(dn, FTAG, &db, DMU_READ_NO_PREFETCH);
|
||||
if (err != 0) {
|
||||
dnode_rele(dn, FTAG);
|
||||
return (err);
|
||||
}
|
||||
memcpy(&hdr, db->db_data, sizeof (ddt_log_header_t));
|
||||
dmu_buf_rele(db, FTAG);
|
||||
|
||||
if (DLH_GET_VERSION(&hdr) != 1) {
|
||||
dnode_rele(dn, FTAG);
|
||||
zfs_dbgmsg("ddt_log_load: spa=%s ddt_log=%s "
|
||||
"unknown version=%llu", spa_name(ddt->ddt_spa), name,
|
||||
(u_longlong_t)DLH_GET_VERSION(&hdr));
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
|
||||
ddt_key_t *checkpoint = NULL;
|
||||
if (DLH_GET_FLAGS(&hdr) & DDL_FLAG_CHECKPOINT) {
|
||||
/*
|
||||
* If the log has a checkpoint, then we can ignore any entries
|
||||
* that have already been flushed.
|
||||
*/
|
||||
ASSERT(DLH_GET_FLAGS(&hdr) & DDL_FLAG_FLUSHING);
|
||||
checkpoint = &hdr.dlh_checkpoint;
|
||||
}
|
||||
|
||||
if (hdr.dlh_length > 0) {
|
||||
dmu_prefetch_by_dnode(dn, 0, 0, hdr.dlh_length,
|
||||
ZIO_PRIORITY_SYNC_READ);
|
||||
|
||||
for (uint64_t offset = 0; offset < hdr.dlh_length;
|
||||
offset += dn->dn_datablksz) {
|
||||
err = dmu_buf_hold_by_dnode(dn, offset, FTAG, &db,
|
||||
DMU_READ_PREFETCH);
|
||||
if (err != 0) {
|
||||
dnode_rele(dn, FTAG);
|
||||
ddt_log_empty(ddt, ddl);
|
||||
return (err);
|
||||
}
|
||||
|
||||
uint64_t boffset = 0;
|
||||
while (boffset < db->db_size) {
|
||||
ddt_log_record_t *dlr =
|
||||
(ddt_log_record_t *)(db->db_data + boffset);
|
||||
|
||||
/* Partially-filled block, skip the rest */
|
||||
if (DLR_GET_TYPE(dlr) == DLR_INVALID)
|
||||
break;
|
||||
|
||||
switch (DLR_GET_TYPE(dlr)) {
|
||||
case DLR_ENTRY:
|
||||
ddt_log_load_entry(ddt, ddl, dlr,
|
||||
checkpoint);
|
||||
break;
|
||||
|
||||
default:
|
||||
dmu_buf_rele(db, FTAG);
|
||||
dnode_rele(dn, FTAG);
|
||||
ddt_log_empty(ddt, ddl);
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
|
||||
boffset += DLR_GET_RECLEN(dlr);
|
||||
}
|
||||
|
||||
dmu_buf_rele(db, FTAG);
|
||||
}
|
||||
}
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
|
||||
ddl->ddl_object = obj;
|
||||
ddl->ddl_flags = DLH_GET_FLAGS(&hdr);
|
||||
ddl->ddl_length = hdr.dlh_length;
|
||||
ddl->ddl_first_txg = hdr.dlh_first_txg;
|
||||
|
||||
if (ddl->ddl_flags & DDL_FLAG_FLUSHING)
|
||||
ddt->ddt_log_flushing = ddl;
|
||||
else
|
||||
ddt->ddt_log_active = ddl;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
ddt_log_load(ddt_t *ddt)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (spa_load_state(ddt->ddt_spa) == SPA_LOAD_TRYIMPORT) {
|
||||
/*
|
||||
* The DDT is going to be freed again in a moment, so there's
|
||||
* no point loading the log; it'll just slow down import.
|
||||
*/
|
||||
return (0);
|
||||
}
|
||||
|
||||
ASSERT0(ddt->ddt_log[0].ddl_object);
|
||||
ASSERT0(ddt->ddt_log[1].ddl_object);
|
||||
if (ddt->ddt_dir_object == 0) {
|
||||
/*
|
||||
* If we're configured but the containing dir doesn't exist
|
||||
* yet, then the log object can't possibly exist either.
|
||||
*/
|
||||
ASSERT3U(ddt->ddt_version, !=, UINT64_MAX);
|
||||
return (SET_ERROR(ENOENT));
|
||||
}
|
||||
|
||||
if ((err = ddt_log_load_one(ddt, 0)) != 0)
|
||||
return (err);
|
||||
if ((err = ddt_log_load_one(ddt, 1)) != 0)
|
||||
return (err);
|
||||
|
||||
VERIFY3P(ddt->ddt_log_active, !=, ddt->ddt_log_flushing);
|
||||
VERIFY(!(ddt->ddt_log_active->ddl_flags & DDL_FLAG_FLUSHING));
|
||||
VERIFY(!(ddt->ddt_log_active->ddl_flags & DDL_FLAG_CHECKPOINT));
|
||||
VERIFY(ddt->ddt_log_flushing->ddl_flags & DDL_FLAG_FLUSHING);
|
||||
|
||||
/*
|
||||
* We have two finalisation tasks:
|
||||
*
|
||||
* - rebuild the histogram. We do this at the end rather than while
|
||||
* we're loading so we don't need to uncount and recount entries that
|
||||
* appear multiple times in the log.
|
||||
*
|
||||
* - remove entries from the flushing tree that are on both trees. This
|
||||
* happens when ddt_lookup() rehydrates an entry from the flushing
|
||||
* tree, as ddt_log_take_key() removes the entry from the in-memory
|
||||
* tree but doesn't remove it from disk.
|
||||
*/
|
||||
|
||||
/*
|
||||
* We don't technically need a config lock here, since there shouldn't
|
||||
* be pool config changes during DDT load. dva_get_dsize_sync() via
|
||||
* ddt_stat_generate() is expecting it though, and it won't hurt
|
||||
* anything, so we take it.
|
||||
*/
|
||||
spa_config_enter(ddt->ddt_spa, SCL_STATE, FTAG, RW_READER);
|
||||
|
||||
avl_tree_t *al = &ddt->ddt_log_active->ddl_tree;
|
||||
avl_tree_t *fl = &ddt->ddt_log_flushing->ddl_tree;
|
||||
ddt_log_entry_t *ae = avl_first(al);
|
||||
ddt_log_entry_t *fe = avl_first(fl);
|
||||
while (ae != NULL || fe != NULL) {
|
||||
ddt_log_entry_t *ddle;
|
||||
if (ae == NULL) {
|
||||
/* active exhausted, take flushing */
|
||||
ddle = fe;
|
||||
fe = AVL_NEXT(fl, fe);
|
||||
} else if (fe == NULL) {
|
||||
/* flushing exhausted, take active */
|
||||
ddle = ae;
|
||||
ae = AVL_NEXT(al, ae);
|
||||
} else {
|
||||
/* compare active and flushing */
|
||||
int c = ddt_key_compare(&ae->ddle_key, &fe->ddle_key);
|
||||
if (c < 0) {
|
||||
/* active behind, take and advance */
|
||||
ddle = ae;
|
||||
ae = AVL_NEXT(al, ae);
|
||||
} else if (c > 0) {
|
||||
/* flushing behind, take and advance */
|
||||
ddle = fe;
|
||||
fe = AVL_NEXT(fl, fe);
|
||||
} else {
|
||||
/* match. remove from flushing, take active */
|
||||
ddle = fe;
|
||||
fe = AVL_NEXT(fl, fe);
|
||||
avl_remove(fl, ddle);
|
||||
|
||||
ddle = ae;
|
||||
ae = AVL_NEXT(al, ae);
|
||||
}
|
||||
}
|
||||
|
||||
ddt_lightweight_entry_t ddlwe;
|
||||
DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, &ddlwe);
|
||||
ddt_histogram_add_entry(ddt, &ddt->ddt_log_histogram, &ddlwe);
|
||||
}
|
||||
|
||||
spa_config_exit(ddt->ddt_spa, SCL_STATE, FTAG);
|
||||
|
||||
ddt_log_update_stats(ddt);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
ddt_log_alloc(ddt_t *ddt)
|
||||
{
|
||||
ASSERT3P(ddt->ddt_log_active, ==, NULL);
|
||||
ASSERT3P(ddt->ddt_log_flushing, ==, NULL);
|
||||
|
||||
avl_create(&ddt->ddt_log[0].ddl_tree, ddt_key_compare,
|
||||
sizeof (ddt_log_entry_t), offsetof(ddt_log_entry_t, ddle_node));
|
||||
avl_create(&ddt->ddt_log[1].ddl_tree, ddt_key_compare,
|
||||
sizeof (ddt_log_entry_t), offsetof(ddt_log_entry_t, ddle_node));
|
||||
ddt->ddt_log_active = &ddt->ddt_log[0];
|
||||
ddt->ddt_log_flushing = &ddt->ddt_log[1];
|
||||
ddt->ddt_log_flushing->ddl_flags |= DDL_FLAG_FLUSHING;
|
||||
}
|
||||
|
||||
void
|
||||
ddt_log_free(ddt_t *ddt)
|
||||
{
|
||||
ddt_log_empty(ddt, &ddt->ddt_log[0]);
|
||||
ddt_log_empty(ddt, &ddt->ddt_log[1]);
|
||||
avl_destroy(&ddt->ddt_log[0].ddl_tree);
|
||||
avl_destroy(&ddt->ddt_log[1].ddl_tree);
|
||||
}
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_dedup, zfs_dedup_, log_txg_max, UINT, ZMOD_RW,
|
||||
"Max transactions before starting to flush dedup logs");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_dedup, zfs_dedup_, log_mem_max, U64, ZMOD_RD,
|
||||
"Max memory for dedup logs");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_dedup, zfs_dedup_, log_mem_max_percent, UINT, ZMOD_RD,
|
||||
"Max memory for dedup logs, as % of total memory");
|
@ -33,27 +33,32 @@
|
||||
#include <sys/ddt_impl.h>
|
||||
|
||||
static void
|
||||
ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
|
||||
ddt_stat_generate(ddt_t *ddt, const ddt_lightweight_entry_t *ddlwe,
|
||||
ddt_stat_t *dds)
|
||||
{
|
||||
spa_t *spa = ddt->ddt_spa;
|
||||
ddt_phys_t *ddp = dde->dde_phys;
|
||||
ddt_key_t *ddk = &dde->dde_key;
|
||||
uint64_t lsize = DDK_GET_LSIZE(ddk);
|
||||
uint64_t psize = DDK_GET_PSIZE(ddk);
|
||||
uint64_t lsize = DDK_GET_LSIZE(&ddlwe->ddlwe_key);
|
||||
uint64_t psize = DDK_GET_PSIZE(&ddlwe->ddlwe_key);
|
||||
|
||||
memset(dds, 0, sizeof (*dds));
|
||||
|
||||
for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
|
||||
uint64_t dsize = 0;
|
||||
uint64_t refcnt = ddp->ddp_refcnt;
|
||||
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
|
||||
const ddt_univ_phys_t *ddp = &ddlwe->ddlwe_phys;
|
||||
ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p);
|
||||
|
||||
if (ddp->ddp_phys_birth == 0)
|
||||
if (ddt_phys_birth(ddp, v) == 0)
|
||||
continue;
|
||||
|
||||
int ndvas = DDK_GET_CRYPT(&dde->dde_key) ?
|
||||
SPA_DVAS_PER_BP - 1 : SPA_DVAS_PER_BP;
|
||||
int ndvas = ddt_phys_dva_count(ddp, v,
|
||||
DDK_GET_CRYPT(&ddlwe->ddlwe_key));
|
||||
const dva_t *dvas = (ddt->ddt_flags & DDT_FLAG_FLAT) ?
|
||||
ddp->ddp_flat.ddp_dva : ddp->ddp_trad[p].ddp_dva;
|
||||
|
||||
uint64_t dsize = 0;
|
||||
for (int d = 0; d < ndvas; d++)
|
||||
dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]);
|
||||
dsize += dva_get_dsize_sync(spa, &dvas[d]);
|
||||
|
||||
uint64_t refcnt = ddt_phys_refcnt(ddp, v);
|
||||
|
||||
dds->dds_blocks += 1;
|
||||
dds->dds_lsize += lsize;
|
||||
@ -67,61 +72,108 @@ ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg)
|
||||
static void
|
||||
ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src)
|
||||
{
|
||||
const uint64_t *s = (const uint64_t *)src;
|
||||
uint64_t *d = (uint64_t *)dst;
|
||||
uint64_t *d_end = (uint64_t *)(dst + 1);
|
||||
dst->dds_blocks += src->dds_blocks;
|
||||
dst->dds_lsize += src->dds_lsize;
|
||||
dst->dds_psize += src->dds_psize;
|
||||
dst->dds_dsize += src->dds_dsize;
|
||||
dst->dds_ref_blocks += src->dds_ref_blocks;
|
||||
dst->dds_ref_lsize += src->dds_ref_lsize;
|
||||
dst->dds_ref_psize += src->dds_ref_psize;
|
||||
dst->dds_ref_dsize += src->dds_ref_dsize;
|
||||
}
|
||||
|
||||
ASSERT(neg == 0 || neg == -1ULL); /* add or subtract */
|
||||
static void
|
||||
ddt_stat_sub(ddt_stat_t *dst, const ddt_stat_t *src)
|
||||
{
|
||||
/* This caught more during development than you might expect... */
|
||||
ASSERT3U(dst->dds_blocks, >=, src->dds_blocks);
|
||||
ASSERT3U(dst->dds_lsize, >=, src->dds_lsize);
|
||||
ASSERT3U(dst->dds_psize, >=, src->dds_psize);
|
||||
ASSERT3U(dst->dds_dsize, >=, src->dds_dsize);
|
||||
ASSERT3U(dst->dds_ref_blocks, >=, src->dds_ref_blocks);
|
||||
ASSERT3U(dst->dds_ref_lsize, >=, src->dds_ref_lsize);
|
||||
ASSERT3U(dst->dds_ref_psize, >=, src->dds_ref_psize);
|
||||
ASSERT3U(dst->dds_ref_dsize, >=, src->dds_ref_dsize);
|
||||
|
||||
for (int i = 0; i < d_end - d; i++)
|
||||
d[i] += (s[i] ^ neg) - neg;
|
||||
dst->dds_blocks -= src->dds_blocks;
|
||||
dst->dds_lsize -= src->dds_lsize;
|
||||
dst->dds_psize -= src->dds_psize;
|
||||
dst->dds_dsize -= src->dds_dsize;
|
||||
dst->dds_ref_blocks -= src->dds_ref_blocks;
|
||||
dst->dds_ref_lsize -= src->dds_ref_lsize;
|
||||
dst->dds_ref_psize -= src->dds_ref_psize;
|
||||
dst->dds_ref_dsize -= src->dds_ref_dsize;
|
||||
}
|
||||
|
||||
void
|
||||
ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg)
|
||||
ddt_histogram_add_entry(ddt_t *ddt, ddt_histogram_t *ddh,
|
||||
const ddt_lightweight_entry_t *ddlwe)
|
||||
{
|
||||
ddt_stat_t dds;
|
||||
ddt_histogram_t *ddh;
|
||||
int bucket;
|
||||
|
||||
ddt_stat_generate(ddt, dde, &dds);
|
||||
ddt_stat_generate(ddt, ddlwe, &dds);
|
||||
|
||||
bucket = highbit64(dds.dds_ref_blocks) - 1;
|
||||
ASSERT3U(bucket, >=, 0);
|
||||
if (bucket < 0)
|
||||
return;
|
||||
|
||||
ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class];
|
||||
ddt_stat_add(&ddh->ddh_stat[bucket], &dds);
|
||||
}
|
||||
|
||||
ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg);
|
||||
void
|
||||
ddt_histogram_sub_entry(ddt_t *ddt, ddt_histogram_t *ddh,
|
||||
const ddt_lightweight_entry_t *ddlwe)
|
||||
{
|
||||
ddt_stat_t dds;
|
||||
int bucket;
|
||||
|
||||
ddt_stat_generate(ddt, ddlwe, &dds);
|
||||
|
||||
bucket = highbit64(dds.dds_ref_blocks) - 1;
|
||||
if (bucket < 0)
|
||||
return;
|
||||
|
||||
ddt_stat_sub(&ddh->ddh_stat[bucket], &dds);
|
||||
}
|
||||
|
||||
void
|
||||
ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src)
|
||||
{
|
||||
for (int h = 0; h < 64; h++)
|
||||
ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0);
|
||||
ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h]);
|
||||
}
|
||||
|
||||
void
|
||||
ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh)
|
||||
ddt_histogram_total(ddt_stat_t *dds, const ddt_histogram_t *ddh)
|
||||
{
|
||||
memset(dds, 0, sizeof (*dds));
|
||||
|
||||
for (int h = 0; h < 64; h++)
|
||||
ddt_stat_add(dds, &ddh->ddh_stat[h], 0);
|
||||
ddt_stat_add(dds, &ddh->ddh_stat[h]);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
ddt_histogram_empty(const ddt_histogram_t *ddh)
|
||||
{
|
||||
const uint64_t *s = (const uint64_t *)ddh;
|
||||
const uint64_t *s_end = (const uint64_t *)(ddh + 1);
|
||||
for (int h = 0; h < 64; h++) {
|
||||
const ddt_stat_t *dds = &ddh->ddh_stat[h];
|
||||
|
||||
while (s < s_end)
|
||||
if (*s++ != 0)
|
||||
return (B_FALSE);
|
||||
if (dds->dds_blocks == 0 &&
|
||||
dds->dds_lsize == 0 &&
|
||||
dds->dds_psize == 0 &&
|
||||
dds->dds_dsize == 0 &&
|
||||
dds->dds_ref_blocks == 0 &&
|
||||
dds->dds_ref_lsize == 0 &&
|
||||
dds->dds_ref_psize == 0 &&
|
||||
dds->dds_ref_dsize == 0)
|
||||
continue;
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
return (B_TRUE);
|
||||
}
|
||||
@ -170,6 +222,11 @@ ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total)
|
||||
ddo_total->ddo_mspace += ddo->ddo_mspace;
|
||||
}
|
||||
}
|
||||
|
||||
ddt_object_t *ddo = &ddt->ddt_log_stats;
|
||||
ddo_total->ddo_count += ddo->ddo_count;
|
||||
ddo_total->ddo_dspace += ddo->ddo_dspace;
|
||||
ddo_total->ddo_mspace += ddo->ddo_mspace;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -207,6 +264,8 @@ ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh)
|
||||
&ddt->ddt_histogram_cache[type][class]);
|
||||
}
|
||||
}
|
||||
|
||||
ddt_histogram_add(ddh, &ddt->ddt_log_histogram);
|
||||
}
|
||||
}
|
||||
|
||||
@ -217,7 +276,7 @@ ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total)
|
||||
|
||||
ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
|
||||
ddt_get_dedup_histogram(spa, ddh_total);
|
||||
ddt_histogram_stat(dds_total, ddh_total);
|
||||
ddt_histogram_total(dds_total, ddh_total);
|
||||
kmem_free(ddh_total, sizeof (ddt_histogram_t));
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
/*
|
||||
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2023, Klara Inc.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -51,8 +52,13 @@ ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len)
|
||||
|
||||
ASSERT3U(d_len, >=, s_len + 1); /* no compression plus version byte */
|
||||
|
||||
c_len = ci->ci_compress((void *)src, dst, s_len, d_len - 1,
|
||||
ci->ci_level);
|
||||
/* Call compress function directly to avoid hole detection. */
|
||||
abd_t sabd, dabd;
|
||||
abd_get_from_buf_struct(&sabd, (void *)src, s_len);
|
||||
abd_get_from_buf_struct(&dabd, dst, d_len);
|
||||
c_len = ci->ci_compress(&sabd, &dabd, s_len, d_len - 1, ci->ci_level);
|
||||
abd_free(&dabd);
|
||||
abd_free(&sabd);
|
||||
|
||||
if (c_len == s_len) {
|
||||
cpfunc = ZIO_COMPRESS_OFF;
|
||||
@ -71,12 +77,18 @@ ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len)
|
||||
{
|
||||
uchar_t version = *src++;
|
||||
int cpfunc = version & DDT_ZAP_COMPRESS_FUNCTION_MASK;
|
||||
zio_compress_info_t *ci = &zio_compress_table[cpfunc];
|
||||
|
||||
if (ci->ci_decompress != NULL)
|
||||
(void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level);
|
||||
else
|
||||
if (zio_compress_table[cpfunc].ci_decompress == NULL) {
|
||||
memcpy(dst, src, d_len);
|
||||
return;
|
||||
}
|
||||
|
||||
abd_t sabd, dabd;
|
||||
abd_get_from_buf_struct(&sabd, src, s_len);
|
||||
abd_get_from_buf_struct(&dabd, dst, d_len);
|
||||
VERIFY0(zio_decompress_data(cpfunc, &sabd, &dabd, s_len, d_len, NULL));
|
||||
abd_free(&dabd);
|
||||
abd_free(&sabd);
|
||||
|
||||
if (((version & DDT_ZAP_COMPRESS_BYTEORDER_MASK) != 0) !=
|
||||
(ZFS_HOST_BYTEORDER != 0))
|
||||
@ -108,7 +120,7 @@ ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
|
||||
|
||||
static int
|
||||
ddt_zap_lookup(objset_t *os, uint64_t object,
|
||||
const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize)
|
||||
const ddt_key_t *ddk, void *phys, size_t psize)
|
||||
{
|
||||
uchar_t *cbuf;
|
||||
uint64_t one, csize;
|
||||
@ -155,7 +167,7 @@ ddt_zap_prefetch_all(objset_t *os, uint64_t object)
|
||||
|
||||
static int
|
||||
ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
|
||||
const ddt_phys_t *phys, size_t psize, dmu_tx_t *tx)
|
||||
const void *phys, size_t psize, dmu_tx_t *tx)
|
||||
{
|
||||
const size_t cbuf_size = psize + 1;
|
||||
|
||||
@ -181,7 +193,7 @@ ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk,
|
||||
|
||||
static int
|
||||
ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
|
||||
ddt_phys_t *phys, size_t psize)
|
||||
void *phys, size_t psize)
|
||||
{
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t za;
|
||||
|
@ -95,6 +95,12 @@ uint_t dmu_prefetch_max = 8 * 1024 * 1024;
|
||||
uint_t dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Override copies= for dedup state objects. 0 means the traditional behaviour
|
||||
* (ie the default for the containing objset ie 3 for the MOS).
|
||||
*/
|
||||
uint_t dmu_ddt_copies = 0;
|
||||
|
||||
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
|
||||
{DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" },
|
||||
{DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "object directory" },
|
||||
@ -2272,6 +2278,28 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
|
||||
case ZFS_REDUNDANT_METADATA_NONE:
|
||||
break;
|
||||
}
|
||||
|
||||
if (dmu_ddt_copies > 0) {
|
||||
/*
|
||||
* If this tuneable is set, and this is a write for a
|
||||
* dedup entry store (zap or log), then we treat it
|
||||
* something like ZFS_REDUNDANT_METADATA_MOST on a
|
||||
* regular dataset: this many copies, and one more for
|
||||
* "higher" indirect blocks. This specific exception is
|
||||
* necessary because dedup objects are stored in the
|
||||
* MOS, which always has the highest possible copies.
|
||||
*/
|
||||
dmu_object_type_t stype =
|
||||
dn ? dn->dn_storage_type : DMU_OT_NONE;
|
||||
if (stype == DMU_OT_NONE)
|
||||
stype = type;
|
||||
if (stype == DMU_OT_DDT_ZAP) {
|
||||
copies = dmu_ddt_copies;
|
||||
if (level >=
|
||||
zfs_redundant_metadata_most_ditto_level)
|
||||
copies++;
|
||||
}
|
||||
}
|
||||
} else if (wp & WP_NOFILL) {
|
||||
ASSERT(level == 0);
|
||||
|
||||
@ -2824,3 +2852,7 @@ ZFS_MODULE_PARAM(zfs, zfs_, dmu_offset_next_sync, INT, ZMOD_RW,
|
||||
/* CSTYLED */
|
||||
ZFS_MODULE_PARAM(zfs, , dmu_prefetch_max, UINT, ZMOD_RW,
|
||||
"Limit one prefetch call to this size");
|
||||
|
||||
/* CSTYLED */
|
||||
ZFS_MODULE_PARAM(zfs, , dmu_ddt_copies, UINT, ZMOD_RW,
|
||||
"Override copies= for dedup objects");
|
||||
|
@ -1391,7 +1391,7 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw,
|
||||
abd_t *dabd = abd_alloc_linear(
|
||||
drrw->drr_logical_size, B_FALSE);
|
||||
err = zio_decompress_data(drrw->drr_compressiontype,
|
||||
abd, abd_to_buf(dabd), abd_get_size(abd),
|
||||
abd, dabd, abd_get_size(abd),
|
||||
abd_get_size(dabd), NULL);
|
||||
|
||||
if (err != 0) {
|
||||
@ -1407,9 +1407,8 @@ do_corrective_recv(struct receive_writer_arg *rwa, struct drr_write *drrw,
|
||||
/* Recompress the data */
|
||||
abd_t *cabd = abd_alloc_linear(BP_GET_PSIZE(bp),
|
||||
B_FALSE);
|
||||
void *buf = abd_to_buf(cabd);
|
||||
uint64_t csize = zio_compress_data(BP_GET_COMPRESS(bp),
|
||||
abd, &buf, abd_get_size(abd),
|
||||
abd, &cabd, abd_get_size(abd),
|
||||
rwa->os->os_complevel);
|
||||
abd_zero_off(cabd, csize, BP_GET_PSIZE(bp) - csize);
|
||||
/* Swap in newly compressed data into the abd */
|
||||
@ -2221,7 +2220,7 @@ flush_write_batch_impl(struct receive_writer_arg *rwa)
|
||||
|
||||
err = zio_decompress_data(
|
||||
drrw->drr_compressiontype,
|
||||
abd, abd_to_buf(decomp_abd),
|
||||
abd, decomp_abd,
|
||||
abd_get_size(abd),
|
||||
abd_get_size(decomp_abd), NULL);
|
||||
|
||||
|
@ -2425,8 +2425,14 @@ get_receive_resume_token_impl(dsl_dataset_t *ds)
|
||||
fnvlist_free(token_nv);
|
||||
compressed = kmem_alloc(packed_size, KM_SLEEP);
|
||||
|
||||
compressed_size = gzip_compress(packed, compressed,
|
||||
/* Call compress function directly to avoid hole detection. */
|
||||
abd_t pabd, cabd;
|
||||
abd_get_from_buf_struct(&pabd, packed, packed_size);
|
||||
abd_get_from_buf_struct(&cabd, compressed, packed_size);
|
||||
compressed_size = zfs_gzip_compress(&pabd, &cabd,
|
||||
packed_size, packed_size, 6);
|
||||
abd_free(&cabd);
|
||||
abd_free(&pabd);
|
||||
|
||||
zio_cksum_t cksum;
|
||||
fletcher_4_native_varsize(compressed, compressed_size, &cksum);
|
||||
|
@ -630,6 +630,8 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
        zap_cursor_fini(&zc);
    }

    ddt_walk_init(spa, scn->scn_phys.scn_max_txg);

    spa_scan_stat_init(spa);
    vdev_scan_stat_init(spa->spa_root_vdev);

@ -951,6 +953,8 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)

    memcpy(&scn->scn_phys_cached, &scn->scn_phys, sizeof (scn->scn_phys));

    ddt_walk_init(spa, scn->scn_phys.scn_max_txg);

    dsl_scan_sync_state(scn, tx, SYNC_MANDATORY);

    spa_history_log_internal(spa, "scan setup", tx,

@ -1636,7 +1640,8 @@ dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)
        txg_sync_waiting(scn->scn_dp) ||
        NSEC2SEC(sync_time_ns) >= zfs_txg_timeout)) ||
        spa_shutting_down(scn->scn_dp->dp_spa) ||
        (zfs_scan_strict_mem_lim && dsl_scan_should_clear(scn))) {
        (zfs_scan_strict_mem_lim && dsl_scan_should_clear(scn)) ||
        !ddt_walk_ready(scn->scn_dp->dp_spa)) {
        if (zb && zb->zb_level == ZB_ROOT_LEVEL) {
            dprintf("suspending at first available bookmark "
                "%llx/%llx/%llx/%llx\n",

@ -2929,11 +2934,10 @@ enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)

void
dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
    ddt_entry_t *dde, dmu_tx_t *tx)
    ddt_t *ddt, ddt_lightweight_entry_t *ddlwe, dmu_tx_t *tx)
{
    (void) tx;
    const ddt_key_t *ddk = &dde->dde_key;
    ddt_phys_t *ddp = dde->dde_phys;
    const ddt_key_t *ddk = &ddlwe->ddlwe_key;
    blkptr_t bp;
    zbookmark_phys_t zb = { 0 };

@ -2954,11 +2958,13 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
    if (scn->scn_done_txg != 0)
        return;

    for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
        if (ddp->ddp_phys_birth == 0 ||
            ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg)
    for (int p = 0; p < DDT_NPHYS(ddt); p++) {
        ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p);
        uint64_t phys_birth = ddt_phys_birth(&ddlwe->ddlwe_phys, v);

        if (phys_birth == 0 || phys_birth > scn->scn_phys.scn_max_txg)
            continue;
        ddt_bp_create(checksum, ddk, ddp, &bp);
        ddt_bp_create(checksum, ddk, &ddlwe->ddlwe_phys, v, &bp);

        scn->scn_visited_this_txg++;
        scan_funcs[scn->scn_phys.scn_func](scn->scn_dp, &bp, &zb);
@ -3002,11 +3008,11 @@ static void
dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx)
{
    ddt_bookmark_t *ddb = &scn->scn_phys.scn_ddt_bookmark;
    ddt_entry_t dde = {{{{0}}}};
    ddt_lightweight_entry_t ddlwe = {0};
    int error;
    uint64_t n = 0;

    while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &dde)) == 0) {
    while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &ddlwe)) == 0) {
        ddt_t *ddt;

        if (ddb->ddb_class > scn->scn_phys.scn_ddt_class_max)

@ -3021,16 +3027,28 @@ dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx)
        ddt = scn->scn_dp->dp_spa->spa_ddt[ddb->ddb_checksum];
        ASSERT(avl_first(&ddt->ddt_tree) == NULL);

        dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx);
        dsl_scan_ddt_entry(scn, ddb->ddb_checksum, ddt, &ddlwe, tx);
        n++;

        if (dsl_scan_check_suspend(scn, NULL))
            break;
    }

    zfs_dbgmsg("scanned %llu ddt entries on %s with class_max = %u; "
        "suspending=%u", (longlong_t)n, scn->scn_dp->dp_spa->spa_name,
        (int)scn->scn_phys.scn_ddt_class_max, (int)scn->scn_suspending);
    if (error == EAGAIN) {
        dsl_scan_check_suspend(scn, NULL);
        error = 0;

        zfs_dbgmsg("waiting for ddt to become ready for scan "
            "on %s with class_max = %u; suspending=%u",
            scn->scn_dp->dp_spa->spa_name,
            (int)scn->scn_phys.scn_ddt_class_max,
            (int)scn->scn_suspending);
    } else
        zfs_dbgmsg("scanned %llu ddt entries on %s with "
            "class_max = %u; suspending=%u", (longlong_t)n,
            scn->scn_dp->dp_spa->spa_name,
            (int)scn->scn_phys.scn_ddt_class_max,
            (int)scn->scn_suspending);

    ASSERT(error == 0 || error == ENOENT);
    ASSERT(error != ENOENT ||
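Together with the ddt_walk_ready() check added to dsl_scan_check_suspend(), this gives the walk a three-way contract: 0 produces an entry, ENOENT means the walk is complete, and EAGAIN means the dedup tables are not ready yet, so the scan suspends and retries in a later txg. A minimal, generic sketch of handling that contract, using a stand-in walker rather than ddt_walk() itself:

#include <errno.h>
#include <stdio.h>

/*
 * Stand-in walker (not ddt_walk()): returns 0 and advances the cursor while
 * entries remain, EAGAIN once to simulate "not ready yet", and ENOENT when
 * the walk is finished.
 */
static int
toy_walk(int *cursor)
{
    if (*cursor == 2) {
        *cursor += 1;
        return (EAGAIN);    /* tables not ready; caller should retry */
    }
    if (*cursor < 5) {
        *cursor += 1;
        return (0);         /* produced an entry */
    }
    return (ENOENT);        /* nothing left to visit */
}

int
main(void)
{
    int cursor = 0, error;

    while ((error = toy_walk(&cursor)) == 0)
        printf("visited entry %d\n", cursor);

    if (error == EAGAIN)
        printf("walker not ready; suspend and retry later\n");
    else if (error == ENOENT)
        printf("walk complete\n");
    return (0);
}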
@ -47,8 +47,9 @@ typedef uLongf zlen_t;

#endif

size_t
gzip_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
static size_t
zfs_gzip_compress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
    int ret;
    zlen_t dstlen = d_len;

@ -82,8 +83,9 @@ gzip_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
    return ((size_t)dstlen);
}

int
gzip_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
static int
zfs_gzip_decompress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
    (void) n;
    zlen_t dstlen = d_len;

@ -103,3 +105,6 @@ gzip_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)

    return (0);
}

ZFS_COMPRESS_WRAP_DECL(zfs_gzip_compress)
ZFS_DECOMPRESS_WRAP_DECL(zfs_gzip_decompress)
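The renamed *_buf functions keep the old buffer-in/buffer-out signatures, while the two WRAP_DECL lines generate ABD-taking entry points (zfs_gzip_compress() and zfs_gzip_decompress()) used elsewhere in this merge, for example by get_receive_resume_token_impl() above. The actual macros live in zio_compress.h and handle scattered ABDs and borrowed buffers; the sketch below only illustrates the general shape — a macro that emits a descriptor-based wrapper forwarding to a *_buf worker — using toy types in place of abd_t.

#include <stdio.h>
#include <string.h>

/* Toy descriptor standing in for abd_t (assume linear buffers only). */
typedef struct toy_abd {
    void    *ta_buf;
    size_t  ta_size;
} toy_abd_t;

/*
 * Toy version of the wrapper-generating macro: for a worker named
 * <name>_buf(void *, void *, size_t, size_t, int), emit a descriptor-based
 * entry point named <name>().  The real ZFS_COMPRESS_WRAP_DECL() is more
 * involved; this only shows the forwarding shape.
 */
#define TOY_COMPRESS_WRAP_DECL(name)                                    \
size_t                                                                  \
name(toy_abd_t *src, toy_abd_t *dst, size_t s_len, size_t d_len, int level) \
{                                                                       \
    return (name##_buf(src->ta_buf, dst->ta_buf, s_len, d_len, level)); \
}

/* A trivial "compressor" worker with the buffer-based signature. */
static size_t
toy_copy_compress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int level)
{
    (void) level;
    size_t n = (s_len < d_len) ? s_len : d_len;
    memcpy(d_start, s_start, n);
    return (n);
}

TOY_COMPRESS_WRAP_DECL(toy_copy_compress)

int
main(void)
{
    char in[] = "hello", out[16];
    toy_abd_t src = { in, sizeof (in) }, dst = { out, sizeof (out) };

    size_t n = toy_copy_compress(&src, &dst, sizeof (in), sizeof (out), 0);
    printf("wrapped worker produced %zu bytes: %s\n", n, out);
    return (0);
}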
@ -53,8 +53,8 @@ int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
static void *lz4_alloc(int flags);
static void lz4_free(void *ctx);

size_t
lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
static size_t
zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
    (void) n;

@ -81,8 +81,8 @@ lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
    return (bufsiz + sizeof (bufsiz));
}

int
lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
static int
zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
    (void) n;

@ -101,6 +101,9 @@ lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
        d_start, bufsiz, d_len) < 0);
}

ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
ZFS_DECOMPRESS_WRAP_DECL(zfs_lz4_decompress)

/*
 * LZ4 API Description:
 *
Some files were not shown because too many files have changed in this diff