mirror of
https://github.com/freebsd/freebsd-src.git
synced 2024-11-26 18:02:44 +00:00
zfs: merge openzfs/zfs@d0a91b9f8
Some checks are pending
Cross-build Kernel / ${{ matrix.target_arch }} ${{ matrix.os }} (${{ matrix.compiler }}) (clang-14, /usr/lib/llvm-14/bin, ubuntu-22.04, bmake libarchive-dev clang-14 lld-14, amd64, amd64) (push) Waiting to run
Cross-build Kernel / ${{ matrix.target_arch }} ${{ matrix.os }} (${{ matrix.compiler }}) (clang-14, /usr/lib/llvm-14/bin, ubuntu-22.04, bmake libarchive-dev clang-14 lld-14, arm64, aarch64) (push) Waiting to run
Cross-build Kernel / ${{ matrix.target_arch }} ${{ matrix.os }} (${{ matrix.compiler }}) (clang-18, /opt/homebrew/opt/llvm@18/bin, macos-latest, bmake libarchive llvm@18, amd64, amd64) (push) Waiting to run
Cross-build Kernel / ${{ matrix.target_arch }} ${{ matrix.os }} (${{ matrix.compiler }}) (clang-18, /opt/homebrew/opt/llvm@18/bin, macos-latest, bmake libarchive llvm@18, arm64, aarch64) (push) Waiting to run
Cross-build Kernel / ${{ matrix.target_arch }} ${{ matrix.os }} (${{ matrix.compiler }}) (clang-18, /usr/lib/llvm-18/bin, ubuntu-24.04, bmake libarchive-dev clang-18 lld-18, amd64, amd64) (push) Waiting to run
Cross-build Kernel / ${{ matrix.target_arch }} ${{ matrix.os }} (${{ matrix.compiler }}) (clang-18, /usr/lib/llvm-18/bin, ubuntu-24.04, bmake libarchive-dev clang-18 lld-18, arm64, aarch64) (push) Waiting to run
Some checks are pending
Cross-build Kernel / ${{ matrix.target_arch }} ${{ matrix.os }} (${{ matrix.compiler }}) (clang-14, /usr/lib/llvm-14/bin, ubuntu-22.04, bmake libarchive-dev clang-14 lld-14, amd64, amd64) (push) Waiting to run
Cross-build Kernel / ${{ matrix.target_arch }} ${{ matrix.os }} (${{ matrix.compiler }}) (clang-14, /usr/lib/llvm-14/bin, ubuntu-22.04, bmake libarchive-dev clang-14 lld-14, arm64, aarch64) (push) Waiting to run
Cross-build Kernel / ${{ matrix.target_arch }} ${{ matrix.os }} (${{ matrix.compiler }}) (clang-18, /opt/homebrew/opt/llvm@18/bin, macos-latest, bmake libarchive llvm@18, amd64, amd64) (push) Waiting to run
Cross-build Kernel / ${{ matrix.target_arch }} ${{ matrix.os }} (${{ matrix.compiler }}) (clang-18, /opt/homebrew/opt/llvm@18/bin, macos-latest, bmake libarchive llvm@18, arm64, aarch64) (push) Waiting to run
Cross-build Kernel / ${{ matrix.target_arch }} ${{ matrix.os }} (${{ matrix.compiler }}) (clang-18, /usr/lib/llvm-18/bin, ubuntu-24.04, bmake libarchive-dev clang-18 lld-18, amd64, amd64) (push) Waiting to run
Cross-build Kernel / ${{ matrix.target_arch }} ${{ matrix.os }} (${{ matrix.compiler }}) (clang-18, /usr/lib/llvm-18/bin, ubuntu-24.04, bmake libarchive-dev clang-18 lld-18, arm64, aarch64) (push) Waiting to run
Notable upstream pull request merges:
 #16643 -multiple Change rangelock handling in FreeBSD's zfs_getpages()
 #16697 46c4f2ce0 dsl_dataset: put IO-inducing frees on the pool deadlist
 #16740 -multiple BRT: Rework structures and locks to be per-vdev
 #16743 a60ed3822 L2ARC: Move different stats updates earlier
 #16758 8dc452d90 Fix some nits in zfs_getpages()
 #16759 534688948 Remove hash_elements_max accounting from DBUF and ARC
 #16766 9a81484e3 ZAP: Reduce leaf array and free chunks fragmentation
 #16773 457f8b76e BRT: More optimizations after per-vdev splitting
 #16782 0ca82c568 L2ARC: Stop rebuild before setting spa_final_txg
 #16785 d76d79fd2 zio: Avoid sleeping in the I/O path
 #16791 ae1d11882 BRT: Clear bv_entcount_dirty on destroy
 #16796 b3b0ce64d FreeBSD: Lock vnode in zfs_ioctl()
 #16797 d0a91b9f8 FreeBSD: Reduce copy_file_range() source lock to shared

Obtained from: OpenZFS
OpenZFS commit: d0a91b9f88
This commit is contained in:
commit
718519f4ef
@ -70,6 +70,7 @@ Rob Norris <robn@despairlabs.com>
|
||||
Rob Norris <rob.norris@klarasystems.com>
|
||||
Sam Lunt <samuel.j.lunt@gmail.com>
|
||||
Sanjeev Bagewadi <sanjeev.bagewadi@gmail.com>
|
||||
Sebastian Wuerl <s.wuerl@mailbox.org>
|
||||
Stoiko Ivanov <github@nomore.at>
|
||||
Tamas TEVESZ <ice@extreme.hu>
|
||||
WHR <msl0000023508@gmail.com>
|
||||
@ -78,6 +79,7 @@ Youzhong Yang <youzhong@gmail.com>
|
||||
|
||||
# Signed-off-by: overriding Author:
|
||||
Ryan <errornointernet@envs.net> <error.nointernet@gmail.com>
|
||||
Sietse <sietse@wizdom.nu> <uglymotha@wizdom.nu>
|
||||
Qiuhao Chen <chenqiuhao1997@gmail.com> <haohao0924@126.com>
|
||||
Yuxin Wang <yuxinwang9999@gmail.com> <Bi11gates9999@gmail.com>
|
||||
Zhenlei Huang <zlei@FreeBSD.org> <zlei.huang@gmail.com>
|
||||
|
@ -423,6 +423,7 @@ CONTRIBUTORS:
|
||||
Mathieu Velten <matmaul@gmail.com>
|
||||
Matt Fiddaman <github@m.fiddaman.uk>
|
||||
Matthew Ahrens <matt@delphix.com>
|
||||
Matthew Heller <matthew.f.heller@gmail.com>
|
||||
Matthew Thode <mthode@mthode.org>
|
||||
Matthias Blankertz <matthias@blankertz.org>
|
||||
Matt Johnston <matt@fugro-fsi.com.au>
|
||||
@ -562,6 +563,7 @@ CONTRIBUTORS:
|
||||
Scot W. Stevenson <scot.stevenson@gmail.com>
|
||||
Sean Eric Fagan <sef@ixsystems.com>
|
||||
Sebastian Gottschall <s.gottschall@dd-wrt.com>
|
||||
Sebastian Wuerl <s.wuerl@mailbox.org>
|
||||
Sebastien Roy <seb@delphix.com>
|
||||
Sen Haerens <sen@senhaerens.be>
|
||||
Serapheim Dimitropoulos <serapheim@delphix.com>
|
||||
@ -574,6 +576,7 @@ CONTRIBUTORS:
|
||||
Shawn Bayern <sbayern@law.fsu.edu>
|
||||
Shengqi Chen <harry-chen@outlook.com>
|
||||
Shen Yan <shenyanxxxy@qq.com>
|
||||
Sietse <sietse@wizdom.nu>
|
||||
Simon Guest <simon.guest@tesujimath.org>
|
||||
Simon Klinkert <simon.klinkert@gmail.com>
|
||||
Sowrabha Gopal <sowrabha.gopal@delphix.com>
|
||||
@ -629,6 +632,7 @@ CONTRIBUTORS:
|
||||
Trevor Bautista <trevrb@trevrb.net>
|
||||
Trey Dockendorf <treydock@gmail.com>
|
||||
Troels Nørgaard <tnn@tradeshift.com>
|
||||
tstabrawa <tstabrawa@users.noreply.github.com>
|
||||
Tulsi Jain <tulsi.jain@delphix.com>
|
||||
Turbo Fredriksson <turbo@bayour.com>
|
||||
Tyler J. Stachecki <stachecki.tyler@gmail.com>
|
||||
|
@ -6,5 +6,5 @@ Release: 1
|
||||
Release-Tags: relext
|
||||
License: CDDL
|
||||
Author: OpenZFS
|
||||
Linux-Maximum: 6.11
|
||||
Linux-Maximum: 6.12
|
||||
Linux-Minimum: 4.18
|
||||
|
@ -662,10 +662,7 @@ def section_arc(kstats_dict):
|
||||
print()
|
||||
|
||||
print('ARC hash breakdown:')
|
||||
prt_i1('Elements max:', f_hits(arc_stats['hash_elements_max']))
|
||||
prt_i2('Elements current:',
|
||||
f_perc(arc_stats['hash_elements'], arc_stats['hash_elements_max']),
|
||||
f_hits(arc_stats['hash_elements']))
|
||||
prt_i1('Elements:', f_hits(arc_stats['hash_elements']))
|
||||
prt_i1('Collisions:', f_hits(arc_stats['hash_collisions']))
|
||||
|
||||
prt_i1('Chain max:', f_hits(arc_stats['hash_chain_max']))
|
||||
|
@ -2119,9 +2119,6 @@ dump_brt(spa_t *spa)
|
||||
return;
|
||||
}
|
||||
|
||||
brt_t *brt = spa->spa_brt;
|
||||
VERIFY(brt);
|
||||
|
||||
char count[32], used[32], saved[32];
|
||||
zdb_nicebytes(brt_get_used(spa), used, sizeof (used));
|
||||
zdb_nicebytes(brt_get_saved(spa), saved, sizeof (saved));
|
||||
@ -2132,11 +2129,8 @@ dump_brt(spa_t *spa)
|
||||
if (dump_opt['T'] < 2)
|
||||
return;
|
||||
|
||||
for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
|
||||
brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
|
||||
if (brtvd == NULL)
|
||||
continue;
|
||||
|
||||
for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) {
|
||||
brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid];
|
||||
if (!brtvd->bv_initiated) {
|
||||
printf("BRT: vdev %" PRIu64 ": empty\n", vdevid);
|
||||
continue;
|
||||
@ -2160,20 +2154,21 @@ dump_brt(spa_t *spa)
|
||||
if (!do_histo)
|
||||
printf("\n%-16s %-10s\n", "DVA", "REFCNT");
|
||||
|
||||
for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
|
||||
brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
|
||||
if (brtvd == NULL || !brtvd->bv_initiated)
|
||||
for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) {
|
||||
brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid];
|
||||
if (!brtvd->bv_initiated)
|
||||
continue;
|
||||
|
||||
uint64_t counts[64] = {};
|
||||
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t *za = zap_attribute_alloc();
|
||||
for (zap_cursor_init(&zc, brt->brt_mos, brtvd->bv_mos_entries);
|
||||
for (zap_cursor_init(&zc, spa->spa_meta_objset,
|
||||
brtvd->bv_mos_entries);
|
||||
zap_cursor_retrieve(&zc, za) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
uint64_t refcnt;
|
||||
VERIFY0(zap_lookup_uint64(brt->brt_mos,
|
||||
VERIFY0(zap_lookup_uint64(spa->spa_meta_objset,
|
||||
brtvd->bv_mos_entries,
|
||||
(const uint64_t *)za->za_name, 1,
|
||||
za->za_integer_length, za->za_num_integers,
|
||||
@ -8227,14 +8222,11 @@ dump_mos_leaks(spa_t *spa)
|
||||
}
|
||||
}
|
||||
|
||||
if (spa->spa_brt != NULL) {
|
||||
brt_t *brt = spa->spa_brt;
|
||||
for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
|
||||
brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
|
||||
if (brtvd != NULL && brtvd->bv_initiated) {
|
||||
mos_obj_refd(brtvd->bv_mos_brtvdev);
|
||||
mos_obj_refd(brtvd->bv_mos_entries);
|
||||
}
|
||||
for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) {
|
||||
brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid];
|
||||
if (brtvd->bv_initiated) {
|
||||
mos_obj_refd(brtvd->bv_mos_brtvdev);
|
||||
mos_obj_refd(brtvd->bv_mos_entries);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -445,8 +445,8 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
|
||||
* it's a loopback event from spa_async_remove(). Just
|
||||
* ignore it.
|
||||
*/
|
||||
if (vs->vs_state == VDEV_STATE_REMOVED &&
|
||||
state == VDEV_STATE_REMOVED)
|
||||
if ((vs->vs_state == VDEV_STATE_REMOVED && state ==
|
||||
VDEV_STATE_REMOVED) || vs->vs_state == VDEV_STATE_OFFLINE)
|
||||
return;
|
||||
|
||||
/* Remove the vdev since device is unplugged */
|
||||
|
@ -201,7 +201,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
|
||||
"failed (%lld " #OP " %lld) " STR "\n", \
|
||||
(long long)(_verify3_left), \
|
||||
(long long)(_verify3_right), \
|
||||
__VA_ARGS); \
|
||||
__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
#define VERIFY3UF(LEFT, OP, RIGHT, STR, ...) do { \
|
||||
@ -213,7 +213,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
|
||||
"failed (%llu " #OP " %llu) " STR "\n", \
|
||||
(unsigned long long)(_verify3_left), \
|
||||
(unsigned long long)(_verify3_right), \
|
||||
__VA_ARGS); \
|
||||
__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
#define VERIFY3PF(LEFT, OP, RIGHT, STR, ...) do { \
|
||||
|
@ -98,11 +98,9 @@ vn_flush_cached_data(vnode_t *vp, boolean_t sync)
|
||||
{
|
||||
if (vm_object_mightbedirty(vp->v_object)) {
|
||||
int flags = sync ? OBJPC_SYNC : 0;
|
||||
vn_lock(vp, LK_SHARED | LK_RETRY);
|
||||
zfs_vmobject_wlock(vp->v_object);
|
||||
vm_object_page_clean(vp->v_object, 0, 0, flags);
|
||||
zfs_vmobject_wunlock(vp->v_object);
|
||||
VOP_UNLOCK(vp);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -205,7 +205,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
|
||||
"failed (%lld " #OP " %lld) " STR "\n", \
|
||||
(long long)(_verify3_left), \
|
||||
(long long)(_verify3_right), \
|
||||
__VA_ARGS); \
|
||||
__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
#define VERIFY3UF(LEFT, OP, RIGHT, STR, ...) do { \
|
||||
@ -217,7 +217,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
|
||||
"failed (%llu " #OP " %llu) " STR "\n", \
|
||||
(unsigned long long)(_verify3_left), \
|
||||
(unsigned long long)(_verify3_right), \
|
||||
__VA_ARGS); \
|
||||
__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
#define VERIFY3PF(LEFT, OP, RIGHT, STR, ...) do { \
|
||||
|
@ -347,6 +347,7 @@ void l2arc_fini(void);
|
||||
void l2arc_start(void);
|
||||
void l2arc_stop(void);
|
||||
void l2arc_spa_rebuild_start(spa_t *spa);
|
||||
void l2arc_spa_rebuild_stop(spa_t *spa);
|
||||
|
||||
#ifndef _KERNEL
|
||||
extern boolean_t arc_watch;
|
||||
|
@ -942,6 +942,7 @@ typedef struct arc_sums {
|
||||
wmsum_t arcstat_evict_l2_eligible_mru;
|
||||
wmsum_t arcstat_evict_l2_ineligible;
|
||||
wmsum_t arcstat_evict_l2_skip;
|
||||
wmsum_t arcstat_hash_elements;
|
||||
wmsum_t arcstat_hash_collisions;
|
||||
wmsum_t arcstat_hash_chains;
|
||||
aggsum_t arcstat_size;
|
||||
|
@ -86,28 +86,38 @@ typedef struct brt_vdev_phys {
|
||||
uint64_t bvp_savedspace;
|
||||
} brt_vdev_phys_t;
|
||||
|
||||
typedef struct brt_vdev {
|
||||
struct brt_vdev {
|
||||
/*
|
||||
* Pending changes from open contexts.
|
||||
*/
|
||||
kmutex_t bv_pending_lock;
|
||||
avl_tree_t bv_pending_tree[TXG_SIZE];
|
||||
/*
|
||||
* Protects bv_mos_*.
|
||||
*/
|
||||
krwlock_t bv_mos_entries_lock ____cacheline_aligned;
|
||||
/*
|
||||
* Protects all the fields starting from bv_initiated.
|
||||
*/
|
||||
krwlock_t bv_lock ____cacheline_aligned;
|
||||
/*
|
||||
* VDEV id.
|
||||
*/
|
||||
uint64_t bv_vdevid;
|
||||
/*
|
||||
* Is the structure initiated?
|
||||
* (bv_entcount and bv_bitmap are allocated?)
|
||||
*/
|
||||
boolean_t bv_initiated;
|
||||
uint64_t bv_vdevid ____cacheline_aligned;
|
||||
/*
|
||||
* Object number in the MOS for the entcount array and brt_vdev_phys.
|
||||
*/
|
||||
uint64_t bv_mos_brtvdev;
|
||||
/*
|
||||
* Object number in the MOS for the entries table.
|
||||
* Object number in the MOS and dnode for the entries table.
|
||||
*/
|
||||
uint64_t bv_mos_entries;
|
||||
dnode_t *bv_mos_entries_dnode;
|
||||
/*
|
||||
* Entries to sync.
|
||||
* Is the structure initiated?
|
||||
* (bv_entcount and bv_bitmap are allocated?)
|
||||
*/
|
||||
avl_tree_t bv_tree;
|
||||
boolean_t bv_initiated;
|
||||
/*
|
||||
* Does the bv_entcount[] array need byte swapping?
|
||||
*/
|
||||
@ -120,6 +130,26 @@ typedef struct brt_vdev {
|
||||
* This is the array with BRT entry count per BRT_RANGESIZE.
|
||||
*/
|
||||
uint16_t *bv_entcount;
|
||||
/*
|
||||
* bv_entcount[] potentially can be a bit too big to synchronize it all
|
||||
* when we just changed few entcounts. The fields below allow us to
|
||||
* track updates to bv_entcount[] array since the last sync.
|
||||
* A single bit in the bv_bitmap represents as many entcounts as can
|
||||
* fit into a single BRT_BLOCKSIZE.
|
||||
* For example we have 65536 entcounts in the bv_entcount array
|
||||
* (so the whole array is 128kB). We updated bv_entcount[2] and
|
||||
* bv_entcount[5]. In that case only first bit in the bv_bitmap will
|
||||
* be set and we will write only first BRT_BLOCKSIZE out of 128kB.
|
||||
*/
|
||||
ulong_t *bv_bitmap;
|
||||
/*
|
||||
* bv_entcount[] needs updating on disk.
|
||||
*/
|
||||
boolean_t bv_entcount_dirty;
|
||||
/*
|
||||
* brt_vdev_phys needs updating on disk.
|
||||
*/
|
||||
boolean_t bv_meta_dirty;
|
||||
/*
|
||||
* Sum of all bv_entcount[]s.
|
||||
*/
|
||||
@ -133,65 +163,27 @@ typedef struct brt_vdev {
|
||||
*/
|
||||
uint64_t bv_savedspace;
|
||||
/*
|
||||
* brt_vdev_phys needs updating on disk.
|
||||
* Entries to sync.
|
||||
*/
|
||||
boolean_t bv_meta_dirty;
|
||||
/*
|
||||
* bv_entcount[] needs updating on disk.
|
||||
*/
|
||||
boolean_t bv_entcount_dirty;
|
||||
/*
|
||||
* bv_entcount[] potentially can be a bit too big to synchronize it all
|
||||
* when we just changed few entcounts. The fields below allow us to
|
||||
* track updates to bv_entcount[] array since the last sync.
|
||||
* A single bit in the bv_bitmap represents as many entcounts as can
|
||||
* fit into a single BRT_BLOCKSIZE.
|
||||
* For example we have 65536 entcounts in the bv_entcount array
|
||||
* (so the whole array is 128kB). We updated bv_entcount[2] and
|
||||
* bv_entcount[5]. In that case only first bit in the bv_bitmap will
|
||||
* be set and we will write only first BRT_BLOCKSIZE out of 128kB.
|
||||
*/
|
||||
ulong_t *bv_bitmap;
|
||||
uint64_t bv_nblocks;
|
||||
} brt_vdev_t;
|
||||
avl_tree_t bv_tree;
|
||||
};
|
||||
|
||||
/*
|
||||
* In-core brt
|
||||
*/
|
||||
typedef struct brt {
|
||||
krwlock_t brt_lock;
|
||||
spa_t *brt_spa;
|
||||
#define brt_mos brt_spa->spa_meta_objset
|
||||
uint64_t brt_rangesize;
|
||||
uint64_t brt_usedspace;
|
||||
uint64_t brt_savedspace;
|
||||
avl_tree_t brt_pending_tree[TXG_SIZE];
|
||||
kmutex_t brt_pending_lock[TXG_SIZE];
|
||||
/* Sum of all entries across all bv_trees. */
|
||||
uint64_t brt_nentries;
|
||||
brt_vdev_t *brt_vdevs;
|
||||
uint64_t brt_nvdevs;
|
||||
} brt_t;
|
||||
|
||||
/* Size of bre_offset / sizeof (uint64_t). */
|
||||
/* Size of offset / sizeof (uint64_t). */
|
||||
#define BRT_KEY_WORDS (1)
|
||||
|
||||
#define BRE_OFFSET(bre) (DVA_GET_OFFSET(&(bre)->bre_bp.blk_dva[0]))
|
||||
|
||||
/*
|
||||
* In-core brt entry.
|
||||
* On-disk we use bre_offset as the key and bre_refcount as the value.
|
||||
* On-disk we use ZAP with offset as the key and count as the value.
|
||||
*/
|
||||
typedef struct brt_entry {
|
||||
uint64_t bre_offset;
|
||||
uint64_t bre_refcount;
|
||||
avl_node_t bre_node;
|
||||
blkptr_t bre_bp;
|
||||
uint64_t bre_count;
|
||||
uint64_t bre_pcount;
|
||||
} brt_entry_t;
|
||||
|
||||
typedef struct brt_pending_entry {
|
||||
blkptr_t bpe_bp;
|
||||
int bpe_count;
|
||||
avl_node_t bpe_node;
|
||||
} brt_pending_entry_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -53,6 +53,7 @@ extern "C" {
|
||||
/*
|
||||
* Forward references that lots of things need.
|
||||
*/
|
||||
typedef struct brt_vdev brt_vdev_t;
|
||||
typedef struct spa spa_t;
|
||||
typedef struct vdev vdev_t;
|
||||
typedef struct metaslab metaslab_t;
|
||||
|
@ -412,8 +412,12 @@ struct spa {
|
||||
uint64_t spa_dedup_dspace; /* Cache get_dedup_dspace() */
|
||||
uint64_t spa_dedup_checksum; /* default dedup checksum */
|
||||
uint64_t spa_dspace; /* dspace in normal class */
|
||||
uint64_t spa_rdspace; /* raw (non-dedup) --//-- */
|
||||
boolean_t spa_active_ddt_prune; /* ddt prune process active */
|
||||
struct brt *spa_brt; /* in-core BRT */
|
||||
brt_vdev_t **spa_brt_vdevs; /* array of per-vdev BRTs */
|
||||
uint64_t spa_brt_nvdevs; /* number of vdevs in BRT */
|
||||
uint64_t spa_brt_rangesize; /* pool's BRT range size */
|
||||
krwlock_t spa_brt_lock; /* Protects brt_vdevs/nvdevs */
|
||||
kmutex_t spa_vdev_top_lock; /* dueling offline/remove */
|
||||
kmutex_t spa_proc_lock; /* protects spa_proc* */
|
||||
kcondvar_t spa_proc_cv; /* spa_proc_state transitions */
|
||||
|
@ -223,11 +223,15 @@ int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
boolean_t *normalization_conflictp);
|
||||
int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
int zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
|
||||
int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);
|
||||
int zap_prefetch_object(objset_t *os, uint64_t zapobj);
|
||||
int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints);
|
||||
int zap_prefetch_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints);
|
||||
|
||||
int zap_lookup_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
@ -236,9 +240,6 @@ int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
|
||||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *ncp);
|
||||
|
||||
int zap_count_write_by_dnode(dnode_t *dn, const char *name,
|
||||
int add, zfs_refcount_t *towrite, zfs_refcount_t *tooverwrite);
|
||||
|
||||
/*
|
||||
* Create an attribute with the given name and value.
|
||||
*
|
||||
|
@ -109,7 +109,7 @@ Stops and cancels an in-progress removal of a top-level vdev.
|
||||
.El
|
||||
.
|
||||
.Sh EXAMPLES
|
||||
.\" These are, respectively, examples 14 from zpool.8
|
||||
.\" These are, respectively, examples 15 from zpool.8
|
||||
.\" Make sure to update them bidirectionally
|
||||
.Ss Example 1 : No Removing a Mirrored top-level (Log or Data) Device
|
||||
The following commands remove the mirrored log device
|
||||
@ -142,9 +142,43 @@ The command to remove the mirrored log
|
||||
.Ar mirror-2 No is :
|
||||
.Dl # Nm zpool Cm remove Ar tank mirror-2
|
||||
.Pp
|
||||
At this point, the log device no longer exists
|
||||
(both sides of the mirror have been removed):
|
||||
.Bd -literal -compact -offset Ds
|
||||
pool: tank
|
||||
state: ONLINE
|
||||
scan: none requested
|
||||
config:
|
||||
|
||||
NAME STATE READ WRITE CKSUM
|
||||
tank ONLINE 0 0 0
|
||||
mirror-0 ONLINE 0 0 0
|
||||
sda ONLINE 0 0 0
|
||||
sdb ONLINE 0 0 0
|
||||
mirror-1 ONLINE 0 0 0
|
||||
sdc ONLINE 0 0 0
|
||||
sdd ONLINE 0 0 0
|
||||
.Ed
|
||||
.Pp
|
||||
The command to remove the mirrored data
|
||||
.Ar mirror-1 No is :
|
||||
.Dl # Nm zpool Cm remove Ar tank mirror-1
|
||||
.Pp
|
||||
After
|
||||
.Ar mirror-1 No has been evacuated, the pool remains redundant, but
|
||||
the total amount of space is reduced:
|
||||
.Bd -literal -compact -offset Ds
|
||||
pool: tank
|
||||
state: ONLINE
|
||||
scan: none requested
|
||||
config:
|
||||
|
||||
NAME STATE READ WRITE CKSUM
|
||||
tank ONLINE 0 0 0
|
||||
mirror-0 ONLINE 0 0 0
|
||||
sda ONLINE 0 0 0
|
||||
sdb ONLINE 0 0 0
|
||||
.Ed
|
||||
.
|
||||
.Sh SEE ALSO
|
||||
.Xr zpool-add 8 ,
|
||||
|
@ -405,9 +405,43 @@ The command to remove the mirrored log
|
||||
.Ar mirror-2 No is :
|
||||
.Dl # Nm zpool Cm remove Ar tank mirror-2
|
||||
.Pp
|
||||
At this point, the log device no longer exists
|
||||
(both sides of the mirror have been removed):
|
||||
.Bd -literal -compact -offset Ds
|
||||
pool: tank
|
||||
state: ONLINE
|
||||
scan: none requested
|
||||
config:
|
||||
|
||||
NAME STATE READ WRITE CKSUM
|
||||
tank ONLINE 0 0 0
|
||||
mirror-0 ONLINE 0 0 0
|
||||
sda ONLINE 0 0 0
|
||||
sdb ONLINE 0 0 0
|
||||
mirror-1 ONLINE 0 0 0
|
||||
sdc ONLINE 0 0 0
|
||||
sdd ONLINE 0 0 0
|
||||
.Ed
|
||||
.Pp
|
||||
The command to remove the mirrored data
|
||||
.Ar mirror-1 No is :
|
||||
.Dl # Nm zpool Cm remove Ar tank mirror-1
|
||||
.Pp
|
||||
After
|
||||
.Ar mirror-1 No has been evacuated, the pool remains redundant, but
|
||||
the total amount of space is reduced:
|
||||
.Bd -literal -compact -offset Ds
|
||||
pool: tank
|
||||
state: ONLINE
|
||||
scan: none requested
|
||||
config:
|
||||
|
||||
NAME STATE READ WRITE CKSUM
|
||||
tank ONLINE 0 0 0
|
||||
mirror-0 ONLINE 0 0 0
|
||||
sda ONLINE 0 0 0
|
||||
sdb ONLINE 0 0 0
|
||||
.Ed
|
||||
.
|
||||
.Ss Example 16 : No Displaying expanded space on a device
|
||||
The following command displays the detailed information for the pool
|
||||
|
@ -291,8 +291,12 @@ zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
|
||||
case F_SEEK_HOLE:
|
||||
{
|
||||
off = *(offset_t *)data;
|
||||
error = vn_lock(vp, LK_SHARED);
|
||||
if (error)
|
||||
return (error);
|
||||
/* offset parameter is in/out */
|
||||
error = zfs_holey(VTOZ(vp), com, &off);
|
||||
VOP_UNLOCK(vp);
|
||||
if (error)
|
||||
return (error);
|
||||
*(offset_t *)data = off;
|
||||
@ -452,8 +456,10 @@ mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
|
||||
if (!vm_page_wired(pp) && pp->valid == 0 &&
|
||||
vm_page_busy_tryupgrade(pp))
|
||||
vm_page_free(pp);
|
||||
else
|
||||
else {
|
||||
vm_page_deactivate_noreuse(pp);
|
||||
vm_page_sunbusy(pp);
|
||||
}
|
||||
zfs_vmobject_wunlock(obj);
|
||||
}
|
||||
} else {
|
||||
@ -3928,6 +3934,7 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
|
||||
if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
|
||||
return (zfs_vm_pagerret_error);
|
||||
|
||||
object = ma[0]->object;
|
||||
start = IDX_TO_OFF(ma[0]->pindex);
|
||||
end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
|
||||
|
||||
@ -3936,33 +3943,47 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
|
||||
* Note that we need to handle the case of the block size growing.
|
||||
*/
|
||||
for (;;) {
|
||||
uint64_t len;
|
||||
|
||||
blksz = zp->z_blksz;
|
||||
len = roundup(end, blksz) - rounddown(start, blksz);
|
||||
|
||||
lr = zfs_rangelock_tryenter(&zp->z_rangelock,
|
||||
rounddown(start, blksz),
|
||||
roundup(end, blksz) - rounddown(start, blksz), RL_READER);
|
||||
rounddown(start, blksz), len, RL_READER);
|
||||
if (lr == NULL) {
|
||||
if (rahead != NULL) {
|
||||
*rahead = 0;
|
||||
rahead = NULL;
|
||||
/*
|
||||
* Avoid a deadlock with update_pages(). We need to
|
||||
* hold the range lock when copying from the DMU, so
|
||||
* give up the busy lock to allow update_pages() to
|
||||
* proceed. We might need to allocate new pages, which
|
||||
* isn't quite right since this allocation isn't subject
|
||||
* to the page fault handler's OOM logic, but this is
|
||||
* the best we can do for now.
|
||||
*/
|
||||
for (int i = 0; i < count; i++) {
|
||||
ASSERT(vm_page_none_valid(ma[i]));
|
||||
vm_page_xunbusy(ma[i]);
|
||||
}
|
||||
if (rbehind != NULL) {
|
||||
*rbehind = 0;
|
||||
rbehind = NULL;
|
||||
}
|
||||
break;
|
||||
|
||||
lr = zfs_rangelock_enter(&zp->z_rangelock,
|
||||
rounddown(start, blksz), len, RL_READER);
|
||||
|
||||
zfs_vmobject_wlock(object);
|
||||
(void) vm_page_grab_pages(object, OFF_TO_IDX(start),
|
||||
VM_ALLOC_NORMAL | VM_ALLOC_WAITOK | VM_ALLOC_ZERO,
|
||||
ma, count);
|
||||
zfs_vmobject_wunlock(object);
|
||||
}
|
||||
if (blksz == zp->z_blksz)
|
||||
break;
|
||||
zfs_rangelock_exit(lr);
|
||||
}
|
||||
|
||||
object = ma[0]->object;
|
||||
zfs_vmobject_wlock(object);
|
||||
obj_size = object->un_pager.vnp.vnp_size;
|
||||
zfs_vmobject_wunlock(object);
|
||||
if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
|
||||
if (lr != NULL)
|
||||
zfs_rangelock_exit(lr);
|
||||
zfs_rangelock_exit(lr);
|
||||
zfs_exit(zfsvfs, FTAG);
|
||||
return (zfs_vm_pagerret_bad);
|
||||
}
|
||||
@ -3987,11 +4008,33 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
|
||||
* ZFS will panic if we request DMU to read beyond the end of the last
|
||||
* allocated block.
|
||||
*/
|
||||
error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b,
|
||||
&pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE));
|
||||
for (int i = 0; i < count; i++) {
|
||||
int dummypgsin, count1, j, last_size;
|
||||
|
||||
if (lr != NULL)
|
||||
zfs_rangelock_exit(lr);
|
||||
if (vm_page_any_valid(ma[i])) {
|
||||
ASSERT(vm_page_all_valid(ma[i]));
|
||||
continue;
|
||||
}
|
||||
for (j = i + 1; j < count; j++) {
|
||||
if (vm_page_any_valid(ma[j])) {
|
||||
ASSERT(vm_page_all_valid(ma[j]));
|
||||
break;
|
||||
}
|
||||
}
|
||||
count1 = j - i;
|
||||
dummypgsin = 0;
|
||||
last_size = j == count ?
|
||||
MIN(end, obj_size) - (end - PAGE_SIZE) : PAGE_SIZE;
|
||||
error = dmu_read_pages(zfsvfs->z_os, zp->z_id, &ma[i], count1,
|
||||
i == 0 ? &pgsin_b : &dummypgsin,
|
||||
j == count ? &pgsin_a : &dummypgsin,
|
||||
last_size);
|
||||
if (error != 0)
|
||||
break;
|
||||
i += count1 - 1;
|
||||
}
|
||||
|
||||
zfs_rangelock_exit(lr);
|
||||
ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
|
||||
|
||||
dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);
|
||||
@ -6159,7 +6202,7 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
|
||||
} else {
|
||||
#if (__FreeBSD_version >= 1302506 && __FreeBSD_version < 1400000) || \
|
||||
__FreeBSD_version >= 1400086
|
||||
vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false,
|
||||
vn_lock_pair(invp, false, LK_SHARED, outvp, false,
|
||||
LK_EXCLUSIVE);
|
||||
#else
|
||||
vn_lock_pair(invp, false, outvp, false);
|
||||
|
@ -375,7 +375,18 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg)
|
||||
struct super_block *sb = (struct super_block *)arg;
|
||||
int objects = 0;
|
||||
|
||||
(void) -zfs_prune(sb, nr_to_scan, &objects);
|
||||
/*
|
||||
* deactivate_locked_super calls shrinker_free and only then
|
||||
* sops->kill_sb cb, resulting in UAF on umount when trying to reach
|
||||
* for the shrinker functions in zpl_prune_sb of in-umount dataset.
|
||||
* Increment if s_active is not zero, but don't prune if it is -
|
||||
* umount could be underway.
|
||||
*/
|
||||
if (atomic_inc_not_zero(&sb->s_active)) {
|
||||
(void) -zfs_prune(sb, nr_to_scan, &objects);
|
||||
atomic_dec(&sb->s_active);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
const struct super_operations zpl_super_operations = {
|
||||
|
@ -1176,7 +1176,7 @@ zvol_queue_limits_init(zvol_queue_limits_t *limits, zvol_state_t *zv,
|
||||
limits->zql_max_segment_size = UINT_MAX;
|
||||
}
|
||||
|
||||
limits->zql_io_opt = zv->zv_volblocksize;
|
||||
limits->zql_io_opt = DMU_MAX_ACCESS / 2;
|
||||
|
||||
limits->zql_physical_block_size = zv->zv_volblocksize;
|
||||
limits->zql_max_discard_sectors =
|
||||
|
@ -1074,12 +1074,9 @@ buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp)
|
||||
ARCSTAT_BUMP(arcstat_hash_collisions);
|
||||
if (i == 1)
|
||||
ARCSTAT_BUMP(arcstat_hash_chains);
|
||||
|
||||
ARCSTAT_MAX(arcstat_hash_chain_max, i);
|
||||
}
|
||||
uint64_t he = atomic_inc_64_nv(
|
||||
&arc_stats.arcstat_hash_elements.value.ui64);
|
||||
ARCSTAT_MAX(arcstat_hash_elements_max, he);
|
||||
ARCSTAT_BUMP(arcstat_hash_elements);
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
@ -1103,8 +1100,7 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
|
||||
arc_hdr_clear_flags(hdr, ARC_FLAG_IN_HASH_TABLE);
|
||||
|
||||
/* collect some hash table performance data */
|
||||
atomic_dec_64(&arc_stats.arcstat_hash_elements.value.ui64);
|
||||
|
||||
ARCSTAT_BUMPDOWN(arcstat_hash_elements);
|
||||
if (buf_hash_table.ht_table[idx] &&
|
||||
buf_hash_table.ht_table[idx]->b_hash_next == NULL)
|
||||
ARCSTAT_BUMPDOWN(arcstat_hash_chains);
|
||||
@ -7008,6 +7004,9 @@ arc_kstat_update(kstat_t *ksp, int rw)
|
||||
wmsum_value(&arc_sums.arcstat_evict_l2_ineligible);
|
||||
as->arcstat_evict_l2_skip.value.ui64 =
|
||||
wmsum_value(&arc_sums.arcstat_evict_l2_skip);
|
||||
as->arcstat_hash_elements.value.ui64 =
|
||||
as->arcstat_hash_elements_max.value.ui64 =
|
||||
wmsum_value(&arc_sums.arcstat_hash_elements);
|
||||
as->arcstat_hash_collisions.value.ui64 =
|
||||
wmsum_value(&arc_sums.arcstat_hash_collisions);
|
||||
as->arcstat_hash_chains.value.ui64 =
|
||||
@ -7432,6 +7431,7 @@ arc_state_init(void)
|
||||
wmsum_init(&arc_sums.arcstat_evict_l2_eligible_mru, 0);
|
||||
wmsum_init(&arc_sums.arcstat_evict_l2_ineligible, 0);
|
||||
wmsum_init(&arc_sums.arcstat_evict_l2_skip, 0);
|
||||
wmsum_init(&arc_sums.arcstat_hash_elements, 0);
|
||||
wmsum_init(&arc_sums.arcstat_hash_collisions, 0);
|
||||
wmsum_init(&arc_sums.arcstat_hash_chains, 0);
|
||||
aggsum_init(&arc_sums.arcstat_size, 0);
|
||||
@ -7590,6 +7590,7 @@ arc_state_fini(void)
|
||||
wmsum_fini(&arc_sums.arcstat_evict_l2_eligible_mru);
|
||||
wmsum_fini(&arc_sums.arcstat_evict_l2_ineligible);
|
||||
wmsum_fini(&arc_sums.arcstat_evict_l2_skip);
|
||||
wmsum_fini(&arc_sums.arcstat_hash_elements);
|
||||
wmsum_fini(&arc_sums.arcstat_hash_collisions);
|
||||
wmsum_fini(&arc_sums.arcstat_hash_chains);
|
||||
aggsum_fini(&arc_sums.arcstat_size);
|
||||
@ -9287,6 +9288,14 @@ skip:
|
||||
hdr->b_l2hdr.b_hits = 0;
|
||||
hdr->b_l2hdr.b_arcs_state =
|
||||
hdr->b_l1hdr.b_state->arcs_state;
|
||||
arc_hdr_set_flags(hdr, ARC_FLAG_HAS_L2HDR |
|
||||
ARC_FLAG_L2_WRITING);
|
||||
|
||||
(void) zfs_refcount_add_many(&dev->l2ad_alloc,
|
||||
arc_hdr_size(hdr), hdr);
|
||||
l2arc_hdr_arcstats_increment(hdr);
|
||||
vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
|
||||
|
||||
mutex_enter(&dev->l2ad_mtx);
|
||||
if (pio == NULL) {
|
||||
/*
|
||||
@ -9298,12 +9307,6 @@ skip:
|
||||
}
|
||||
list_insert_head(&dev->l2ad_buflist, hdr);
|
||||
mutex_exit(&dev->l2ad_mtx);
|
||||
arc_hdr_set_flags(hdr, ARC_FLAG_HAS_L2HDR |
|
||||
ARC_FLAG_L2_WRITING);
|
||||
|
||||
(void) zfs_refcount_add_many(&dev->l2ad_alloc,
|
||||
arc_hdr_size(hdr), hdr);
|
||||
l2arc_hdr_arcstats_increment(hdr);
|
||||
|
||||
boolean_t commit = l2arc_log_blk_insert(dev, hdr);
|
||||
mutex_exit(hash_lock);
|
||||
@ -9333,7 +9336,6 @@ skip:
|
||||
write_psize += psize;
|
||||
write_asize += asize;
|
||||
dev->l2ad_hand += asize;
|
||||
vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
|
||||
|
||||
if (commit) {
|
||||
/* l2ad_hand will be adjusted inside. */
|
||||
@ -9844,6 +9846,37 @@ l2arc_spa_rebuild_start(spa_t *spa)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Request cancellation of the L2ARC rebuild on every cache device of this
 * pool, then wait until each rebuild that has begun clears l2ad_rebuild.
 *
 * The caller must hold spa_namespace_lock or be the pool's export thread
 * (asserted below).
 */
void
|
||||
l2arc_spa_rebuild_stop(spa_t *spa)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
|
||||
spa->spa_export_thread == curthread);
|
||||
|
||||
/*
 * Pass 1: set the cancel flag on every device before waiting on any of
 * them (presumably so that all rebuilds are asked to stop up front).
 * Cache vdevs for which l2arc_vdev_get() returns NULL are skipped.
 */
for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
|
||||
l2arc_dev_t *dev =
|
||||
l2arc_vdev_get(spa->spa_l2cache.sav_vdevs[i]);
|
||||
if (dev == NULL)
|
||||
continue;
|
||||
mutex_enter(&l2arc_rebuild_thr_lock);
|
||||
dev->l2ad_rebuild_cancel = B_TRUE;
|
||||
mutex_exit(&l2arc_rebuild_thr_lock);
|
||||
}
|
||||
/*
 * Pass 2: for each device whose rebuild has begun, sleep on
 * l2arc_rebuild_thr_cv until l2ad_rebuild is no longer set. The flag is
 * re-checked after every wakeup because the condvar is shared.
 */
for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
|
||||
l2arc_dev_t *dev =
|
||||
l2arc_vdev_get(spa->spa_l2cache.sav_vdevs[i]);
|
||||
if (dev == NULL)
|
||||
continue;
|
||||
mutex_enter(&l2arc_rebuild_thr_lock);
|
||||
if (dev->l2ad_rebuild_began == B_TRUE) {
|
||||
while (dev->l2ad_rebuild == B_TRUE) {
|
||||
cv_wait(&l2arc_rebuild_thr_cv,
|
||||
&l2arc_rebuild_thr_lock);
|
||||
}
|
||||
|
||||
}
|
||||
mutex_exit(&l2arc_rebuild_thr_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Main entry point for L2ARC rebuilding.
|
||||
*/
|
||||
@ -9852,12 +9885,12 @@ l2arc_dev_rebuild_thread(void *arg)
|
||||
{
|
||||
l2arc_dev_t *dev = arg;
|
||||
|
||||
VERIFY(!dev->l2ad_rebuild_cancel);
|
||||
VERIFY(dev->l2ad_rebuild);
|
||||
(void) l2arc_rebuild(dev);
|
||||
mutex_enter(&l2arc_rebuild_thr_lock);
|
||||
dev->l2ad_rebuild_began = B_FALSE;
|
||||
dev->l2ad_rebuild = B_FALSE;
|
||||
cv_signal(&l2arc_rebuild_thr_cv);
|
||||
mutex_exit(&l2arc_rebuild_thr_lock);
|
||||
|
||||
thread_exit();
|
||||
@ -10008,8 +10041,6 @@ l2arc_rebuild(l2arc_dev_t *dev)
|
||||
for (;;) {
|
||||
mutex_enter(&l2arc_rebuild_thr_lock);
|
||||
if (dev->l2ad_rebuild_cancel) {
|
||||
dev->l2ad_rebuild = B_FALSE;
|
||||
cv_signal(&l2arc_rebuild_thr_cv);
|
||||
mutex_exit(&l2arc_rebuild_thr_lock);
|
||||
err = SET_ERROR(ECANCELED);
|
||||
goto out;
|
||||
@ -10585,6 +10616,8 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
|
||||
(void) zio_nowait(wzio);
|
||||
|
||||
dev->l2ad_hand += asize;
|
||||
vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
|
||||
|
||||
/*
|
||||
* Include the committed log block's pointer in the list of pointers
|
||||
* to log blocks present in the L2ARC device.
|
||||
@ -10598,7 +10631,6 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
|
||||
zfs_refcount_add_many(&dev->l2ad_lb_asize, asize, lb_ptr_buf);
|
||||
zfs_refcount_add(&dev->l2ad_lb_count, lb_ptr_buf);
|
||||
mutex_exit(&dev->l2ad_mtx);
|
||||
vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
|
||||
|
||||
/* bump the kstats */
|
||||
ARCSTAT_INCR(arcstat_l2_write_bytes, asize);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -89,7 +89,6 @@ typedef struct dbuf_stats {
|
||||
kstat_named_t hash_misses;
|
||||
kstat_named_t hash_collisions;
|
||||
kstat_named_t hash_elements;
|
||||
kstat_named_t hash_elements_max;
|
||||
/*
|
||||
* Number of sublists containing more than one dbuf in the dbuf
|
||||
* hash table. Keep track of the longest hash chain.
|
||||
@ -134,7 +133,6 @@ dbuf_stats_t dbuf_stats = {
|
||||
{ "hash_misses", KSTAT_DATA_UINT64 },
|
||||
{ "hash_collisions", KSTAT_DATA_UINT64 },
|
||||
{ "hash_elements", KSTAT_DATA_UINT64 },
|
||||
{ "hash_elements_max", KSTAT_DATA_UINT64 },
|
||||
{ "hash_chains", KSTAT_DATA_UINT64 },
|
||||
{ "hash_chain_max", KSTAT_DATA_UINT64 },
|
||||
{ "hash_insert_race", KSTAT_DATA_UINT64 },
|
||||
@ -154,6 +152,7 @@ struct {
|
||||
wmsum_t hash_hits;
|
||||
wmsum_t hash_misses;
|
||||
wmsum_t hash_collisions;
|
||||
wmsum_t hash_elements;
|
||||
wmsum_t hash_chains;
|
||||
wmsum_t hash_insert_race;
|
||||
wmsum_t metadata_cache_count;
|
||||
@ -432,8 +431,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
|
||||
db->db_hash_next = h->hash_table[idx];
|
||||
h->hash_table[idx] = db;
|
||||
mutex_exit(DBUF_HASH_MUTEX(h, idx));
|
||||
uint64_t he = atomic_inc_64_nv(&dbuf_stats.hash_elements.value.ui64);
|
||||
DBUF_STAT_MAX(hash_elements_max, he);
|
||||
DBUF_STAT_BUMP(hash_elements);
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
@ -506,7 +504,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
|
||||
h->hash_table[idx]->db_hash_next == NULL)
|
||||
DBUF_STAT_BUMPDOWN(hash_chains);
|
||||
mutex_exit(DBUF_HASH_MUTEX(h, idx));
|
||||
atomic_dec_64(&dbuf_stats.hash_elements.value.ui64);
|
||||
DBUF_STAT_BUMPDOWN(hash_elements);
|
||||
}
|
||||
|
||||
typedef enum {
|
||||
@ -903,6 +901,8 @@ dbuf_kstat_update(kstat_t *ksp, int rw)
|
||||
wmsum_value(&dbuf_sums.hash_misses);
|
||||
ds->hash_collisions.value.ui64 =
|
||||
wmsum_value(&dbuf_sums.hash_collisions);
|
||||
ds->hash_elements.value.ui64 =
|
||||
wmsum_value(&dbuf_sums.hash_elements);
|
||||
ds->hash_chains.value.ui64 =
|
||||
wmsum_value(&dbuf_sums.hash_chains);
|
||||
ds->hash_insert_race.value.ui64 =
|
||||
@ -1004,6 +1004,7 @@ dbuf_init(void)
|
||||
wmsum_init(&dbuf_sums.hash_hits, 0);
|
||||
wmsum_init(&dbuf_sums.hash_misses, 0);
|
||||
wmsum_init(&dbuf_sums.hash_collisions, 0);
|
||||
wmsum_init(&dbuf_sums.hash_elements, 0);
|
||||
wmsum_init(&dbuf_sums.hash_chains, 0);
|
||||
wmsum_init(&dbuf_sums.hash_insert_race, 0);
|
||||
wmsum_init(&dbuf_sums.metadata_cache_count, 0);
|
||||
@ -1077,6 +1078,7 @@ dbuf_fini(void)
|
||||
wmsum_fini(&dbuf_sums.hash_hits);
|
||||
wmsum_fini(&dbuf_sums.hash_misses);
|
||||
wmsum_fini(&dbuf_sums.hash_collisions);
|
||||
wmsum_fini(&dbuf_sums.hash_elements);
|
||||
wmsum_fini(&dbuf_sums.hash_chains);
|
||||
wmsum_fini(&dbuf_sums.hash_insert_race);
|
||||
wmsum_fini(&dbuf_sums.metadata_cache_count);
|
||||
@ -2578,8 +2580,11 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
* We are freeing a block that we cloned in the same
|
||||
* transaction group.
|
||||
*/
|
||||
brt_pending_remove(dmu_objset_spa(db->db_objset),
|
||||
&dr->dt.dl.dr_overridden_by, tx);
|
||||
blkptr_t *bp = &dr->dt.dl.dr_overridden_by;
|
||||
if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
|
||||
brt_pending_remove(dmu_objset_spa(db->db_objset),
|
||||
bp, tx);
|
||||
}
|
||||
}
|
||||
|
||||
dnode_t *dn = dr->dr_dnode;
|
||||
|
@ -68,6 +68,7 @@
|
||||
#include <sys/zio_compress.h>
|
||||
#include <zfs_fletcher.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/brt.h>
|
||||
|
||||
/*
|
||||
* The SPA supports block sizes up to 16MB. However, very large blocks
|
||||
@ -289,8 +290,26 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
|
||||
if (BP_GET_LOGICAL_BIRTH(bp) > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
|
||||
int64_t delta;
|
||||
|
||||
dprintf_bp(bp, "freeing ds=%llu", (u_longlong_t)ds->ds_object);
|
||||
dsl_free(tx->tx_pool, tx->tx_txg, bp);
|
||||
/*
|
||||
* Put blocks that would create IO on the pool's deadlist for
|
||||
* dsl_process_async_destroys() to find. This is to prevent
|
||||
* zio_free() from creating a ZIO_TYPE_FREE IO for them, which
|
||||
* are very heavy and can lead to out-of-memory conditions if
|
||||
* something tries to free millions of blocks on the same txg.
|
||||
*/
|
||||
boolean_t defer = spa_version(spa) >= SPA_VERSION_DEADLISTS &&
|
||||
(BP_IS_GANG(bp) || BP_GET_DEDUP(bp) ||
|
||||
brt_maybe_exists(spa, bp));
|
||||
|
||||
if (defer) {
|
||||
dprintf_bp(bp, "putting on free list: %s", "");
|
||||
bpobj_enqueue(&ds->ds_dir->dd_pool->dp_free_bpobj,
|
||||
bp, B_FALSE, tx);
|
||||
} else {
|
||||
dprintf_bp(bp, "freeing ds=%llu",
|
||||
(u_longlong_t)ds->ds_object);
|
||||
dsl_free(tx->tx_pool, tx->tx_txg, bp);
|
||||
}
|
||||
|
||||
mutex_enter(&ds->ds_lock);
|
||||
ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used ||
|
||||
@ -298,9 +317,14 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
|
||||
delta = parent_delta(ds, -used);
|
||||
dsl_dataset_phys(ds)->ds_unique_bytes -= used;
|
||||
mutex_exit(&ds->ds_lock);
|
||||
|
||||
dsl_dir_diduse_transfer_space(ds->ds_dir,
|
||||
delta, -compressed, -uncompressed, -used,
|
||||
DD_USED_REFRSRV, DD_USED_HEAD, tx);
|
||||
|
||||
if (defer)
|
||||
dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
|
||||
DD_USED_HEAD, used, compressed, uncompressed, tx);
|
||||
} else {
|
||||
dprintf_bp(bp, "putting on dead list: %s", "");
|
||||
if (async) {
|
||||
|
@ -2081,6 +2081,7 @@ spa_unload(spa_t *spa)
|
||||
vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE);
|
||||
vdev_autotrim_stop_all(spa);
|
||||
vdev_rebuild_stop_all(spa);
|
||||
l2arc_spa_rebuild_stop(spa);
|
||||
}
|
||||
}
|
||||
|
||||
@ -7115,6 +7116,7 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
|
||||
vdev_trim_stop_all(rvd, VDEV_TRIM_ACTIVE);
|
||||
vdev_autotrim_stop_all(spa);
|
||||
vdev_rebuild_stop_all(spa);
|
||||
l2arc_spa_rebuild_stop(spa);
|
||||
|
||||
/*
|
||||
* We want this to be reflected on every label,
|
||||
|
@ -1870,13 +1870,7 @@ spa_get_slop_space(spa_t *spa)
|
||||
if (spa->spa_dedup_dspace == ~0ULL)
|
||||
spa_update_dspace(spa);
|
||||
|
||||
/*
|
||||
* spa_get_dspace() includes the space only logically "used" by
|
||||
* deduplicated data, so since it's not useful to reserve more
|
||||
* space with more deduplicated data, we subtract that out here.
|
||||
*/
|
||||
space =
|
||||
spa_get_dspace(spa) - spa->spa_dedup_dspace - brt_get_dspace(spa);
|
||||
space = spa->spa_rdspace;
|
||||
slop = MIN(space >> spa_slop_shift, spa_max_slop);
|
||||
|
||||
/*
|
||||
@ -1912,8 +1906,7 @@ spa_get_checkpoint_space(spa_t *spa)
|
||||
void
|
||||
spa_update_dspace(spa_t *spa)
|
||||
{
|
||||
spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) +
|
||||
ddt_get_dedup_dspace(spa) + brt_get_dspace(spa);
|
||||
spa->spa_rdspace = metaslab_class_get_dspace(spa_normal_class(spa));
|
||||
if (spa->spa_nonallocating_dspace > 0) {
|
||||
/*
|
||||
* Subtract the space provided by all non-allocating vdevs that
|
||||
@ -1933,9 +1926,11 @@ spa_update_dspace(spa_t *spa)
|
||||
* doesn't matter that the data we are moving may be
|
||||
* allocated twice (on the old device and the new device).
|
||||
*/
|
||||
ASSERT3U(spa->spa_dspace, >=, spa->spa_nonallocating_dspace);
|
||||
spa->spa_dspace -= spa->spa_nonallocating_dspace;
|
||||
ASSERT3U(spa->spa_rdspace, >=, spa->spa_nonallocating_dspace);
|
||||
spa->spa_rdspace -= spa->spa_nonallocating_dspace;
|
||||
}
|
||||
spa->spa_dspace = spa->spa_rdspace + ddt_get_dedup_dspace(spa) +
|
||||
brt_get_dspace(spa);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -248,20 +248,63 @@ zap_leaf_array_create(zap_leaf_t *l, const char *buf,
|
||||
return (chunk_head);
|
||||
}
|
||||
|
||||
static void
|
||||
zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
|
||||
/*
|
||||
* Non-destructively copy array between leaves.
|
||||
*/
|
||||
static uint16_t
|
||||
zap_leaf_array_copy(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
|
||||
{
|
||||
uint16_t chunk = *chunkp;
|
||||
|
||||
*chunkp = CHAIN_END;
|
||||
uint16_t new_chunk;
|
||||
uint16_t *nchunkp = &new_chunk;
|
||||
|
||||
while (chunk != CHAIN_END) {
|
||||
uint_t nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
|
||||
ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
|
||||
ZAP_CHUNK_ARRAY);
|
||||
zap_leaf_chunk_free(l, chunk);
|
||||
chunk = nextchunk;
|
||||
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
|
||||
uint16_t nchunk = zap_leaf_chunk_alloc(nl);
|
||||
|
||||
struct zap_leaf_array *la =
|
||||
&ZAP_LEAF_CHUNK(l, chunk).l_array;
|
||||
struct zap_leaf_array *nla =
|
||||
&ZAP_LEAF_CHUNK(nl, nchunk).l_array;
|
||||
ASSERT3U(la->la_type, ==, ZAP_CHUNK_ARRAY);
|
||||
|
||||
*nla = *la; /* structure assignment */
|
||||
|
||||
chunk = la->la_next;
|
||||
*nchunkp = nchunk;
|
||||
nchunkp = &nla->la_next;
|
||||
}
|
||||
*nchunkp = CHAIN_END;
|
||||
return (new_chunk);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free array. Unlike a trivial loop of zap_leaf_chunk_free(), this does
|
||||
* not reverse order of chunks in the free list, reducing fragmentation.
|
||||
*/
|
||||
static void
|
||||
zap_leaf_array_free(zap_leaf_t *l, uint16_t chunk)
|
||||
{
|
||||
struct zap_leaf_header *hdr = &zap_leaf_phys(l)->l_hdr;
|
||||
/* tailp always points at the link that the next freed chunk is stored in */
uint16_t *tailp = &hdr->lh_freelist;
|
||||
/* remember the current free-list head so it can be re-attached at the end */
uint16_t oldfree = *tailp;
|
||||
|
||||
while (chunk != CHAIN_END) {
|
||||
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
|
||||
zap_leaf_chunk_t *c = &ZAP_LEAF_CHUNK(l, chunk);
|
||||
ASSERT3U(c->l_array.la_type, ==, ZAP_CHUNK_ARRAY);
|
||||
|
||||
/* link this chunk in, in array order */
*tailp = chunk;
|
||||
/*
 * Fetch the next array chunk before the l_free fields are written
 * (presumably l_array and l_free overlay the same storage).
 */
chunk = c->l_array.la_next;
|
||||
|
||||
/* turn the chunk into a free chunk with zeroed padding */
c->l_free.lf_type = ZAP_CHUNK_FREE;
|
||||
memset(c->l_free.lf_pad, 0, sizeof (c->l_free.lf_pad));
|
||||
tailp = &c->l_free.lf_next;
|
||||
|
||||
ASSERT3U(hdr->lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
|
||||
hdr->lh_nfree++;
|
||||
}
|
||||
|
||||
/* append the previous free list after the chunks just freed */
*tailp = oldfree;
|
||||
}
|
||||
|
||||
/* array_len and buf_len are in integers, not bytes */
|
||||
@ -515,7 +558,7 @@ zap_entry_update(zap_entry_handle_t *zeh,
|
||||
if ((int)zap_leaf_phys(l)->l_hdr.lh_nfree < delta_chunks)
|
||||
return (SET_ERROR(EAGAIN));
|
||||
|
||||
zap_leaf_array_free(l, &le->le_value_chunk);
|
||||
zap_leaf_array_free(l, le->le_value_chunk);
|
||||
le->le_value_chunk =
|
||||
zap_leaf_array_create(l, buf, integer_size, num_integers);
|
||||
le->le_value_numints = num_integers;
|
||||
@ -534,10 +577,11 @@ zap_entry_remove(zap_entry_handle_t *zeh)
|
||||
struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry_chunk);
|
||||
ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
|
||||
|
||||
zap_leaf_array_free(l, &le->le_name_chunk);
|
||||
zap_leaf_array_free(l, &le->le_value_chunk);
|
||||
|
||||
*zeh->zeh_chunkp = le->le_next;
|
||||
|
||||
/* Free in opposite order to reduce fragmentation. */
|
||||
zap_leaf_array_free(l, le->le_value_chunk);
|
||||
zap_leaf_array_free(l, le->le_name_chunk);
|
||||
zap_leaf_chunk_free(l, entry_chunk);
|
||||
|
||||
zap_leaf_phys(l)->l_hdr.lh_nentries--;
|
||||
@ -701,34 +745,6 @@ zap_leaf_rehash_entry(zap_leaf_t *l, struct zap_leaf_entry *le, uint16_t entry)
|
||||
return (chunkp);
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
|
||||
{
|
||||
uint16_t new_chunk;
|
||||
uint16_t *nchunkp = &new_chunk;
|
||||
|
||||
while (chunk != CHAIN_END) {
|
||||
uint16_t nchunk = zap_leaf_chunk_alloc(nl);
|
||||
struct zap_leaf_array *nla =
|
||||
&ZAP_LEAF_CHUNK(nl, nchunk).l_array;
|
||||
struct zap_leaf_array *la =
|
||||
&ZAP_LEAF_CHUNK(l, chunk).l_array;
|
||||
uint_t nextchunk = la->la_next;
|
||||
|
||||
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
|
||||
ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));
|
||||
|
||||
*nla = *la; /* structure assignment */
|
||||
|
||||
zap_leaf_chunk_free(l, chunk);
|
||||
chunk = nextchunk;
|
||||
*nchunkp = nchunk;
|
||||
nchunkp = &nla->la_next;
|
||||
}
|
||||
*nchunkp = CHAIN_END;
|
||||
return (new_chunk);
|
||||
}
|
||||
|
||||
static void
|
||||
zap_leaf_transfer_entry(zap_leaf_t *l, uint_t entry, zap_leaf_t *nl)
|
||||
{
|
||||
@ -741,10 +757,12 @@ zap_leaf_transfer_entry(zap_leaf_t *l, uint_t entry, zap_leaf_t *nl)
|
||||
|
||||
(void) zap_leaf_rehash_entry(nl, nle, chunk);
|
||||
|
||||
nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
|
||||
nle->le_value_chunk =
|
||||
zap_leaf_transfer_array(l, le->le_value_chunk, nl);
|
||||
nle->le_name_chunk = zap_leaf_array_copy(l, le->le_name_chunk, nl);
|
||||
nle->le_value_chunk = zap_leaf_array_copy(l, le->le_value_chunk, nl);
|
||||
|
||||
/* Free in opposite order to reduce fragmentation. */
|
||||
zap_leaf_array_free(l, le->le_value_chunk);
|
||||
zap_leaf_array_free(l, le->le_name_chunk);
|
||||
zap_leaf_chunk_free(l, entry);
|
||||
|
||||
zap_leaf_phys(l)->l_hdr.lh_nentries--;
|
||||
|
@ -1227,6 +1227,21 @@ zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
 * Shared body of the zap_prefetch_uint64*() entry points: build a zap_name_t
 * from the uint64 key, call fzap_prefetch() on it, and free the name.
 *
 * Calls zap_unlockdir(zap, FTAG) on every return path, so the caller must
 * not unlock the zap itself.
 *
 * Returns 0, or ENOTSUP if zap_name_alloc_uint64() returns NULL.
 */
static int
|
||||
zap_prefetch_uint64_impl(zap_t *zap, const uint64_t *key, int key_numints)
|
||||
{
|
||||
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
/* no name could be built; still release the zap before failing */
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
|
||||
fzap_prefetch(zn);
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints)
|
||||
@ -1237,13 +1252,37 @@ zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_prefetch_uint64_impl(zap, key, key_numints);
|
||||
/* zap_prefetch_uint64_impl() calls zap_unlockdir() */
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
 * Prefetch the entry for a uint64-array key in the ZAP backed by dnode dn.
 *
 * Locks the ZAP as reader with zap_lockdir_by_dnode() and hands it to
 * zap_prefetch_uint64_impl(). Returns the lockdir error if locking fails,
 * otherwise whatever the impl returns.
 */
int
|
||||
zap_prefetch_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints)
|
||||
{
|
||||
zap_t *zap;
|
||||
|
||||
int err =
|
||||
zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_prefetch_uint64_impl(zap, key, key_numints);
|
||||
/* zap_prefetch_uint64_impl() calls zap_unlockdir() */
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
zap_lookup_uint64_impl(zap_t *zap, const uint64_t *key,
|
||||
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
|
||||
{
|
||||
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
|
||||
fzap_prefetch(zn);
|
||||
int err = fzap_lookup(zn, integer_size, num_integers, buf,
|
||||
NULL, 0, NULL);
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
@ -1259,16 +1298,25 @@ zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
err = zap_lookup_uint64_impl(zap, key, key_numints, integer_size,
|
||||
num_integers, buf);
|
||||
/* zap_lookup_uint64_impl() calls zap_unlockdir() */
|
||||
return (err);
|
||||
}
|
||||
|
||||
err = fzap_lookup(zn, integer_size, num_integers, buf,
|
||||
NULL, 0, NULL);
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
/*
 * Look up a uint64-array key in the ZAP backed by dnode dn.
 *
 * Locks the ZAP as reader with zap_lockdir_by_dnode() and hands it, together
 * with integer_size, num_integers and buf, to zap_lookup_uint64_impl().
 * Returns the lockdir error if locking fails, otherwise whatever the impl
 * returns.
 */
int
|
||||
zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
|
||||
{
|
||||
zap_t *zap;
|
||||
|
||||
int err =
|
||||
zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_lookup_uint64_impl(zap, key, key_numints, integer_size,
|
||||
num_integers, buf);
|
||||
/* zap_lookup_uint64_impl() calls zap_unlockdir() */
|
||||
return (err);
|
||||
}
|
||||
|
||||
|
@ -2192,31 +2192,20 @@ zio_delay_interrupt(zio_t *zio)
|
||||
} else {
|
||||
taskqid_t tid;
|
||||
hrtime_t diff = zio->io_target_timestamp - now;
|
||||
clock_t expire_at_tick = ddi_get_lbolt() +
|
||||
NSEC_TO_TICK(diff);
|
||||
int ticks = MAX(1, NSEC_TO_TICK(diff));
|
||||
clock_t expire_at_tick = ddi_get_lbolt() + ticks;
|
||||
|
||||
DTRACE_PROBE3(zio__delay__hit, zio_t *, zio,
|
||||
hrtime_t, now, hrtime_t, diff);
|
||||
|
||||
if (NSEC_TO_TICK(diff) == 0) {
|
||||
/* Our delay is less than a jiffy - just spin */
|
||||
zfs_sleep_until(zio->io_target_timestamp);
|
||||
zio_interrupt(zio);
|
||||
} else {
|
||||
tid = taskq_dispatch_delay(system_taskq, zio_interrupt,
|
||||
zio, TQ_NOSLEEP, expire_at_tick);
|
||||
if (tid == TASKQID_INVALID) {
|
||||
/*
|
||||
* Use taskq_dispatch_delay() in the place of
|
||||
* OpenZFS's timeout_generic().
|
||||
* Couldn't allocate a task. Just finish the
|
||||
* zio without a delay.
|
||||
*/
|
||||
tid = taskq_dispatch_delay(system_taskq,
|
||||
zio_interrupt, zio, TQ_NOSLEEP,
|
||||
expire_at_tick);
|
||||
if (tid == TASKQID_INVALID) {
|
||||
/*
|
||||
* Couldn't allocate a task. Just
|
||||
* finish the zio without a delay.
|
||||
*/
|
||||
zio_interrupt(zio);
|
||||
}
|
||||
zio_interrupt(zio);
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
@ -160,6 +160,12 @@ abd_fletcher_4_byteswap(abd_t *abd, uint64_t size,
|
||||
abd_fletcher_4_impl(abd, size, &acd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Checksum vectors.
|
||||
*
|
||||
* Note: you cannot change the name string for these functions, as they are
|
||||
* embedded in on-disk data in some places (e.g. dedup table names).
|
||||
*/
|
||||
zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
|
||||
{{NULL, NULL}, NULL, NULL, 0, "inherit"},
|
||||
{{NULL, NULL}, NULL, NULL, 0, "on"},
|
||||
|
@ -44,10 +44,6 @@ static unsigned long zio_decompress_fail_fraction = 0;
|
||||
|
||||
/*
|
||||
* Compression vectors.
|
||||
*
|
||||
* NOTE: DO NOT CHANGE THE NAMES OF THESE COMPRESSION FUNCTIONS.
|
||||
* THEY ARE USED AS ZAP KEY NAMES BY FAST DEDUP AND THEREFORE
|
||||
* PART OF THE ON-DISK FORMAT.
|
||||
*/
|
||||
zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
|
||||
{"inherit", 0, NULL, NULL, NULL},
|
||||
|
@ -32,6 +32,7 @@ Requires(post): gcc, make, perl, diffutils
|
||||
%if 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version}%{?openEuler}
|
||||
Requires: kernel-devel >= @ZFS_META_KVER_MIN@, kernel-devel <= @ZFS_META_KVER_MAX@.999
|
||||
Requires(post): kernel-devel >= @ZFS_META_KVER_MIN@, kernel-devel <= @ZFS_META_KVER_MAX@.999
|
||||
Conflicts: kernel-devel < @ZFS_META_KVER_MIN@, kernel-devel > @ZFS_META_KVER_MAX@.999
|
||||
Obsoletes: spl-dkms <= %{version}
|
||||
%endif
|
||||
Provides: %{module}-kmod = %{version}
|
||||
|
@ -19,9 +19,13 @@
|
||||
*/
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
#ifdef _KERNEL
|
||||
#include <sys/fcntl.h>
|
||||
#else
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
#include <linux/fs.h>
|
||||
#include <err.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
@ -41,9 +41,11 @@ log_must zfs set compress=zle $TESTDSTFS
|
||||
|
||||
for prop in "${sync_prop_vals[@]}"; do
|
||||
log_must zfs set sync=$prop $TESTSRCFS
|
||||
# 15*8=120, which is greater than 113, so we are sure the data won't
|
||||
# be embedded into BP.
|
||||
# 32767*8=262136, which is larger than a single default recordsize of
|
||||
# 131072.
|
||||
FILESIZE=$(random_int_between 1 32767)
|
||||
FILESIZE=$(random_int_between 15 32767)
|
||||
FILESIZE=$((FILESIZE * 8))
|
||||
bclone_test random $FILESIZE false $TESTSRCDIR $TESTSRCDIR
|
||||
done
|
||||
@ -52,9 +54,11 @@ for srcprop in "${sync_prop_vals[@]}"; do
|
||||
log_must zfs set sync=$srcprop $TESTSRCFS
|
||||
for dstprop in "${sync_prop_vals[@]}"; do
|
||||
log_must zfs set sync=$dstprop $TESTDSTFS
|
||||
# 15*8=120, which is greater than 113, so we are sure the data won't
|
||||
# be embedded into BP.
|
||||
# 32767*8=262136, which is larger than a single default recordsize of
|
||||
# 131072.
|
||||
FILESIZE=$(random_int_between 1 32767)
|
||||
FILESIZE=$(random_int_between 15 32767)
|
||||
FILESIZE=$((FILESIZE * 8))
|
||||
bclone_test random $FILESIZE false $TESTSRCDIR $TESTDSTDIR
|
||||
done
|
||||
|
@ -69,15 +69,16 @@ for raid_type in "draid2:3d:6c:1s" "raidz2"; do
|
||||
log_mustnot eval "zpool status -e $TESTPOOL2 | grep ONLINE"
|
||||
|
||||
# Check no ONLINE slow vdevs are shown. Then mark IOs greater than
|
||||
# 160ms slow, delay IOs 320ms to vdev6, check slow IOs.
|
||||
# 750ms slow, delay IOs 1000ms to vdev6, check slow IOs.
|
||||
log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev6 "ONLINE"
|
||||
log_mustnot eval "zpool status -es $TESTPOOL2 | grep ONLINE"
|
||||
|
||||
log_must set_tunable64 ZIO_SLOW_IO_MS 160
|
||||
log_must zinject -d $TESTDIR/vdev6 -D320:100 $TESTPOOL2
|
||||
log_must set_tunable64 ZIO_SLOW_IO_MS 750
|
||||
log_must zinject -d $TESTDIR/vdev6 -D1000:100 $TESTPOOL2
|
||||
log_must mkfile 1048576 /$TESTPOOL2/testfile
|
||||
sync_pool $TESTPOOL2
|
||||
log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
|
||||
log_must zinject -c all
|
||||
|
||||
# Check vdev6 slow IOs are only shown when requested with -s.
|
||||
log_mustnot eval "zpool status -e $TESTPOOL2 | grep $TESTDIR/vdev6 | grep ONLINE"
|
||||
@ -95,10 +96,9 @@ for raid_type in "draid2:3d:6c:1s" "raidz2"; do
|
||||
log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev2 | grep ONLINE"
|
||||
log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev3 | grep ONLINE"
|
||||
|
||||
log_must zinject -c all
|
||||
log_must zpool status -es $TESTPOOL2
|
||||
|
||||
zpool destroy $TESTPOOL2
|
||||
log_must zpool destroy $TESTPOOL2
|
||||
done
|
||||
|
||||
log_pass "Verify zpool status -e shows only unhealthy vdevs"
|
||||
|
@ -792,7 +792,7 @@
|
||||
/* #undef ZFS_DEVICE_MINOR */
|
||||
|
||||
/* Define the project alias string. */
|
||||
#define ZFS_META_ALIAS "zfs-2.3.99-64-FreeBSD_g1c9a4c8cb"
|
||||
#define ZFS_META_ALIAS "zfs-2.3.99-92-FreeBSD_gd0a91b9f8"
|
||||
|
||||
/* Define the project author. */
|
||||
#define ZFS_META_AUTHOR "OpenZFS"
|
||||
@ -801,7 +801,7 @@
|
||||
/* #undef ZFS_META_DATA */
|
||||
|
||||
/* Define the maximum compatible kernel version. */
|
||||
#define ZFS_META_KVER_MAX "6.11"
|
||||
#define ZFS_META_KVER_MAX "6.12"
|
||||
|
||||
/* Define the minimum compatible kernel version. */
|
||||
#define ZFS_META_KVER_MIN "4.18"
|
||||
@ -822,7 +822,7 @@
|
||||
#define ZFS_META_NAME "zfs"
|
||||
|
||||
/* Define the project release. */
|
||||
#define ZFS_META_RELEASE "64-FreeBSD_g1c9a4c8cb"
|
||||
#define ZFS_META_RELEASE "92-FreeBSD_gd0a91b9f8"
|
||||
|
||||
/* Define the project version. */
|
||||
#define ZFS_META_VERSION "2.3.99"
|
||||
|
@ -1 +1 @@
|
||||
#define ZFS_META_GITREV "zfs-2.3.99-64-g1c9a4c8cb"
|
||||
#define ZFS_META_GITREV "zfs-2.3.99-92-gd0a91b9f8"
|
||||
|
Loading…
Reference in New Issue
Block a user