mirror of
https://github.com/freebsd/freebsd-src.git
synced 2024-11-28 02:32:47 +00:00
zfs: merge openzfs/zfs@dbda45160
Notable upstream pull request merges:
 #15665 9b1677fb5 dmu: Allow buffer fills to fail
Obtained from: OpenZFS
OpenZFS commit: dbda45160f
This commit is contained in:
commit
188408da9f
@ -62,7 +62,7 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off)
|
||||
}
|
||||
|
||||
static inline void
|
||||
zfs_uio_advance(zfs_uio_t *uio, size_t size)
|
||||
zfs_uio_advance(zfs_uio_t *uio, ssize_t size)
|
||||
{
|
||||
zfs_uio_resid(uio) -= size;
|
||||
zfs_uio_offset(uio) += size;
|
||||
|
@ -95,7 +95,7 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off)
|
||||
}
|
||||
|
||||
static inline void
|
||||
zfs_uio_advance(zfs_uio_t *uio, size_t size)
|
||||
zfs_uio_advance(zfs_uio_t *uio, ssize_t size)
|
||||
{
|
||||
uio->uio_resid -= size;
|
||||
uio->uio_loffset += size;
|
||||
|
@ -380,8 +380,8 @@ dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
|
||||
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
|
||||
void dmu_buf_will_clone(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail);
|
||||
boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed);
|
||||
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
|
||||
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid,
|
||||
|
@ -90,7 +90,7 @@ zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len)
|
||||
}
|
||||
|
||||
static inline void
|
||||
zfs_uio_advance(zfs_uio_t *uio, size_t size)
|
||||
zfs_uio_advance(zfs_uio_t *uio, ssize_t size)
|
||||
{
|
||||
uio->uio_resid -= size;
|
||||
uio->uio_loffset += size;
|
||||
|
@ -107,7 +107,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
|
||||
|
||||
if (tocpy == db->db_size)
|
||||
dmu_buf_will_fill(db, tx);
|
||||
dmu_buf_will_fill(db, tx, B_FALSE);
|
||||
else
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
|
||||
@ -123,7 +123,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
}
|
||||
|
||||
if (tocpy == db->db_size)
|
||||
dmu_buf_fill_done(db, tx);
|
||||
dmu_buf_fill_done(db, tx, B_FALSE);
|
||||
|
||||
offset += tocpy;
|
||||
size -= tocpy;
|
||||
|
@ -2751,7 +2751,7 @@ dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
void
|
||||
dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
||||
dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail)
|
||||
{
|
||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
||||
|
||||
@ -2769,8 +2769,14 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
||||
* Block cloning: We will be completely overwriting a block
|
||||
* cloned in this transaction group, so let's undirty the
|
||||
* pending clone and mark the block as uncached. This will be
|
||||
* as if the clone was never done.
|
||||
* as if the clone was never done. But if the fill can fail
|
||||
* we should have a way to return back to the cloned data.
|
||||
*/
|
||||
if (canfail && dbuf_find_dirty_eq(db, tx->tx_txg) != NULL) {
|
||||
mutex_exit(&db->db_mtx);
|
||||
dmu_buf_will_dirty(db_fake, tx);
|
||||
return;
|
||||
}
|
||||
VERIFY(!dbuf_undirty(db, tx));
|
||||
db->db_state = DB_UNCACHED;
|
||||
}
|
||||
@ -2831,32 +2837,41 @@ dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
dl->dr_overridden_by.blk_birth = dr->dr_txg;
|
||||
}
|
||||
|
||||
void
|
||||
dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx)
|
||||
boolean_t
|
||||
dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx, boolean_t failed)
|
||||
{
|
||||
(void) tx;
|
||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
|
||||
dbuf_states_t old_state;
|
||||
mutex_enter(&db->db_mtx);
|
||||
DBUF_VERIFY(db);
|
||||
|
||||
old_state = db->db_state;
|
||||
db->db_state = DB_CACHED;
|
||||
if (old_state == DB_FILL) {
|
||||
if (db->db_state == DB_FILL) {
|
||||
if (db->db_level == 0 && db->db_freed_in_flight) {
|
||||
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
||||
/* we were freed while filling */
|
||||
/* XXX dbuf_undirty? */
|
||||
memset(db->db.db_data, 0, db->db.db_size);
|
||||
db->db_freed_in_flight = FALSE;
|
||||
db->db_state = DB_CACHED;
|
||||
DTRACE_SET_STATE(db,
|
||||
"fill done handling freed in flight");
|
||||
failed = B_FALSE;
|
||||
} else if (failed) {
|
||||
VERIFY(!dbuf_undirty(db, tx));
|
||||
db->db_buf = NULL;
|
||||
dbuf_clear_data(db);
|
||||
DTRACE_SET_STATE(db, "fill failed");
|
||||
} else {
|
||||
db->db_state = DB_CACHED;
|
||||
DTRACE_SET_STATE(db, "fill done");
|
||||
}
|
||||
cv_broadcast(&db->db_changed);
|
||||
} else {
|
||||
db->db_state = DB_CACHED;
|
||||
failed = B_FALSE;
|
||||
}
|
||||
mutex_exit(&db->db_mtx);
|
||||
return (failed);
|
||||
}
|
||||
|
||||
void
|
||||
@ -3001,7 +3016,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
|
||||
DTRACE_SET_STATE(db, "filling assigned arcbuf");
|
||||
mutex_exit(&db->db_mtx);
|
||||
(void) dbuf_dirty(db, tx);
|
||||
dmu_buf_fill_done(&db->db, tx);
|
||||
dmu_buf_fill_done(&db->db, tx, B_FALSE);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1134,14 +1134,14 @@ dmu_write_impl(dmu_buf_t **dbp, int numbufs, uint64_t offset, uint64_t size,
|
||||
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
|
||||
|
||||
if (tocpy == db->db_size)
|
||||
dmu_buf_will_fill(db, tx);
|
||||
dmu_buf_will_fill(db, tx, B_FALSE);
|
||||
else
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
|
||||
(void) memcpy((char *)db->db_data + bufoff, buf, tocpy);
|
||||
|
||||
if (tocpy == db->db_size)
|
||||
dmu_buf_fill_done(db, tx);
|
||||
dmu_buf_fill_done(db, tx, B_FALSE);
|
||||
|
||||
offset += tocpy;
|
||||
size -= tocpy;
|
||||
@ -1349,27 +1349,24 @@ dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx)
|
||||
|
||||
ASSERT(size > 0);
|
||||
|
||||
bufoff = zfs_uio_offset(uio) - db->db_offset;
|
||||
offset_t off = zfs_uio_offset(uio);
|
||||
bufoff = off - db->db_offset;
|
||||
tocpy = MIN(db->db_size - bufoff, size);
|
||||
|
||||
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
|
||||
|
||||
if (tocpy == db->db_size)
|
||||
dmu_buf_will_fill(db, tx);
|
||||
dmu_buf_will_fill(db, tx, B_TRUE);
|
||||
else
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
|
||||
/*
|
||||
* XXX zfs_uiomove could block forever (eg.nfs-backed
|
||||
* pages). There needs to be a uiolockdown() function
|
||||
* to lock the pages in memory, so that zfs_uiomove won't
|
||||
* block.
|
||||
*/
|
||||
err = zfs_uio_fault_move((char *)db->db_data + bufoff,
|
||||
tocpy, UIO_WRITE, uio);
|
||||
|
||||
if (tocpy == db->db_size)
|
||||
dmu_buf_fill_done(db, tx);
|
||||
if (tocpy == db->db_size && dmu_buf_fill_done(db, tx, err)) {
|
||||
/* The fill was reverted. Undo any uio progress. */
|
||||
zfs_uio_advance(uio, off - zfs_uio_offset(uio));
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
|
@ -2532,7 +2532,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
|
||||
* size of the provided arc_buf_t.
|
||||
*/
|
||||
if (db_spill->db_size != drrs->drr_length) {
|
||||
dmu_buf_will_fill(db_spill, tx);
|
||||
dmu_buf_will_fill(db_spill, tx, B_FALSE);
|
||||
VERIFY0(dbuf_spill_set_blksz(db_spill,
|
||||
drrs->drr_length, tx));
|
||||
}
|
||||
|
@ -490,7 +490,7 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
|
||||
dmu_buf_t *db;
|
||||
VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,
|
||||
DB_RF_MUST_SUCCEED, FTAG, &db));
|
||||
dmu_buf_will_fill(db, tx);
|
||||
dmu_buf_will_fill(db, tx, B_FALSE);
|
||||
VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,
|
||||
SPA_MINBLOCKSIZE), tx));
|
||||
local_rl->rl_phys = db->db_data;
|
||||
|
@ -44,7 +44,8 @@ tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial',
|
||||
'block_cloning_copyfilerange_cross_dataset',
|
||||
'block_cloning_cross_enc_dataset',
|
||||
'block_cloning_copyfilerange_fallback_same_txg',
|
||||
'block_cloning_replay', 'block_cloning_replay_encrypted']
|
||||
'block_cloning_replay', 'block_cloning_replay_encrypted',
|
||||
'block_cloning_lwb_buffer_overflow']
|
||||
tags = ['functional', 'block_cloning']
|
||||
|
||||
[tests/functional/chattr:Linux]
|
||||
|
@ -305,6 +305,8 @@ elif sys.platform.startswith('linux'):
|
||||
['SKIP', cfr_reason],
|
||||
'block_cloning/block_cloning_replay_encrypted':
|
||||
['SKIP', cfr_reason],
|
||||
'block_cloning/block_cloning_lwb_buffer_overflow':
|
||||
['SKIP', cfr_reason],
|
||||
'block_cloning/block_cloning_copyfilerange_cross_dataset':
|
||||
['SKIP', cfr_cross_reason],
|
||||
'block_cloning/block_cloning_copyfilerange_fallback_same_txg':
|
||||
|
@ -454,6 +454,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/block_cloning/block_cloning_cross_enc_dataset.ksh \
|
||||
functional/block_cloning/block_cloning_replay.ksh \
|
||||
functional/block_cloning/block_cloning_replay_encrypted.ksh \
|
||||
functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh \
|
||||
functional/bootfs/bootfs_001_pos.ksh \
|
||||
functional/bootfs/bootfs_002_neg.ksh \
|
||||
functional/bootfs/bootfs_003_pos.ksh \
|
||||
|
@ -0,0 +1,89 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2023 by iXsystems, Inc. All rights reserved.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Test for LWB buffer overflow with multiple VDEVs ZIL when 128KB
|
||||
# block write is split into two 68KB ones, trying to write maximum
|
||||
# sized 128KB TX_CLONE_RANGE record with 1022 block pointers into
|
||||
# 68KB buffer.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a pool with multiple VDEVs ZIL
|
||||
# 2. Write maximum-sized TX_CLONE_RANGE record with 1022 block
|
||||
# pointers into 68KB buffer
|
||||
# 3. Sync TXG
|
||||
# 4. Clone the file
|
||||
# 5. Synchronize cached writes
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then
|
||||
log_unsupported "copy_file_range not available before Linux 4.5"
|
||||
fi
|
||||
|
||||
VDIR=$TEST_BASE_DIR/disk-bclone
|
||||
VDEV="$VDIR/a $VDIR/b $VDIR/c"
|
||||
LDEV="$VDIR/e $VDIR/f"
|
||||
|
||||
function cleanup
|
||||
{
|
||||
datasetexists $TESTPOOL && destroy_pool $TESTPOOL
|
||||
rm -rf $VDIR
|
||||
}
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_assert "Test for LWB buffer overflow with multiple VDEVs ZIL"
|
||||
|
||||
log_must rm -rf $VDIR
|
||||
log_must mkdir -p $VDIR
|
||||
log_must truncate -s $MINVDEVSIZE $VDEV $LDEV
|
||||
|
||||
log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV \
|
||||
log mirror $LDEV
|
||||
log_must zfs create -o recordsize=32K $TESTPOOL/$TESTFS
|
||||
# Each ZIL log entry can fit 130816 bytes for a block cloning operation,
|
||||
# so it can store 1022 block pointers. When LWB optimization is enabled,
|
||||
# an assert is hit when 128KB block write is split into two 68KB ones
|
||||
# for 2 SLOG devices
|
||||
log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 bs=32K count=1022 \
|
||||
conv=fsync
|
||||
sync_pool $TESTPOOL
|
||||
log_must clonefile -c /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2
|
||||
log_must sync
|
||||
|
||||
sync_pool $TESTPOOL
|
||||
log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2
|
||||
typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 $TESTPOOL/$TESTFS file2)
|
||||
log_must [ "$blocks" = "$(seq -s " " 0 1021)" ]
|
||||
|
||||
log_pass "LWB buffer overflow is not triggered with multiple VDEVs ZIL"
|
||||
|
@ -1113,7 +1113,7 @@
|
||||
/* #undef ZFS_IS_GPL_COMPATIBLE */
|
||||
|
||||
/* Define the project alias string. */
|
||||
#define ZFS_META_ALIAS "zfs-2.2.99-268-FreeBSD_g86e115e21"
|
||||
#define ZFS_META_ALIAS "zfs-2.2.99-270-FreeBSD_gdbda45160"
|
||||
|
||||
/* Define the project author. */
|
||||
#define ZFS_META_AUTHOR "OpenZFS"
|
||||
@ -1143,7 +1143,7 @@
|
||||
#define ZFS_META_NAME "zfs"
|
||||
|
||||
/* Define the project release. */
|
||||
#define ZFS_META_RELEASE "268-FreeBSD_g86e115e21"
|
||||
#define ZFS_META_RELEASE "270-FreeBSD_gdbda45160"
|
||||
|
||||
/* Define the project version. */
|
||||
#define ZFS_META_VERSION "2.2.99"
|
||||
|
@ -1 +1 @@
|
||||
#define ZFS_META_GITREV "zfs-2.2.99-268-g86e115e21"
|
||||
#define ZFS_META_GITREV "zfs-2.2.99-270-gdbda45160"
|
||||
|
Loading…
Reference in New Issue
Block a user