mirror of
https://github.com/freebsd/freebsd-src.git
synced 2024-11-26 20:12:44 +00:00
Reduce dirty records memory usage
Small block workloads may use a very large number of dirty records. During a simple block cloning test, due to BRT still using 4KB blocks, I can easily see up to 2.5M of those used. Before this change, the dbuf_dirty_record_t structures representing them were allocated via kmem_zalloc(), which rounded their size up to 512 bytes. Introducing a specialized kmem cache reduces the size from 512 to 408 bytes. Additionally, since override and raw params in dirty records are mutually exclusive, putting them into a union reduces the structure size down to 368 bytes, increasing the saving to 28%, which can be 0.5GB or more of RAM. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Brian Atkinson <batkinson@lanl.gov> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Closes #16694
This commit is contained in:
parent
91bd12dfeb
commit
b16e096198
@ -171,7 +171,6 @@ typedef struct dbuf_dirty_record {
|
|||||||
* gets COW'd in a subsequent transaction group.
|
* gets COW'd in a subsequent transaction group.
|
||||||
*/
|
*/
|
||||||
arc_buf_t *dr_data;
|
arc_buf_t *dr_data;
|
||||||
blkptr_t dr_overridden_by;
|
|
||||||
override_states_t dr_override_state;
|
override_states_t dr_override_state;
|
||||||
uint8_t dr_copies;
|
uint8_t dr_copies;
|
||||||
boolean_t dr_nopwrite;
|
boolean_t dr_nopwrite;
|
||||||
@ -179,14 +178,21 @@ typedef struct dbuf_dirty_record {
|
|||||||
boolean_t dr_diowrite;
|
boolean_t dr_diowrite;
|
||||||
boolean_t dr_has_raw_params;
|
boolean_t dr_has_raw_params;
|
||||||
|
|
||||||
/*
|
/* Override and raw params are mutually exclusive. */
|
||||||
* If dr_has_raw_params is set, the following crypt
|
union {
|
||||||
* params will be set on the BP that's written.
|
blkptr_t dr_overridden_by;
|
||||||
*/
|
struct {
|
||||||
boolean_t dr_byteorder;
|
/*
|
||||||
uint8_t dr_salt[ZIO_DATA_SALT_LEN];
|
* If dr_has_raw_params is set, the
|
||||||
uint8_t dr_iv[ZIO_DATA_IV_LEN];
|
* following crypt params will be set
|
||||||
uint8_t dr_mac[ZIO_DATA_MAC_LEN];
|
* on the BP that's written.
|
||||||
|
*/
|
||||||
|
boolean_t dr_byteorder;
|
||||||
|
uint8_t dr_salt[ZIO_DATA_SALT_LEN];
|
||||||
|
uint8_t dr_iv[ZIO_DATA_IV_LEN];
|
||||||
|
uint8_t dr_mac[ZIO_DATA_MAC_LEN];
|
||||||
|
};
|
||||||
|
};
|
||||||
} dl;
|
} dl;
|
||||||
struct dirty_lightweight_leaf {
|
struct dirty_lightweight_leaf {
|
||||||
/*
|
/*
|
||||||
@ -346,6 +352,8 @@ typedef struct dbuf_hash_table {
|
|||||||
|
|
||||||
typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t);
|
typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t);
|
||||||
|
|
||||||
|
extern kmem_cache_t *dbuf_dirty_kmem_cache;
|
||||||
|
|
||||||
uint64_t dbuf_whichblock(const struct dnode *di, const int64_t level,
|
uint64_t dbuf_whichblock(const struct dnode *di, const int64_t level,
|
||||||
const uint64_t offset);
|
const uint64_t offset);
|
||||||
|
|
||||||
|
@ -182,6 +182,7 @@ static void dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr);
|
|||||||
* Global data structures and functions for the dbuf cache.
|
* Global data structures and functions for the dbuf cache.
|
||||||
*/
|
*/
|
||||||
static kmem_cache_t *dbuf_kmem_cache;
|
static kmem_cache_t *dbuf_kmem_cache;
|
||||||
|
kmem_cache_t *dbuf_dirty_kmem_cache;
|
||||||
static taskq_t *dbu_evict_taskq;
|
static taskq_t *dbu_evict_taskq;
|
||||||
|
|
||||||
static kthread_t *dbuf_cache_evict_thread;
|
static kthread_t *dbuf_cache_evict_thread;
|
||||||
@ -966,6 +967,8 @@ dbuf_init(void)
|
|||||||
dbuf_kmem_cache = kmem_cache_create("dmu_buf_impl_t",
|
dbuf_kmem_cache = kmem_cache_create("dmu_buf_impl_t",
|
||||||
sizeof (dmu_buf_impl_t),
|
sizeof (dmu_buf_impl_t),
|
||||||
0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
|
0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
|
||||||
|
dbuf_dirty_kmem_cache = kmem_cache_create("dbuf_dirty_record_t",
|
||||||
|
sizeof (dbuf_dirty_record_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
|
||||||
|
|
||||||
for (int i = 0; i < hmsize; i++)
|
for (int i = 0; i < hmsize; i++)
|
||||||
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_NOLOCKDEP, NULL);
|
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_NOLOCKDEP, NULL);
|
||||||
@ -1041,6 +1044,7 @@ dbuf_fini(void)
|
|||||||
sizeof (kmutex_t));
|
sizeof (kmutex_t));
|
||||||
|
|
||||||
kmem_cache_destroy(dbuf_kmem_cache);
|
kmem_cache_destroy(dbuf_kmem_cache);
|
||||||
|
kmem_cache_destroy(dbuf_dirty_kmem_cache);
|
||||||
taskq_destroy(dbu_evict_taskq);
|
taskq_destroy(dbu_evict_taskq);
|
||||||
|
|
||||||
mutex_enter(&dbuf_evict_lock);
|
mutex_enter(&dbuf_evict_lock);
|
||||||
@ -2343,7 +2347,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
|||||||
* to make a copy of it so that the changes we make in this
|
* to make a copy of it so that the changes we make in this
|
||||||
* transaction group won't leak out when we sync the older txg.
|
* transaction group won't leak out when we sync the older txg.
|
||||||
*/
|
*/
|
||||||
dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP);
|
dr = kmem_cache_alloc(dbuf_dirty_kmem_cache, KM_SLEEP);
|
||||||
|
memset(dr, 0, sizeof (*dr));
|
||||||
list_link_init(&dr->dr_dirty_node);
|
list_link_init(&dr->dr_dirty_node);
|
||||||
list_link_init(&dr->dr_dbuf_node);
|
list_link_init(&dr->dr_dbuf_node);
|
||||||
dr->dr_dnode = dn;
|
dr->dr_dnode = dn;
|
||||||
@ -2526,7 +2531,7 @@ dbuf_undirty_bonus(dbuf_dirty_record_t *dr)
|
|||||||
mutex_destroy(&dr->dt.di.dr_mtx);
|
mutex_destroy(&dr->dt.di.dr_mtx);
|
||||||
list_destroy(&dr->dt.di.dr_children);
|
list_destroy(&dr->dt.di.dr_children);
|
||||||
}
|
}
|
||||||
kmem_free(dr, sizeof (dbuf_dirty_record_t));
|
kmem_cache_free(dbuf_dirty_kmem_cache, dr);
|
||||||
ASSERT3U(db->db_dirtycnt, >, 0);
|
ASSERT3U(db->db_dirtycnt, >, 0);
|
||||||
db->db_dirtycnt -= 1;
|
db->db_dirtycnt -= 1;
|
||||||
}
|
}
|
||||||
@ -2616,7 +2621,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
kmem_free(dr, sizeof (dbuf_dirty_record_t));
|
kmem_cache_free(dbuf_dirty_kmem_cache, dr);
|
||||||
|
|
||||||
ASSERT(db->db_dirtycnt > 0);
|
ASSERT(db->db_dirtycnt > 0);
|
||||||
db->db_dirtycnt -= 1;
|
db->db_dirtycnt -= 1;
|
||||||
@ -2941,7 +2946,7 @@ dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
|
|||||||
* (see dbuf_sync_dnode_leaf_crypt()).
|
* (see dbuf_sync_dnode_leaf_crypt()).
|
||||||
*/
|
*/
|
||||||
ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT);
|
ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT);
|
||||||
ASSERT3U(db->db_level, ==, 0);
|
ASSERT0(db->db_level);
|
||||||
ASSERT(db->db_objset->os_raw_receive);
|
ASSERT(db->db_objset->os_raw_receive);
|
||||||
|
|
||||||
dmu_buf_will_dirty_impl(db_fake,
|
dmu_buf_will_dirty_impl(db_fake,
|
||||||
@ -2950,6 +2955,7 @@ dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
|
|||||||
dr = dbuf_find_dirty_eq(db, tx->tx_txg);
|
dr = dbuf_find_dirty_eq(db, tx->tx_txg);
|
||||||
|
|
||||||
ASSERT3P(dr, !=, NULL);
|
ASSERT3P(dr, !=, NULL);
|
||||||
|
ASSERT3U(dr->dt.dl.dr_override_state, ==, DR_NOT_OVERRIDDEN);
|
||||||
|
|
||||||
dr->dt.dl.dr_has_raw_params = B_TRUE;
|
dr->dt.dl.dr_has_raw_params = B_TRUE;
|
||||||
dr->dt.dl.dr_byteorder = byteorder;
|
dr->dt.dl.dr_byteorder = byteorder;
|
||||||
@ -2964,10 +2970,14 @@ dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx)
|
|||||||
struct dirty_leaf *dl;
|
struct dirty_leaf *dl;
|
||||||
dbuf_dirty_record_t *dr;
|
dbuf_dirty_record_t *dr;
|
||||||
|
|
||||||
|
ASSERT3U(db->db.db_object, !=, DMU_META_DNODE_OBJECT);
|
||||||
|
ASSERT0(db->db_level);
|
||||||
|
|
||||||
dr = list_head(&db->db_dirty_records);
|
dr = list_head(&db->db_dirty_records);
|
||||||
ASSERT3P(dr, !=, NULL);
|
ASSERT3P(dr, !=, NULL);
|
||||||
ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
|
ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
|
||||||
dl = &dr->dt.dl;
|
dl = &dr->dt.dl;
|
||||||
|
ASSERT0(dl->dr_has_raw_params);
|
||||||
dl->dr_overridden_by = *bp;
|
dl->dr_overridden_by = *bp;
|
||||||
dl->dr_override_state = DR_OVERRIDDEN;
|
dl->dr_override_state = DR_OVERRIDDEN;
|
||||||
BP_SET_LOGICAL_BIRTH(&dl->dr_overridden_by, dr->dr_txg);
|
BP_SET_LOGICAL_BIRTH(&dl->dr_overridden_by, dr->dr_txg);
|
||||||
@ -3040,6 +3050,7 @@ dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
|
|||||||
ASSERT3P(dr, !=, NULL);
|
ASSERT3P(dr, !=, NULL);
|
||||||
ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
|
ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
|
||||||
dl = &dr->dt.dl;
|
dl = &dr->dt.dl;
|
||||||
|
ASSERT0(dl->dr_has_raw_params);
|
||||||
encode_embedded_bp_compressed(&dl->dr_overridden_by,
|
encode_embedded_bp_compressed(&dl->dr_overridden_by,
|
||||||
data, comp, uncompressed_size, compressed_size);
|
data, comp, uncompressed_size, compressed_size);
|
||||||
BPE_SET_ETYPE(&dl->dr_overridden_by, etype);
|
BPE_SET_ETYPE(&dl->dr_overridden_by, etype);
|
||||||
@ -5083,7 +5094,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
|||||||
dsl_pool_undirty_space(dmu_objset_pool(os), dr->dr_accounted,
|
dsl_pool_undirty_space(dmu_objset_pool(os), dr->dr_accounted,
|
||||||
zio->io_txg);
|
zio->io_txg);
|
||||||
|
|
||||||
kmem_free(dr, sizeof (dbuf_dirty_record_t));
|
kmem_cache_free(dbuf_dirty_kmem_cache, dr);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -1895,6 +1895,7 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
|
|||||||
mutex_enter(&db->db_mtx);
|
mutex_enter(&db->db_mtx);
|
||||||
ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC);
|
ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC);
|
||||||
if (zio->io_error == 0) {
|
if (zio->io_error == 0) {
|
||||||
|
ASSERT0(dr->dt.dl.dr_has_raw_params);
|
||||||
dr->dt.dl.dr_nopwrite = !!(zio->io_flags & ZIO_FLAG_NOPWRITE);
|
dr->dt.dl.dr_nopwrite = !!(zio->io_flags & ZIO_FLAG_NOPWRITE);
|
||||||
if (dr->dt.dl.dr_nopwrite) {
|
if (dr->dt.dl.dr_nopwrite) {
|
||||||
blkptr_t *bp = zio->io_bp;
|
blkptr_t *bp = zio->io_bp;
|
||||||
@ -2190,6 +2191,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
|
|||||||
return (SET_ERROR(EALREADY));
|
return (SET_ERROR(EALREADY));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ASSERT0(dr->dt.dl.dr_has_raw_params);
|
||||||
ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN);
|
ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN);
|
||||||
dr->dt.dl.dr_override_state = DR_IN_DMU_SYNC;
|
dr->dt.dl.dr_override_state = DR_IN_DMU_SYNC;
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
@ -2657,6 +2659,7 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
|
|||||||
db = (dmu_buf_impl_t *)dbuf;
|
db = (dmu_buf_impl_t *)dbuf;
|
||||||
bp = &bps[i];
|
bp = &bps[i];
|
||||||
|
|
||||||
|
ASSERT3U(db->db.db_object, !=, DMU_META_DNODE_OBJECT);
|
||||||
ASSERT0(db->db_level);
|
ASSERT0(db->db_level);
|
||||||
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
||||||
ASSERT(db->db_blkid != DMU_SPILL_BLKID);
|
ASSERT(db->db_blkid != DMU_SPILL_BLKID);
|
||||||
@ -2672,11 +2675,6 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
|
|||||||
db = (dmu_buf_impl_t *)dbuf;
|
db = (dmu_buf_impl_t *)dbuf;
|
||||||
bp = &bps[i];
|
bp = &bps[i];
|
||||||
|
|
||||||
ASSERT0(db->db_level);
|
|
||||||
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
|
||||||
ASSERT(db->db_blkid != DMU_SPILL_BLKID);
|
|
||||||
ASSERT(BP_IS_HOLE(bp) || dbuf->db_size == BP_GET_LSIZE(bp));
|
|
||||||
|
|
||||||
dmu_buf_will_clone_or_dio(dbuf, tx);
|
dmu_buf_will_clone_or_dio(dbuf, tx);
|
||||||
|
|
||||||
mutex_enter(&db->db_mtx);
|
mutex_enter(&db->db_mtx);
|
||||||
@ -2685,6 +2683,7 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
|
|||||||
VERIFY(dr != NULL);
|
VERIFY(dr != NULL);
|
||||||
ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
|
ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
|
||||||
dl = &dr->dt.dl;
|
dl = &dr->dt.dl;
|
||||||
|
ASSERT0(dl->dr_has_raw_params);
|
||||||
dl->dr_overridden_by = *bp;
|
dl->dr_overridden_by = *bp;
|
||||||
if (!BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) != 0) {
|
if (!BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) != 0) {
|
||||||
if (!BP_IS_EMBEDDED(bp)) {
|
if (!BP_IS_EMBEDDED(bp)) {
|
||||||
|
@ -180,6 +180,7 @@ dmu_write_direct(zio_t *pio, dmu_buf_impl_t *db, abd_t *data, dmu_tx_t *tx)
|
|||||||
if (list_next(&db->db_dirty_records, dr_head) != NULL)
|
if (list_next(&db->db_dirty_records, dr_head) != NULL)
|
||||||
zp.zp_nopwrite = B_FALSE;
|
zp.zp_nopwrite = B_FALSE;
|
||||||
|
|
||||||
|
ASSERT0(dr_head->dt.dl.dr_has_raw_params);
|
||||||
ASSERT3S(dr_head->dt.dl.dr_override_state, ==, DR_NOT_OVERRIDDEN);
|
ASSERT3S(dr_head->dt.dl.dr_override_state, ==, DR_NOT_OVERRIDDEN);
|
||||||
dr_head->dt.dl.dr_override_state = DR_IN_DMU_SYNC;
|
dr_head->dt.dl.dr_override_state = DR_IN_DMU_SYNC;
|
||||||
|
|
||||||
|
@ -566,7 +566,7 @@ dnode_undirty_dbufs(list_t *list)
|
|||||||
mutex_destroy(&dr->dt.di.dr_mtx);
|
mutex_destroy(&dr->dt.di.dr_mtx);
|
||||||
list_destroy(&dr->dt.di.dr_children);
|
list_destroy(&dr->dt.di.dr_children);
|
||||||
}
|
}
|
||||||
kmem_free(dr, sizeof (dbuf_dirty_record_t));
|
kmem_cache_free(dbuf_dirty_kmem_cache, dr);
|
||||||
dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE);
|
dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user