mlx5e tls: Ensure all allocated tags have a hw context associated

Ensure all allocated tags have a hardware context associated.
The hardware context allocation is moved into the zone import
routine, as suggested by kib.  This is safe because these zone
allocations are always done in a sleepable context.

I have removed the now pointless num_resources tracking,
and added sysctls / tunables to control UMA zone limits
for these tls tags, as well as a tunable to let the
driver pre-allocate tags at boot.

MFC after:	2 weeks
This commit is contained in:
Andrew Gallatin 2024-11-23 11:43:17 +02:00 committed by Konstantin Belousov
parent fdeb273d49
commit 81b38bce07
2 changed files with 95 additions and 39 deletions

View File

@ -84,7 +84,7 @@ struct mlx5e_tls {
struct workqueue_struct *wq; struct workqueue_struct *wq;
uma_zone_t zone; uma_zone_t zone;
uint32_t max_resources; /* max number of resources */ uint32_t max_resources; /* max number of resources */
volatile uint32_t num_resources; /* current number of resources */ int zone_max;
int init; /* set when ready */ int init; /* set when ready */
char zname[32]; char zname[32];
}; };

View File

@ -81,17 +81,57 @@ static const char *mlx5e_tls_stats_desc[] = {
static void mlx5e_tls_work(struct work_struct *); static void mlx5e_tls_work(struct work_struct *);
/*
* Expand the tls tag UMA zone in a sleepable context
*/
/*
 * Warm up the tls tag UMA zone from a sleepable context by pulling
 * 'nitems' tags out of the zone (which triggers the sleeping import
 * path) and then handing them all back, leaving them cached.
 */
static void
mlx5e_prealloc_tags(struct mlx5e_priv *priv, int nitems)
{
	struct mlx5e_tls_tag **batch;
	int idx;

	batch = malloc(sizeof(batch[0]) * nitems,
	    M_MLX5E_TLS, M_WAITOK);

	/* Allocate every tag up front so the zone is fully populated. */
	for (idx = 0; idx < nitems; idx++)
		batch[idx] = uma_zalloc(priv->tls.zone, M_WAITOK);

	/* Keep the compiler from reordering the alloc/free phases. */
	__compiler_membar();

	/* Release the batch; the tags remain cached in the zone. */
	for (idx = 0; idx < nitems; idx++)
		uma_zfree(priv->tls.zone, batch[idx]);

	free(batch, M_MLX5E_TLS);
}
static int static int
mlx5e_tls_tag_import(void *arg, void **store, int cnt, int domain, int flags) mlx5e_tls_tag_import(void *arg, void **store, int cnt, int domain, int flags)
{ {
struct mlx5e_tls_tag *ptag; struct mlx5e_tls_tag *ptag;
int i; struct mlx5e_priv *priv = arg;
int err, i;
/*
* mlx5_tls_open_tis() sleeps on a firmware command, so
* zone allocations must be done from a sleepable context.
* Note that the uma_zalloc() in mlx5e_tls_snd_tag_alloc()
* is done with M_NOWAIT so that hitting the zone limit does
* not cause the allocation to pause forever.
*/
for (i = 0; i != cnt; i++) { for (i = 0; i != cnt; i++) {
ptag = malloc_domainset(sizeof(*ptag), M_MLX5E_TLS, ptag = malloc_domainset(sizeof(*ptag), M_MLX5E_TLS,
mlx5_dev_domainset(arg), flags | M_ZERO); mlx5_dev_domainset(arg), flags | M_ZERO);
if (ptag == NULL)
return (i);
ptag->tls = &priv->tls;
mtx_init(&ptag->mtx, "mlx5-tls-tag-mtx", NULL, MTX_DEF); mtx_init(&ptag->mtx, "mlx5-tls-tag-mtx", NULL, MTX_DEF);
INIT_WORK(&ptag->work, mlx5e_tls_work); INIT_WORK(&ptag->work, mlx5e_tls_work);
err = mlx5_tls_open_tis(priv->mdev, 0, priv->tdn,
priv->pdn, &ptag->tisn);
if (err) {
MLX5E_TLS_STAT_INC(ptag, tx_error, 1);
free(ptag, M_MLX5E_TLS);
return (i);
}
store[i] = ptag; store[i] = ptag;
} }
return (i); return (i);
@ -114,7 +154,6 @@ mlx5e_tls_tag_release(void *arg, void **store, int cnt)
if (ptag->tisn != 0) { if (ptag->tisn != 0) {
mlx5_tls_close_tis(priv->mdev, ptag->tisn); mlx5_tls_close_tis(priv->mdev, ptag->tisn);
atomic_add_32(&ptls->num_resources, -1U);
} }
mtx_destroy(&ptag->mtx); mtx_destroy(&ptag->mtx);
@ -136,20 +175,38 @@ mlx5e_tls_tag_zfree(struct mlx5e_tls_tag *ptag)
/* avoid leaking keys */ /* avoid leaking keys */
memset(ptag->crypto_params, 0, sizeof(ptag->crypto_params)); memset(ptag->crypto_params, 0, sizeof(ptag->crypto_params));
/* update number of TIS contexts */
if (ptag->tisn == 0)
atomic_add_32(&ptag->tls->num_resources, -1U);
/* return tag to UMA */ /* return tag to UMA */
uma_zfree(ptag->tls->zone, ptag); uma_zfree(ptag->tls->zone, ptag);
} }
/*
 * Sysctl handler for the "tls_max_tag" knob: report or update the UMA
 * zone limit on TLS offload session tags.  A new value is rejected
 * unless it is non-zero and no larger than the hardware resource limit
 * (ptls->max_resources).
 *
 * Returns 0 on success, EINVAL for an out-of-range request, or the
 * error from sysctl_handle_int().
 */
static int
mlx5e_max_tag_proc(SYSCTL_HANDLER_ARGS)
{
	struct mlx5e_priv *priv = (struct mlx5e_priv *)arg1;
	struct mlx5e_tls *ptls = &priv->tls;
	int err;
	unsigned int max_tags;

	max_tags = ptls->zone_max;
	err = sysctl_handle_int(oidp, &max_tags, arg2, req);
	/* Read-only access, or an error copying the new value in. */
	if (err != 0 || req->newptr == NULL)
		return (err);
	/* No change requested; avoid touching the zone needlessly. */
	if (max_tags == (unsigned int)ptls->zone_max)
		return (0);
	/* Enforce the hardware resource ceiling and forbid zero. */
	if (max_tags > ptls->max_resources || max_tags == 0)
		return (EINVAL);
	ptls->zone_max = max_tags;
	uma_zone_set_max(ptls->zone, ptls->zone_max);
	return (0);
}
int int
mlx5e_tls_init(struct mlx5e_priv *priv) mlx5e_tls_init(struct mlx5e_priv *priv)
{ {
struct mlx5e_tls *ptls = &priv->tls; struct mlx5e_tls *ptls = &priv->tls;
struct sysctl_oid *node; struct sysctl_oid *node;
uint32_t x; uint32_t max_dek, max_tis, x;
int zone_max = 0, prealloc_tags = 0;
if (MLX5_CAP_GEN(priv->mdev, tls_tx) == 0 || if (MLX5_CAP_GEN(priv->mdev, tls_tx) == 0 ||
MLX5_CAP_GEN(priv->mdev, log_max_dek) == 0) MLX5_CAP_GEN(priv->mdev, log_max_dek) == 0)
@ -164,13 +221,31 @@ mlx5e_tls_init(struct mlx5e_priv *priv)
snprintf(ptls->zname, sizeof(ptls->zname), snprintf(ptls->zname, sizeof(ptls->zname),
"mlx5_%u_tls", device_get_unit(priv->mdev->pdev->dev.bsddev)); "mlx5_%u_tls", device_get_unit(priv->mdev->pdev->dev.bsddev));
TUNABLE_INT_FETCH("hw.mlx5.tls_max_tags", &zone_max);
TUNABLE_INT_FETCH("hw.mlx5.tls_prealloc_tags", &prealloc_tags);
ptls->zone = uma_zcache_create(ptls->zname, ptls->zone = uma_zcache_create(ptls->zname,
sizeof(struct mlx5e_tls_tag), NULL, NULL, NULL, NULL, sizeof(struct mlx5e_tls_tag), NULL, NULL, NULL, NULL,
mlx5e_tls_tag_import, mlx5e_tls_tag_release, priv->mdev, mlx5e_tls_tag_import, mlx5e_tls_tag_release, priv,
UMA_ZONE_UNMANAGED); UMA_ZONE_UNMANAGED | (prealloc_tags ? UMA_ZONE_NOFREE : 0));
/* shared between RX and TX TLS */ /* shared between RX and TX TLS */
ptls->max_resources = 1U << (MLX5_CAP_GEN(priv->mdev, log_max_dek) - 1); max_dek = 1U << (MLX5_CAP_GEN(priv->mdev, log_max_dek) - 1);
max_tis = 1U << (MLX5_CAP_GEN(priv->mdev, log_max_tis) - 1);
ptls->max_resources = MIN(max_dek, max_tis);
if (zone_max != 0) {
ptls->zone_max = zone_max;
if (ptls->zone_max > priv->tls.max_resources)
ptls->zone_max = priv->tls.max_resources;
} else {
ptls->zone_max = priv->tls.max_resources;
}
uma_zone_set_max(ptls->zone, ptls->zone_max);
if (prealloc_tags != 0)
mlx5e_prealloc_tags(priv, ptls->zone_max);
for (x = 0; x != MLX5E_TLS_STATS_NUM; x++) for (x = 0; x != MLX5E_TLS_STATS_NUM; x++)
ptls->stats.arg[x] = counter_u64_alloc(M_WAITOK); ptls->stats.arg[x] = counter_u64_alloc(M_WAITOK);
@ -183,6 +258,10 @@ mlx5e_tls_init(struct mlx5e_priv *priv)
if (node == NULL) if (node == NULL)
return (0); return (0);
SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tls_max_tag",
CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_MPSAFE, priv, 0, mlx5e_max_tag_proc,
"IU", "Max number of TLS offload session tags");
mlx5e_create_counter_stats(&ptls->ctx, mlx5e_create_counter_stats(&ptls->ctx,
SYSCTL_CHILDREN(node), "stats", SYSCTL_CHILDREN(node), "stats",
mlx5e_tls_stats_desc, MLX5E_TLS_STATS_NUM, mlx5e_tls_stats_desc, MLX5E_TLS_STATS_NUM,
@ -206,9 +285,6 @@ mlx5e_tls_cleanup(struct mlx5e_priv *priv)
uma_zdestroy(ptls->zone); uma_zdestroy(ptls->zone);
destroy_workqueue(ptls->wq); destroy_workqueue(ptls->wq);
/* check if all resources are freed */
MPASS(priv->tls.num_resources == 0);
for (x = 0; x != MLX5E_TLS_STATS_NUM; x++) for (x = 0; x != MLX5E_TLS_STATS_NUM; x++)
counter_u64_free(ptls->stats.arg[x]); counter_u64_free(ptls->stats.arg[x]);
} }
@ -334,28 +410,16 @@ mlx5e_tls_snd_tag_alloc(if_t ifp,
if (priv->gone != 0 || priv->tls.init == 0) if (priv->gone != 0 || priv->tls.init == 0)
return (EOPNOTSUPP); return (EOPNOTSUPP);
/* allocate new tag from zone, if any */
ptag = uma_zalloc(priv->tls.zone, M_WAITOK); ptag = uma_zalloc(priv->tls.zone, M_WAITOK);
if (ptag == NULL)
return (ENOMEM);
/* sanity check default values */ /* sanity check default values */
MPASS(ptag->dek_index == 0); MPASS(ptag->dek_index == 0);
MPASS(ptag->dek_index_ok == 0); MPASS(ptag->dek_index_ok == 0);
/* setup TLS tag */
ptag->tls = &priv->tls;
/* check if there is no TIS context */ /* check if there is no TIS context */
if (ptag->tisn == 0) { KASSERT(ptag->tisn != 0, ("ptag %p w/0 tisn", ptag));
uint32_t value;
value = atomic_fetchadd_32(&priv->tls.num_resources, 1U);
/* check resource limits */
if (value >= priv->tls.max_resources) {
error = ENOMEM;
goto failure;
}
}
en = &params->tls.tls->params; en = &params->tls.tls->params;
@ -448,17 +512,9 @@ mlx5e_tls_snd_tag_alloc(if_t ifp,
/* reset state */ /* reset state */
ptag->state = MLX5E_TLS_ST_INIT; ptag->state = MLX5E_TLS_ST_INIT;
/*
* Try to immediately init the tag. We may fail if the NIC's
* resources are tied up with send tags that are in the work
* queue, waiting to be freed. So if we fail, put ourselves
* on the queue so as to try again after resouces have been freed.
*/
error = mlx5e_tls_st_init(priv, ptag); error = mlx5e_tls_st_init(priv, ptag);
if (error != 0) { if (error != 0)
queue_work(priv->tls.wq, &ptag->work); goto failure;
flush_work(&ptag->work);
}
return (0); return (0);