From 9207f9d206a4017001f01ca27d3d25a26c268a95 Mon Sep 17 00:00:00 2001 From: Chandrakanth patil Date: Fri, 5 Jul 2024 13:23:46 +0530 Subject: [PATCH] bnxt_re: User library support for RoCE driver This patch introduces userspace library support for the bnxt_re RoCE driver. The library can be linked with RDMA applications such as perftest and rping. The RoCE traffic has been tested with the rping and perftest utility. Reviewed by: imp, kib, sumit.saxena@broadcom.com Approved by: imp Differential revision: https://reviews.freebsd.org/D45729 --- contrib/ofed/libbnxtre/abi.h | 542 +++++++ contrib/ofed/libbnxtre/db.c | 566 +++++++ contrib/ofed/libbnxtre/list.h | 122 ++ contrib/ofed/libbnxtre/main.c | 383 +++++ contrib/ofed/libbnxtre/main.h | 538 +++++++ contrib/ofed/libbnxtre/memory.c | 96 ++ contrib/ofed/libbnxtre/memory.h | 166 ++ contrib/ofed/libbnxtre/verbs.c | 2557 ++++++++++++++++++++++++++++++ contrib/ofed/libbnxtre/verbs.h | 184 +++ contrib/ofed/libbnxtre/version.h | 45 + lib/ofed/libbnxtre/Makefile | 14 + 11 files changed, 5213 insertions(+) create mode 100644 contrib/ofed/libbnxtre/abi.h create mode 100644 contrib/ofed/libbnxtre/db.c create mode 100644 contrib/ofed/libbnxtre/list.h create mode 100644 contrib/ofed/libbnxtre/main.c create mode 100644 contrib/ofed/libbnxtre/main.h create mode 100644 contrib/ofed/libbnxtre/memory.c create mode 100644 contrib/ofed/libbnxtre/memory.h create mode 100644 contrib/ofed/libbnxtre/verbs.c create mode 100644 contrib/ofed/libbnxtre/verbs.h create mode 100644 contrib/ofed/libbnxtre/version.h create mode 100755 lib/ofed/libbnxtre/Makefile diff --git a/contrib/ofed/libbnxtre/abi.h b/contrib/ofed/libbnxtre/abi.h new file mode 100644 index 000000000000..390605edb40b --- /dev/null +++ b/contrib/ofed/libbnxtre/abi.h @@ -0,0 +1,542 @@ +/* + * Copyright (c) 2024, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __BNXT_RE_ABI_H__ +#define __BNXT_RE_ABI_H__ + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define __aligned_u64 __attribute__((aligned(8))) u64 + +#define BNXT_RE_ABI_VERSION 6 +#define BNXT_RE_MAX_INLINE_SIZE 0x60 +#define BNXT_RE_MAX_INLINE_SIZE_VAR_WQE 0x1E0 +#define BNXT_RE_MAX_PUSH_SIZE_VAR_WQE 0xD0 +#define BNXT_RE_FULL_FLAG_DELTA 0x00 + +enum bnxt_re_wr_opcode { + BNXT_RE_WR_OPCD_SEND = 0x00, + BNXT_RE_WR_OPCD_SEND_IMM = 0x01, + BNXT_RE_WR_OPCD_SEND_INVAL = 0x02, + BNXT_RE_WR_OPCD_RDMA_WRITE = 0x04, + BNXT_RE_WR_OPCD_RDMA_WRITE_IMM = 0x05, + BNXT_RE_WR_OPCD_RDMA_READ = 0x06, + BNXT_RE_WR_OPCD_ATOMIC_CS = 0x08, + BNXT_RE_WR_OPCD_ATOMIC_FA = 0x0B, + BNXT_RE_WR_OPCD_LOC_INVAL = 0x0C, + BNXT_RE_WR_OPCD_BIND = 0x0E, + BNXT_RE_WR_OPCD_RECV = 0x80, + BNXT_RE_WR_OPCD_INVAL = 0xFF +}; + +enum bnxt_re_wr_flags { + BNXT_RE_WR_FLAGS_INLINE = 0x10, + BNXT_RE_WR_FLAGS_SE = 0x08, + BNXT_RE_WR_FLAGS_UC_FENCE = 0x04, + BNXT_RE_WR_FLAGS_RD_FENCE = 0x02, + BNXT_RE_WR_FLAGS_SIGNALED = 0x01 +}; + +#define BNXT_RE_MEMW_TYPE_2 0x02 +#define BNXT_RE_MEMW_TYPE_1 0x00 +enum bnxt_re_wr_bind_acc { + BNXT_RE_WR_BIND_ACC_LWR = 0x01, + BNXT_RE_WR_BIND_ACC_RRD = 0x02, + BNXT_RE_WR_BIND_ACC_RWR = 0x04, + BNXT_RE_WR_BIND_ACC_RAT = 0x08, + BNXT_RE_WR_BIND_ACC_MWB = 0x10, + BNXT_RE_WR_BIND_ACC_ZBVA = 0x01, + BNXT_RE_WR_BIND_ACC_SHIFT = 0x10 +}; + +enum bnxt_re_wc_type { + BNXT_RE_WC_TYPE_SEND = 0x00, + BNXT_RE_WC_TYPE_RECV_RC = 0x01, + BNXT_RE_WC_TYPE_RECV_UD = 0x02, + BNXT_RE_WC_TYPE_RECV_RAW = 0x03, + BNXT_RE_WC_TYPE_TERM = 0x0E, + BNXT_RE_WC_TYPE_COFF = 0x0F +}; + +#define BNXT_RE_WC_OPCD_RECV 0x80 +enum bnxt_re_req_wc_status { + BNXT_RE_REQ_ST_OK = 0x00, + BNXT_RE_REQ_ST_BAD_RESP = 0x01, + BNXT_RE_REQ_ST_LOC_LEN = 0x02, + BNXT_RE_REQ_ST_LOC_QP_OP = 0x03, + BNXT_RE_REQ_ST_PROT = 0x04, + BNXT_RE_REQ_ST_MEM_OP = 0x05, + BNXT_RE_REQ_ST_REM_INVAL = 0x06, + BNXT_RE_REQ_ST_REM_ACC = 0x07, + BNXT_RE_REQ_ST_REM_OP = 0x08, + BNXT_RE_REQ_ST_RNR_NAK_XCED = 0x09, + BNXT_RE_REQ_ST_TRNSP_XCED = 0x0A, + BNXT_RE_REQ_ST_WR_FLUSH = 0x0B +}; + +enum bnxt_re_rsp_wc_status { + BNXT_RE_RSP_ST_OK = 0x00, + BNXT_RE_RSP_ST_LOC_ACC = 0x01, + BNXT_RE_RSP_ST_LOC_LEN = 0x02, + BNXT_RE_RSP_ST_LOC_PROT = 0x03, + BNXT_RE_RSP_ST_LOC_QP_OP = 0x04, + BNXT_RE_RSP_ST_MEM_OP = 0x05, + BNXT_RE_RSP_ST_REM_INVAL = 0x06, + BNXT_RE_RSP_ST_WR_FLUSH = 0x07, + BNXT_RE_RSP_ST_HW_FLUSH = 0x08 +}; + +enum bnxt_re_hdr_offset { + BNXT_RE_HDR_WT_MASK = 0xFF, + BNXT_RE_HDR_FLAGS_MASK = 0xFF, + BNXT_RE_HDR_FLAGS_SHIFT = 0x08, + BNXT_RE_HDR_WS_MASK = 0xFF, + BNXT_RE_HDR_WS_SHIFT = 0x10 +}; + +enum bnxt_re_db_que_type { + BNXT_RE_QUE_TYPE_SQ = 0x00, + BNXT_RE_QUE_TYPE_RQ = 0x01, + BNXT_RE_QUE_TYPE_SRQ = 0x02, + BNXT_RE_QUE_TYPE_SRQ_ARM = 0x03, + BNXT_RE_QUE_TYPE_CQ = 0x04, + BNXT_RE_QUE_TYPE_CQ_ARMSE = 0x05, + BNXT_RE_QUE_TYPE_CQ_ARMALL = 0x06, + BNXT_RE_QUE_TYPE_CQ_ARMENA = 0x07, + BNXT_RE_QUE_TYPE_SRQ_ARMENA = 0x08, + BNXT_RE_QUE_TYPE_CQ_CUT_ACK = 0x09, + BNXT_RE_PUSH_TYPE_START = 0x0C, + BNXT_RE_PUSH_TYPE_END = 0x0D, + BNXT_RE_QUE_TYPE_NULL = 0x0F +}; + +enum bnxt_re_db_mask { + BNXT_RE_DB_INDX_MASK = 0xFFFFFFUL, + BNXT_RE_DB_PILO_MASK = 0x0FFUL, + BNXT_RE_DB_PILO_SHIFT = 0x18, + BNXT_RE_DB_QID_MASK = 0xFFFFFUL, + BNXT_RE_DB_PIHI_MASK = 0xF00UL, + BNXT_RE_DB_PIHI_SHIFT = 0x0C, /* Because mask is 0xF00 */ + BNXT_RE_DB_TYP_MASK = 0x0FUL, + BNXT_RE_DB_TYP_SHIFT = 0x1C, + BNXT_RE_DB_VALID_SHIFT = 0x1A, + BNXT_RE_DB_EPOCH_SHIFT = 0x18, + BNXT_RE_DB_TOGGLE_SHIFT = 0x19, + 
+}; + +enum bnxt_re_psns_mask { + BNXT_RE_PSNS_SPSN_MASK = 0xFFFFFF, + BNXT_RE_PSNS_OPCD_MASK = 0xFF, + BNXT_RE_PSNS_OPCD_SHIFT = 0x18, + BNXT_RE_PSNS_NPSN_MASK = 0xFFFFFF, + BNXT_RE_PSNS_FLAGS_MASK = 0xFF, + BNXT_RE_PSNS_FLAGS_SHIFT = 0x18 +}; + +enum bnxt_re_msns_mask { + BNXT_RE_SQ_MSN_SEARCH_START_PSN_MASK = 0xFFFFFFUL, + BNXT_RE_SQ_MSN_SEARCH_START_PSN_SHIFT = 0, + BNXT_RE_SQ_MSN_SEARCH_NEXT_PSN_MASK = 0xFFFFFF000000ULL, + BNXT_RE_SQ_MSN_SEARCH_NEXT_PSN_SHIFT = 0x18, + BNXT_RE_SQ_MSN_SEARCH_START_IDX_MASK = 0xFFFF000000000000ULL, + BNXT_RE_SQ_MSN_SEARCH_START_IDX_SHIFT = 0x30 +}; + +enum bnxt_re_bcqe_mask { + BNXT_RE_BCQE_PH_MASK = 0x01, + BNXT_RE_BCQE_TYPE_MASK = 0x0F, + BNXT_RE_BCQE_TYPE_SHIFT = 0x01, + BNXT_RE_BCQE_STATUS_MASK = 0xFF, + BNXT_RE_BCQE_STATUS_SHIFT = 0x08, + BNXT_RE_BCQE_FLAGS_MASK = 0xFFFFU, + BNXT_RE_BCQE_FLAGS_SHIFT = 0x10, + BNXT_RE_BCQE_RWRID_MASK = 0xFFFFFU, + BNXT_RE_BCQE_SRCQP_MASK = 0xFF, + BNXT_RE_BCQE_SRCQP_SHIFT = 0x18 +}; + +enum bnxt_re_rc_flags_mask { + BNXT_RE_RC_FLAGS_SRQ_RQ_MASK = 0x01, + BNXT_RE_RC_FLAGS_IMM_MASK = 0x02, + BNXT_RE_RC_FLAGS_IMM_SHIFT = 0x01, + BNXT_RE_RC_FLAGS_INV_MASK = 0x04, + BNXT_RE_RC_FLAGS_INV_SHIFT = 0x02, + BNXT_RE_RC_FLAGS_RDMA_MASK = 0x08, + BNXT_RE_RC_FLAGS_RDMA_SHIFT = 0x03 +}; + +enum bnxt_re_ud_flags_mask { + BNXT_RE_UD_FLAGS_SRQ_RQ_MASK = 0x01, + BNXT_RE_UD_FLAGS_SRQ_RQ_SFT = 0x00, + BNXT_RE_UD_FLAGS_IMM_MASK = 0x02, + BNXT_RE_UD_FLAGS_IMM_SFT = 0x01, + BNXT_RE_UD_FLAGS_IP_VER_MASK = 0x30, + BNXT_RE_UD_FLAGS_IP_VER_SFT = 0x4, + BNXT_RE_UD_FLAGS_META_MASK = 0x3C0, + BNXT_RE_UD_FLAGS_META_SFT = 0x6, + BNXT_RE_UD_FLAGS_EXT_META_MASK = 0xC00, + BNXT_RE_UD_FLAGS_EXT_META_SFT = 0x10, +}; + +enum bnxt_re_ud_cqe_mask { + BNXT_RE_UD_CQE_MAC_MASK = 0xFFFFFFFFFFFFULL, + BNXT_RE_UD_CQE_SRCQPLO_MASK = 0xFFFF, + BNXT_RE_UD_CQE_SRCQPLO_SHIFT = 0x30, + BNXT_RE_UD_CQE_LEN_MASK = 0x3FFFU +}; + +enum bnxt_re_shpg_offt { + BNXT_RE_SHPG_BEG_RESV_OFFT = 0x00, + BNXT_RE_SHPG_AVID_OFFT = 0x10, + BNXT_RE_SHPG_AVID_SIZE = 0x04, + BNXT_RE_SHPG_END_RESV_OFFT = 0xFF0 +}; + +enum bnxt_re_que_flags_mask { + BNXT_RE_FLAG_EPOCH_TAIL_SHIFT = 0x0UL, + BNXT_RE_FLAG_EPOCH_HEAD_SHIFT = 0x1UL, + BNXT_RE_FLAG_EPOCH_TAIL_MASK = 0x1UL, + BNXT_RE_FLAG_EPOCH_HEAD_MASK = 0x2UL, +}; + +enum bnxt_re_db_epoch_flag_shift { + BNXT_RE_DB_EPOCH_TAIL_SHIFT = BNXT_RE_DB_EPOCH_SHIFT, + BNXT_RE_DB_EPOCH_HEAD_SHIFT = (BNXT_RE_DB_EPOCH_SHIFT - 1) +}; + +enum bnxt_re_ppp_st_en_mask { + BNXT_RE_PPP_ENABLED_MASK = 0x1UL, + BNXT_RE_PPP_STATE_MASK = 0x2UL, +}; + +enum bnxt_re_ppp_st_shift { + BNXT_RE_PPP_ST_SHIFT = 0x1UL +}; + +struct bnxt_re_db_hdr { + __u64 typ_qid_indx; /* typ: 4, qid:20, indx:24 */ +}; + +#define BNXT_RE_CHIP_ID0_CHIP_NUM_SFT 0x00 +#define BNXT_RE_CHIP_ID0_CHIP_REV_SFT 0x10 +#define BNXT_RE_CHIP_ID0_CHIP_MET_SFT 0x18 + +enum { + BNXT_RE_COMP_MASK_UCNTX_WC_DPI_ENABLED = 0x01, + BNXT_RE_COMP_MASK_UCNTX_POW2_DISABLED = 0x02, + BNXT_RE_COMP_MASK_UCNTX_RSVD_WQE_DISABLED = 0x04, + BNXT_RE_COMP_MASK_UCNTX_MQP_EX_SUPPORTED = 0x8, + BNXT_RE_COMP_MASK_UCNTX_DBR_PACING_ENABLED = 0x10, + BNXT_RE_COMP_MASK_UCNTX_DBR_RECOVERY_ENABLED = 0x20, + BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED = 0x40 +}; + +enum bnxt_re_req_to_drv { + BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT = 0x01, + BNXT_RE_COMP_MASK_REQ_UCNTX_RSVD_WQE = 0x02 +}; + +#define BNXT_RE_WQE_MODES_WQE_MODE_MASK 0x01 +/* bit wise modes can be extended here. 
*/ +enum bnxt_re_modes { + BNXT_RE_WQE_MODE_STATIC = 0x00, + BNXT_RE_WQE_MODE_VARIABLE = 0x01 + /* Other modes can be here */ +}; + +struct bnxt_re_cntx_req { + struct ibv_get_context cmd; + __aligned_u64 comp_mask; +}; + +struct bnxt_re_cntx_resp { + struct ibv_get_context_resp resp; + __u32 dev_id; + __u32 max_qp; /* To allocate qp-table */ + __u32 pg_size; + __u32 cqe_size; + __u32 max_cqd; + __u32 chip_id0; + __u32 chip_id1; + __u32 modes; + __aligned_u64 comp_mask; +} __attribute__((packed)); + +enum { + BNXT_RE_COMP_MASK_PD_HAS_WC_DPI = 0x01, + BNXT_RE_COMP_MASK_PD_HAS_DBR_BAR_ADDR = 0x02, +}; + +struct bnxt_re_pd_resp { + struct ibv_alloc_pd_resp resp; + __u32 pdid; + __u32 dpi; + __u64 dbr; + __u64 comp_mask; + __u32 wcdpi; + __u64 dbr_bar_map; +} __attribute__((packed)); + +struct bnxt_re_mr_resp { + struct ibv_reg_mr_resp resp; +} __attribute__((packed)); + +/* CQ */ +enum { + BNXT_RE_COMP_MASK_CQ_HAS_DB_INFO = 0x01, + BNXT_RE_COMP_MASK_CQ_HAS_WC_DPI = 0x02, + BNXT_RE_COMP_MASK_CQ_HAS_CQ_PAGE = 0x04 +}; + +enum { + BNXT_RE_COMP_MASK_CQ_REQ_HAS_CAP_MASK = 0x1 +}; + +enum { + BNXT_RE_COMP_MASK_CQ_REQ_CAP_DBR_RECOVERY = 0x1 +}; + +struct bnxt_re_cq_req { + struct ibv_create_cq cmd; + __u64 cq_va; + __u64 cq_handle; + __aligned_u64 comp_mask; + __u16 cq_capab; +} __attribute__((packed)); + +struct bnxt_re_cq_resp { + struct ibv_create_cq_resp resp; + __u32 cqid; + __u32 tail; + __u32 phase; + __u32 rsvd; + __aligned_u64 comp_mask; + __u32 dpi; + __u64 dbr; + __u32 wcdpi; + __u64 cq_page; +} __attribute__((packed)); + +struct bnxt_re_resize_cq_req { + struct ibv_resize_cq cmd; + __u64 cq_va; +} __attribute__((packed)); + +struct bnxt_re_bcqe { + __u32 flg_st_typ_ph; + __u32 qphi_rwrid; +} __attribute__((packed)); + +struct bnxt_re_req_cqe { + __u64 qp_handle; + __u32 con_indx; /* 16 bits valid. 
*/ + __u32 rsvd1; + __u64 rsvd2; +} __attribute__((packed)); + +struct bnxt_re_rc_cqe { + __u32 length; + __u32 imm_key; + __u64 qp_handle; + __u64 mr_handle; +} __attribute__((packed)); + +struct bnxt_re_ud_cqe { + __u32 length; /* 14 bits */ + __u32 immd; + __u64 qp_handle; + __u64 qplo_mac; /* 16:48*/ +} __attribute__((packed)); + +struct bnxt_re_term_cqe { + __u64 qp_handle; + __u32 rq_sq_cidx; + __u32 rsvd; + __u64 rsvd1; +} __attribute__((packed)); + +struct bnxt_re_cutoff_cqe { + __u64 rsvd1; + __u64 rsvd2; + __u64 rsvd3; + __u8 cqe_type_toggle; + __u8 status; + __u16 rsvd4; + __u32 rsvd5; +} __attribute__((packed)); + +/* QP */ +struct bnxt_re_qp_req { + struct ibv_create_qp cmd; + __u64 qpsva; + __u64 qprva; + __u64 qp_handle; +} __attribute__((packed)); + +struct bnxt_re_qp_resp { + struct ibv_create_qp_resp resp; + __u32 qpid; +} __attribute__((packed)); + +enum bnxt_re_modify_ex_mask { + BNXT_RE_MQP_PPP_REQ_EN_MASK = 0x1UL, + BNXT_RE_MQP_PPP_REQ_EN = 0x1UL, + BNXT_RE_MQP_PATH_MTU_MASK = 0x2UL, + BNXT_RE_MQP_PPP_IDX_MASK = 0x7UL, + BNXT_RE_MQP_PPP_STATE = 0x10UL +}; + +/* Modify QP */ +struct bnxt_re_modify_ex_req { + struct ibv_modify_qp_ex cmd; + __aligned_u64 comp_mask; + __u32 dpi; + __u32 rsvd; +}; + +struct bnxt_re_modify_ex_resp { + struct ibv_modify_qp_resp_ex resp; + __aligned_u64 comp_mask; + __u32 ppp_st_idx; + __u32 path_mtu; +}; + +union lower_shdr { + __u64 qkey_len; + __u64 lkey_plkey; + __u64 rva; +}; + +struct bnxt_re_bsqe { + __u32 rsv_ws_fl_wt; + __u32 key_immd; + union lower_shdr lhdr; +} __attribute__((packed)); + +struct bnxt_re_psns_ext { + __u32 opc_spsn; + __u32 flg_npsn; + __u16 st_slot_idx; + __u16 rsvd0; + __u32 rsvd1; +} __attribute__((packed)); + +/* sq_msn_search (size:64b/8B) */ +struct bnxt_re_msns { + __u64 start_idx_next_psn_start_psn; +} __attribute__((packed)); + +struct bnxt_re_psns { + __u32 opc_spsn; + __u32 flg_npsn; +} __attribute__((packed)); + +struct bnxt_re_sge { + __u64 pa; + __u32 lkey; + __u32 length; +} __attribute__((packed)); + +struct bnxt_re_send { + __u32 dst_qp; + __u32 avid; + __u64 rsvd; +} __attribute__((packed)); + +struct bnxt_re_raw { + __u32 cfa_meta; + __u32 rsvd2; + __u64 rsvd3; +} __attribute__((packed)); + +struct bnxt_re_rdma { + __u64 rva; + __u32 rkey; + __u32 rsvd2; +} __attribute__((packed)); + +struct bnxt_re_atomic { + __u64 swp_dt; + __u64 cmp_dt; +} __attribute__((packed)); + +struct bnxt_re_inval { + __u64 rsvd[2]; +} __attribute__((packed)); + +struct bnxt_re_bind { + __u64 va; + __u64 len; /* only 40 bits are valid */ +} __attribute__((packed)); + +struct bnxt_re_brqe { + __u32 rsv_ws_fl_wt; + __u32 rsvd; + __u32 wrid; + __u32 rsvd1; +} __attribute__((packed)); + +struct bnxt_re_rqe { + __u64 rsvd[2]; +} __attribute__((packed)); + +/* SRQ */ +struct bnxt_re_srq_req { + struct ibv_create_srq cmd; + __u64 srqva; + __u64 srq_handle; +} __attribute__((packed)); + +struct bnxt_re_srq_resp { + struct ibv_create_srq_resp resp; + __u32 srqid; +} __attribute__((packed)); + +struct bnxt_re_srqe { + __u64 rsvd[2]; +} __attribute__((packed)); + +struct bnxt_re_push_wqe { + __u64 addr[32]; +} __attribute__((packed));; + +#endif diff --git a/contrib/ofed/libbnxtre/db.c b/contrib/ofed/libbnxtre/db.c new file mode 100644 index 000000000000..8751297c9218 --- /dev/null +++ b/contrib/ofed/libbnxtre/db.c @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2024, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Description: Doorbell handling functions. + */ + +#include +#include + +#include "abi.h" +#include "main.h" + +#define BNXT_RE_DB_FIFO_ROOM_MASK_P5 0x1FFF8000 +#define BNXT_RE_MAX_FIFO_DEPTH_P5 0x2c00 + +#define BNXT_RE_DB_FIFO_ROOM_MASK_P7 0x3FFF8000 +#define BNXT_RE_MAX_FIFO_DEPTH_P7 0x8000 + +#define BNXT_RE_DB_FIFO_ROOM_SHIFT 15 +#define BNXT_RE_DB_THRESHOLD 20 + +#define BNXT_RE_DB_FIFO_ROOM_MASK(ctx) \ + (_is_chip_thor2((ctx)) ? \ + BNXT_RE_DB_FIFO_ROOM_MASK_P7 :\ + BNXT_RE_DB_FIFO_ROOM_MASK_P5) +#define BNXT_RE_MAX_FIFO_DEPTH(ctx) \ + (_is_chip_thor2((ctx)) ? \ + BNXT_RE_MAX_FIFO_DEPTH_P7 :\ + BNXT_RE_MAX_FIFO_DEPTH_P5) + +static uint32_t xorshift32(struct xorshift32_state *state) +{ + /* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */ + uint32_t x = state->seed; + + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return state->seed = x; +} + +static uint16_t rnd(struct xorshift32_state *state, uint16_t range) +{ + /* range must be a power of 2 - 1 */ + return (xorshift32(state) & range); +} + +static int calculate_fifo_occupancy(struct bnxt_re_context *cntx) +{ + uint32_t *dbr_map = cntx->bar_map + 0x1a8; + uint32_t read_val, fifo_occup; + + read_val = *dbr_map; + fifo_occup = BNXT_RE_MAX_FIFO_DEPTH(cntx->cctx) - + ((read_val & BNXT_RE_DB_FIFO_ROOM_MASK(cntx->cctx)) >> + BNXT_RE_DB_FIFO_ROOM_SHIFT); + + return fifo_occup; +} + +static inline uint32_t find_min(uint32_t x, uint32_t y) +{ + return (y > x ? x : y); +} + +int bnxt_re_do_pacing(struct bnxt_re_context *cntx, struct xorshift32_state *state) +{ + /* First 4 bytes of shared page (pacing_info) contains the DBR + * pacing information. Second 4 bytes (pacing_th) contains + * the pacing threshold value to determine whether to + * add delay or not + */ + struct bnxt_re_pacing_data *pacing_data = + (struct bnxt_re_pacing_data *)cntx->dbr_page; + uint32_t wait_time = 1; + uint32_t fifo_occup; + + if (!pacing_data) + return 0; + /* If the device in error recovery state, return error to + * not to ring new doorbells in this state. 
+ */ + if (pacing_data->dev_err_state) + return -EFAULT; + + if (rnd(state, BNXT_RE_MAX_DO_PACING) < pacing_data->do_pacing) { + while ((fifo_occup = calculate_fifo_occupancy(cntx)) + > pacing_data->pacing_th) { + struct bnxt_re_cq *cq; + uint32_t usec_wait; + + if (pacing_data->alarm_th && fifo_occup > pacing_data->alarm_th) { + cq = container_of(cntx->dbr_cq, struct bnxt_re_cq, ibvcq); + bnxt_re_poll_kernel_cq(cq); + } + usec_wait = rnd(state, wait_time - 1); + if (usec_wait) + bnxt_re_sub_sec_busy_wait(usec_wait * 1000); + /* wait time capped at 128 us */ + wait_time = find_min(wait_time * 2, 128); + } + } + return 0; +} + +static inline void bnxt_re_ring_db(struct bnxt_re_dpi *dpi, __u64 key, + uint64_t *db_key, uint8_t *lock) +{ + while (1) { + if (__sync_bool_compare_and_swap(lock, 0, 1)) { + *db_key = key; + bnxt_re_wm_barrier(); + iowrite64(dpi->dbpage, key); + bnxt_re_wm_barrier(); + *lock = 0; + break; + } + } +} + +static inline void bnxt_re_init_push_hdr(struct bnxt_re_db_hdr *hdr, + uint32_t indx, uint32_t qid, + uint32_t typ, uint32_t pidx) +{ + __u64 key_lo, key_hi; + + key_lo = (((pidx & BNXT_RE_DB_PILO_MASK) << BNXT_RE_DB_PILO_SHIFT) | + (indx & BNXT_RE_DB_INDX_MASK)); + key_hi = ((((pidx & BNXT_RE_DB_PIHI_MASK) << BNXT_RE_DB_PIHI_SHIFT) | + (qid & BNXT_RE_DB_QID_MASK)) | + ((typ & BNXT_RE_DB_TYP_MASK) << BNXT_RE_DB_TYP_SHIFT) | + (0x1UL << BNXT_RE_DB_VALID_SHIFT)); + hdr->typ_qid_indx = htole64((key_lo | (key_hi << 32))); +} + +static inline void bnxt_re_init_db_hdr(struct bnxt_re_db_hdr *hdr, + uint32_t indx, uint32_t toggle, + uint32_t qid, uint32_t typ) +{ + __u64 key_lo, key_hi; + + key_lo = htole32(indx | toggle); + key_hi = ((qid & BNXT_RE_DB_QID_MASK) | + ((typ & BNXT_RE_DB_TYP_MASK) << BNXT_RE_DB_TYP_SHIFT) | + (0x1UL << BNXT_RE_DB_VALID_SHIFT)); + hdr->typ_qid_indx = htole64((key_lo | (key_hi << 32))); +} + +static inline void __bnxt_re_ring_pend_db(__u64 *ucdb, __u64 key, + struct bnxt_re_qp *qp) +{ + struct bnxt_re_db_hdr hdr; + + bnxt_re_init_db_hdr(&hdr, + (*qp->jsqq->hwque->dbtail | + ((qp->jsqq->hwque->flags & + BNXT_RE_FLAG_EPOCH_TAIL_MASK) << + BNXT_RE_DB_EPOCH_TAIL_SHIFT)), 0, + qp->qpid, + BNXT_RE_QUE_TYPE_SQ); + + while (1) { + if (__sync_bool_compare_and_swap(&qp->sq_dbr_lock, 0, 1)) { + qp->sq_shadow_db_key = hdr.typ_qid_indx; + bnxt_re_wm_barrier(); + iowrite64(ucdb, key); + bnxt_re_wm_barrier(); + qp->sq_dbr_lock = 0; + break; + } + } +} + +void bnxt_re_ring_rq_db(struct bnxt_re_qp *qp) +{ + struct bnxt_re_db_hdr hdr; + + if (bnxt_re_do_pacing(qp->cntx, &qp->rand)) + return; + bnxt_re_init_db_hdr(&hdr, + (*qp->jrqq->hwque->dbtail | + ((qp->jrqq->hwque->flags & + BNXT_RE_FLAG_EPOCH_TAIL_MASK) << + BNXT_RE_DB_EPOCH_TAIL_SHIFT)), 0, + qp->qpid, + BNXT_RE_QUE_TYPE_RQ); + bnxt_re_ring_db(qp->udpi, hdr.typ_qid_indx, &qp->rq_shadow_db_key, + &qp->rq_dbr_lock); +} + +void bnxt_re_ring_sq_db(struct bnxt_re_qp *qp) +{ + struct bnxt_re_db_hdr hdr; + + if (bnxt_re_do_pacing(qp->cntx, &qp->rand)) + return; + bnxt_re_init_db_hdr(&hdr, + (*qp->jsqq->hwque->dbtail | + ((qp->jsqq->hwque->flags & + BNXT_RE_FLAG_EPOCH_TAIL_MASK) << + BNXT_RE_DB_EPOCH_TAIL_SHIFT)), 0, + qp->qpid, + BNXT_RE_QUE_TYPE_SQ); + bnxt_re_ring_db(qp->udpi, hdr.typ_qid_indx, &qp->sq_shadow_db_key, + &qp->sq_dbr_lock); +} + +void bnxt_re_ring_srq_db(struct bnxt_re_srq *srq) +{ + struct bnxt_re_db_hdr hdr; + + if (bnxt_re_do_pacing(srq->uctx, &srq->rand)) + return; + bnxt_re_init_db_hdr(&hdr, + (srq->srqq->tail | + ((srq->srqq->flags & + BNXT_RE_FLAG_EPOCH_TAIL_MASK) << + 
BNXT_RE_DB_EPOCH_TAIL_SHIFT)), 0, + srq->srqid, BNXT_RE_QUE_TYPE_SRQ); + bnxt_re_ring_db(srq->udpi, hdr.typ_qid_indx, &srq->shadow_db_key, + &srq->dbr_lock); +} + +void bnxt_re_ring_srq_arm(struct bnxt_re_srq *srq) +{ + struct bnxt_re_db_hdr hdr; + + if (bnxt_re_do_pacing(srq->uctx, &srq->rand)) + return; + bnxt_re_init_db_hdr(&hdr, srq->cap.srq_limit, 0, srq->srqid, + BNXT_RE_QUE_TYPE_SRQ_ARM); + bnxt_re_ring_db(srq->udpi, hdr.typ_qid_indx, &srq->shadow_db_key, + &srq->dbr_lock); +} + +void bnxt_re_ring_cq_db(struct bnxt_re_cq *cq) +{ + struct bnxt_re_db_hdr hdr; + + if (bnxt_re_do_pacing(cq->cntx, &cq->rand)) + return; + bnxt_re_init_db_hdr(&hdr, + (cq->cqq->head | + ((cq->cqq->flags & + BNXT_RE_FLAG_EPOCH_HEAD_MASK) << + BNXT_RE_DB_EPOCH_HEAD_SHIFT)), 0, + cq->cqid, + BNXT_RE_QUE_TYPE_CQ); + bnxt_re_ring_db(cq->udpi, hdr.typ_qid_indx, &cq->shadow_db_key, + &cq->dbr_lock); +} + +void bnxt_re_ring_cq_arm_db(struct bnxt_re_cq *cq, uint8_t aflag) +{ + uint32_t *cq_page = cq->cq_page; + struct bnxt_re_db_hdr hdr; + uint32_t toggle = 0; + + if (cq_page) + toggle = *cq_page; + + if (bnxt_re_do_pacing(cq->cntx, &cq->rand)) + return; + bnxt_re_init_db_hdr(&hdr, + (cq->cqq->head | + ((cq->cqq->flags & + BNXT_RE_FLAG_EPOCH_HEAD_MASK) << + BNXT_RE_DB_EPOCH_HEAD_SHIFT)), + toggle << BNXT_RE_DB_TOGGLE_SHIFT, + cq->cqid, aflag); + bnxt_re_ring_db(cq->udpi, hdr.typ_qid_indx, &cq->shadow_db_key, + &cq->dbr_lock); +} + +void bnxt_re_ring_pstart_db(struct bnxt_re_qp *qp, + struct bnxt_re_push_buffer *pbuf) +{ + __u64 key; + + if (bnxt_re_do_pacing(qp->cntx, &qp->rand)) + return; + key = ((((pbuf->wcdpi & BNXT_RE_DB_PIHI_MASK) << + BNXT_RE_DB_PIHI_SHIFT) | (pbuf->qpid & BNXT_RE_DB_QID_MASK)) | + ((BNXT_RE_PUSH_TYPE_START & BNXT_RE_DB_TYP_MASK) << + BNXT_RE_DB_TYP_SHIFT) | (0x1UL << BNXT_RE_DB_VALID_SHIFT)); + key <<= 32; + key |= ((((__u32)pbuf->wcdpi & BNXT_RE_DB_PILO_MASK) << + BNXT_RE_DB_PILO_SHIFT) | (pbuf->st_idx & + BNXT_RE_DB_INDX_MASK)); + bnxt_re_wm_barrier(); + iowrite64(pbuf->ucdb, key); +} + +void bnxt_re_ring_pend_db(struct bnxt_re_qp *qp, + struct bnxt_re_push_buffer *pbuf) +{ + __u64 key; + + if (bnxt_re_do_pacing(qp->cntx, &qp->rand)) + return; + key = ((((pbuf->wcdpi & BNXT_RE_DB_PIHI_MASK) << + BNXT_RE_DB_PIHI_SHIFT) | (pbuf->qpid & BNXT_RE_DB_QID_MASK)) | + ((BNXT_RE_PUSH_TYPE_END & BNXT_RE_DB_TYP_MASK) << + BNXT_RE_DB_TYP_SHIFT) | (0x1UL << BNXT_RE_DB_VALID_SHIFT)); + key <<= 32; + key |= ((((__u32)pbuf->wcdpi & BNXT_RE_DB_PILO_MASK) << + BNXT_RE_DB_PILO_SHIFT) | (pbuf->tail & + BNXT_RE_DB_INDX_MASK)); + __bnxt_re_ring_pend_db(pbuf->ucdb, key, qp); +} + +void bnxt_re_fill_ppp(struct bnxt_re_push_buffer *pbuf, + struct bnxt_re_qp *qp, uint8_t len, uint32_t idx) +{ + struct bnxt_re_db_ppp_hdr phdr = {}; + __u64 *dst, *src; + __u8 plen; + int indx; + + src = (__u64 *)&phdr; + plen = len + sizeof(phdr) + bnxt_re_get_sqe_hdr_sz(); + + bnxt_re_init_db_hdr(&phdr.db_hdr, + (*qp->jsqq->hwque->dbtail | + ((qp->jsqq->hwque->flags & + BNXT_RE_FLAG_EPOCH_TAIL_MASK) << + BNXT_RE_DB_EPOCH_TAIL_SHIFT)), 0, + qp->qpid, + BNXT_RE_QUE_TYPE_SQ); + + phdr.rsv_psz_pidx = ((pbuf->st_idx & BNXT_RE_DB_INDX_MASK) | + (((plen % 8 ? 
(plen / 8) + 1 : + plen / 8) & BNXT_RE_PUSH_SIZE_MASK) << + BNXT_RE_PUSH_SIZE_SHIFT)); + + bnxt_re_wm_barrier(); + for (indx = 0; indx < 2; indx++) { + dst = (__u64 *)(pbuf->pbuf + indx); + iowrite64(dst, *src); + src++; + } + bnxt_re_copy_data_to_pb(pbuf, 1, idx); + mmio_flush_writes(); +} + +void bnxt_re_fill_push_wcb(struct bnxt_re_qp *qp, + struct bnxt_re_push_buffer *pbuf, uint32_t idx) +{ + bnxt_re_ring_pstart_db(qp, pbuf); + mmio_wc_start(); + bnxt_re_copy_data_to_pb(pbuf, 0, idx); + /* Flush WQE write before push end db. */ + mmio_flush_writes(); + bnxt_re_ring_pend_db(qp, pbuf); +} + +int bnxt_re_init_pbuf_list(struct bnxt_re_context *ucntx) +{ + struct bnxt_re_push_buffer *pbuf; + int indx, wqesz; + int size, offt; + __u64 wcpage; + __u64 dbpage; + void *base; + + size = (sizeof(*ucntx->pbrec) + + 16 * (sizeof(*ucntx->pbrec->pbuf) + + sizeof(struct bnxt_re_push_wqe))); + ucntx->pbrec = calloc(1, size); + if (!ucntx->pbrec) + goto out; + + offt = sizeof(*ucntx->pbrec); + base = ucntx->pbrec; + ucntx->pbrec->pbuf = (base + offt); + ucntx->pbrec->pbmap = ~0x00; + ucntx->pbrec->pbmap &= ~0x7fff; /* 15 bits */ + ucntx->pbrec->udpi = &ucntx->udpi; + + wqesz = sizeof(struct bnxt_re_push_wqe); + wcpage = (__u64)ucntx->udpi.wcdbpg; + dbpage = (__u64)ucntx->udpi.dbpage; + offt = sizeof(*ucntx->pbrec->pbuf) * 16; + base = (char *)ucntx->pbrec->pbuf + offt; + for (indx = 0; indx < 16; indx++) { + pbuf = &ucntx->pbrec->pbuf[indx]; + pbuf->wqe = base + indx * wqesz; + pbuf->pbuf = (__u64 *)(wcpage + indx * wqesz); + pbuf->ucdb = (__u64 *)(dbpage + (indx + 1) * sizeof(__u64)); + pbuf->wcdpi = ucntx->udpi.wcdpi; + } + + return 0; +out: + return -ENOMEM; +} + +struct bnxt_re_push_buffer *bnxt_re_get_pbuf(uint8_t *push_st_en, + uint8_t ppp_idx, + struct bnxt_re_context *cntx) +{ + struct bnxt_re_push_buffer *pbuf = NULL; + uint8_t buf_state = 0; + __u32 old; + int bit; + + if (_is_chip_thor2(cntx->cctx)) { + buf_state = !!(*push_st_en & BNXT_RE_PPP_STATE_MASK); + pbuf = &cntx->pbrec->pbuf[(ppp_idx * 2) + buf_state]; + /* Flip */ + *push_st_en ^= 1UL << BNXT_RE_PPP_ST_SHIFT; + } else { + old = cntx->pbrec->pbmap; + while ((bit = __builtin_ffs(~cntx->pbrec->pbmap)) != 0) { + if (__sync_bool_compare_and_swap + (&cntx->pbrec->pbmap, + old, + (old | 0x01 << (bit - 1)))) + break; + old = cntx->pbrec->pbmap; + } + + if (bit) { + pbuf = &cntx->pbrec->pbuf[bit]; + pbuf->nbit = bit; + } + } + + return pbuf; +} + +void bnxt_re_put_pbuf(struct bnxt_re_context *cntx, + struct bnxt_re_push_buffer *pbuf) +{ + struct bnxt_re_push_rec *pbrec; + __u32 old; + int bit; + + if (_is_chip_thor2(cntx->cctx)) + return; + + pbrec = cntx->pbrec; + + if (pbuf->nbit) { + bit = pbuf->nbit; + pbuf->nbit = 0; + old = pbrec->pbmap; + while (!__sync_bool_compare_and_swap(&pbrec->pbmap, old, + (old & (~(0x01 << + (bit - 1)))))) + old = pbrec->pbmap; + } +} + +void bnxt_re_destroy_pbuf_list(struct bnxt_re_context *cntx) +{ + free(cntx->pbrec); +} + +void bnxt_re_replay_db(struct bnxt_re_context *cntx, + struct xorshift32_state *state, struct bnxt_re_dpi *dpi, + uint64_t *shadow_key, uint8_t *dbr_lock) +{ + if (bnxt_re_do_pacing(cntx, state)) + return; + cntx->replay_cnt++; + if (cntx->replay_cnt % BNXT_RE_DB_REPLAY_YIELD_CNT == 0) + pthread_yield(); + if (__sync_bool_compare_and_swap(dbr_lock, 0, 1)) { + bnxt_re_wm_barrier(); + if (*shadow_key == BNXT_RE_DB_KEY_INVALID) { + *dbr_lock = 0; + return; + } + iowrite64(dpi->dbpage, *shadow_key); + bnxt_re_wm_barrier(); + *dbr_lock = 0; + } +} + +void bnxt_re_db_recovery(struct 
bnxt_re_context *cntx) +{ + struct bnxt_re_list_node *cur, *tmp; + struct bnxt_re_qp *qp; + struct bnxt_re_cq *cq; + struct bnxt_re_srq *srq; + + pthread_spin_lock(&cntx->qp_dbr_res.lock); + list_for_each_node_safe(cur, tmp, &cntx->qp_dbr_res.head) { + qp = list_node(cur, struct bnxt_re_qp, dbnode); + bnxt_re_replay_db(cntx, &qp->rand, qp->udpi, + &qp->sq_shadow_db_key, &qp->sq_dbr_lock); + bnxt_re_replay_db(cntx, &qp->rand, qp->udpi, + &qp->rq_shadow_db_key, &qp->rq_dbr_lock); + } + pthread_spin_unlock(&cntx->qp_dbr_res.lock); + pthread_spin_lock(&cntx->cq_dbr_res.lock); + list_for_each_node_safe(cur, tmp, &cntx->cq_dbr_res.head) { + cq = list_node(cur, struct bnxt_re_cq, dbnode); + bnxt_re_replay_db(cntx, &cq->rand, cq->udpi, + &cq->shadow_db_key, &cq->dbr_lock); + } + pthread_spin_unlock(&cntx->cq_dbr_res.lock); + pthread_spin_lock(&cntx->srq_dbr_res.lock); + list_for_each_node_safe(cur, tmp, &cntx->srq_dbr_res.head) { + srq = list_node(cur, struct bnxt_re_srq, dbnode); + bnxt_re_replay_db(cntx, &srq->rand, srq->udpi, + &srq->shadow_db_key, &srq->dbr_lock); + } + pthread_spin_unlock(&cntx->srq_dbr_res.lock); +} + +void *bnxt_re_dbr_thread(void *arg) +{ + uint32_t *epoch, *epoch_ack, usr_epoch; + struct bnxt_re_context *cntx = arg; + struct ibv_cq *ev_cq; + void *ev_ctx; + int ret; + + while (1) { + ret = ibv_get_cq_event(cntx->dbr_ev_chan, &ev_cq, &ev_ctx); + if (ret) { + fprintf(stderr, "Failed to get cq_event\n"); + pthread_exit(NULL); + } + epoch = cntx->db_recovery_page; + epoch_ack = epoch + 1; + if (!epoch || !epoch_ack) { + fprintf(stderr, "DB reovery page is NULL\n"); + pthread_exit(NULL); + } + if (*epoch == *epoch_ack) { + ibv_ack_cq_events(ev_cq, 1); + continue; + } + usr_epoch = *epoch; + bnxt_re_db_recovery(cntx); + *epoch_ack = usr_epoch; + ibv_ack_cq_events(ev_cq, 1); + } +} diff --git a/contrib/ofed/libbnxtre/list.h b/contrib/ofed/libbnxtre/list.h new file mode 100644 index 000000000000..cf9206546a48 --- /dev/null +++ b/contrib/ofed/libbnxtre/list.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2024, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef __BNXT_RE_LIST_H__ +#define __BNXT_RE_LIST_H__ + +struct bnxt_re_list_node { + struct bnxt_re_list_node *next, *prev; + uint8_t valid; +}; + +struct bnxt_re_list_head { + struct bnxt_re_list_node node; + pthread_mutex_t lock; +}; + +#define DBLY_LIST_HEAD_INIT(name) { { true, &(name.node), &(name.node) } , \ + PTHREAD_MUTEX_INITIALIZER } + +#define DBLY_LIST_HEAD(name) \ + struct bnxt_re_list_head name = DBLY_LIST_HEAD_INIT(name); \ + +#define INIT_DBLY_LIST_NODE(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); (ptr)->valid = false; \ +} while (0) + +#define INIT_DBLY_LIST_HEAD(ptr) INIT_DBLY_LIST_NODE(ptr.node) + +static inline void __list_add_node(struct bnxt_re_list_node *new, + struct bnxt_re_list_node *prev, + struct bnxt_re_list_node *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +static inline void list_add_node_tail(struct bnxt_re_list_node *new, + struct bnxt_re_list_head *head) +{ + __list_add_node(new, head->node.prev, &head->node); + new->valid = true; +} + +static inline void __list_del_node(struct bnxt_re_list_node *prev, + struct bnxt_re_list_node *next) +{ + next->prev = prev; + prev->next = next; +} + +static inline void list_del_node(struct bnxt_re_list_node *entry) +{ + __list_del_node(entry->prev, entry->next); + entry->next = entry->prev = 0; + entry->valid = false; +} + +#define bnxt_re_list_empty(head) \ + (((head)->node.next == &(head)->node) && \ + ((head)->node.prev == &(head)->node)) + +#define list_lock(head) pthread_mutex_lock(&((head)->lock)) +#define list_unlock(head) pthread_mutex_unlock(&((head)->lock)) + +#define list_node(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +#define list_node_valid(node) (node)->valid + +/** + * list_for_each_node_safe - iterate over a list safe against removal of list entry + * @pos: the &struct bnxt_re_list_head to use as a loop counter. + * @n: another &struct bnxt_re_list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_node_safe(pos, n, head) \ + for (pos = (head)->node.next, n = pos->next; pos != &((head)->node); \ + pos = n, n = pos->next) + +static inline void bnxt_re_list_add_node(struct bnxt_re_list_node *node, + struct bnxt_re_list_head *head) +{ + if (!list_node_valid(node)) + list_add_node_tail(node, head); +} + +static inline void bnxt_re_list_del_node(struct bnxt_re_list_node *node, + struct bnxt_re_list_head *head) +{ + if (list_node_valid(node)) + list_del_node(node); +} + +#endif /* __bnxt_re_LIST_H__ */ diff --git a/contrib/ofed/libbnxtre/main.c b/contrib/ofed/libbnxtre/main.c new file mode 100644 index 000000000000..8e1903688a53 --- /dev/null +++ b/contrib/ofed/libbnxtre/main.c @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2024, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "abi.h" +#include "main.h" +#include "verbs.h" + +#define PCI_VENDOR_ID_BROADCOM 0x14E4 + +BNXT_RE_DEFINE_CNA_TABLE(cna_table) = { + CNA(BROADCOM, 0x1605), /* BCM57454 Stratus NPAR */ + CNA(BROADCOM, 0x1606), /* BCM57454 Stratus VF */ + CNA(BROADCOM, 0x1614), /* BCM57454 Stratus */ + CNA(BROADCOM, 0x16C0), /* BCM57417 NPAR */ + CNA(BROADCOM, 0x16C1), /* BMC57414 VF */ + CNA(BROADCOM, 0x16CE), /* BMC57311 */ + CNA(BROADCOM, 0x16CF), /* BMC57312 */ + CNA(BROADCOM, 0x16D6), /* BMC57412*/ + CNA(BROADCOM, 0x16D7), /* BMC57414 */ + CNA(BROADCOM, 0x16D8), /* BMC57416 Cu */ + CNA(BROADCOM, 0x16D9), /* BMC57417 Cu */ + CNA(BROADCOM, 0x16DF), /* BMC57314 */ + CNA(BROADCOM, 0x16E2), /* BMC57417 */ + CNA(BROADCOM, 0x16E3), /* BMC57416 */ + CNA(BROADCOM, 0x16E5), /* BMC57314 VF */ + CNA(BROADCOM, 0x16EB), /* BCM57412 NPAR */ + CNA(BROADCOM, 0x16ED), /* BCM57414 NPAR */ + CNA(BROADCOM, 0x16EF), /* BCM57416 NPAR */ + CNA(BROADCOM, 0x16F0), /* BCM58730 */ + CNA(BROADCOM, 0x16F1), /* BCM57452 Stratus Mezz */ + CNA(BROADCOM, 0x1750), /* Chip num 57500 */ + CNA(BROADCOM, 0x1751), /* BCM57504 Gen P5 */ + CNA(BROADCOM, 0x1752), /* BCM57502 Gen P5 */ + CNA(BROADCOM, 0x1760), /* BCM57608 Thor 2*/ + CNA(BROADCOM, 0xD82E), /* BCM5760x TH2 VF */ + CNA(BROADCOM, 0x1803), /* BCM57508 Gen P5 NPAR */ + CNA(BROADCOM, 0x1804), /* BCM57504 Gen P5 NPAR */ + CNA(BROADCOM, 0x1805), /* BCM57502 Gen P5 NPAR */ + CNA(BROADCOM, 0x1807), /* BCM5750x Gen P5 VF */ + CNA(BROADCOM, 0x1809), /* BCM5750x Gen P5 VF HV */ + CNA(BROADCOM, 0xD800), /* BCM880xx SR VF */ + CNA(BROADCOM, 0xD802), /* BCM58802 SR */ + CNA(BROADCOM, 0xD804), /* BCM58804 SR */ + CNA(BROADCOM, 0xD818) /* BCM58818 Gen P5 SR2 */ +}; + +static struct ibv_context_ops bnxt_re_cntx_ops = { + .query_device = bnxt_re_query_device, + .query_port = bnxt_re_query_port, + .alloc_pd = bnxt_re_alloc_pd, + .dealloc_pd = bnxt_re_free_pd, + .reg_mr = bnxt_re_reg_mr, + .dereg_mr = bnxt_re_dereg_mr, + .create_cq = bnxt_re_create_cq, + .poll_cq = bnxt_re_poll_cq, + .req_notify_cq = bnxt_re_arm_cq, + .cq_event = bnxt_re_cq_event, + .resize_cq = bnxt_re_resize_cq, + .destroy_cq = bnxt_re_destroy_cq, + .create_srq = bnxt_re_create_srq, + .modify_srq = bnxt_re_modify_srq, + .query_srq = bnxt_re_query_srq, + .destroy_srq = bnxt_re_destroy_srq, + .post_srq_recv = bnxt_re_post_srq_recv, + .create_qp = bnxt_re_create_qp, + .query_qp = bnxt_re_query_qp, + .modify_qp = bnxt_re_modify_qp, + .destroy_qp = bnxt_re_destroy_qp, + .post_send = bnxt_re_post_send, + .post_recv = bnxt_re_post_recv, + .async_event = bnxt_re_async_event, + .create_ah = bnxt_re_create_ah, + .destroy_ah = bnxt_re_destroy_ah +}; + 
+bool _is_chip_gen_p5(struct bnxt_re_chip_ctx *cctx) +{ + return (cctx->chip_num == CHIP_NUM_57508 || + cctx->chip_num == CHIP_NUM_57504 || + cctx->chip_num == CHIP_NUM_57502); +} + +bool _is_chip_a0(struct bnxt_re_chip_ctx *cctx) +{ + return !cctx->chip_rev; +} + +bool _is_chip_thor2(struct bnxt_re_chip_ctx *cctx) +{ + return (cctx->chip_num == CHIP_NUM_58818 || + cctx->chip_num == CHIP_NUM_57608); +} + +bool _is_chip_gen_p5_thor2(struct bnxt_re_chip_ctx *cctx) +{ + return(_is_chip_gen_p5(cctx) || _is_chip_thor2(cctx)); +} + +bool _is_db_drop_recovery_enable(struct bnxt_re_context *cntx) +{ + return cntx->comp_mask & BNXT_RE_COMP_MASK_UCNTX_DBR_RECOVERY_ENABLED; +} + +/* Determine the env variable */ +static int single_threaded_app(void) +{ + char *env; + + env = getenv("BNXT_SINGLE_THREADED"); + if (env) + return strcmp(env, "1") ? 0 : 1; + + return 0; +} + +static int enable_dynamic_debug(void) +{ + char *env; + + env = getenv("BNXT_DYN_DBG"); + if (env) + return strcmp(env, "1") ? 0 : 1; + + return 0; +} + +/* Static Context Init functions */ +static int _bnxt_re_init_context(struct bnxt_re_dev *dev, + struct bnxt_re_context *cntx, + struct bnxt_re_cntx_resp *resp, int cmd_fd) +{ + bnxt_single_threaded = 0; + cntx->cctx = malloc(sizeof(struct bnxt_re_chip_ctx)); + if (!cntx->cctx) + goto failed; + + if (BNXT_RE_ABI_VERSION >= 4) { + cntx->cctx->chip_num = resp->chip_id0 & 0xFFFF; + cntx->cctx->chip_rev = (resp->chip_id0 >> + BNXT_RE_CHIP_ID0_CHIP_REV_SFT) & 0xFF; + cntx->cctx->chip_metal = (resp->chip_id0 >> + BNXT_RE_CHIP_ID0_CHIP_MET_SFT) & + 0xFF; + cntx->cctx->chip_is_gen_p5_thor2 = _is_chip_gen_p5_thor2(cntx->cctx); + } + if (BNXT_RE_ABI_VERSION != 4) { + cntx->dev_id = resp->dev_id; + cntx->max_qp = resp->max_qp; + } + + if (BNXT_RE_ABI_VERSION > 5) + cntx->modes = resp->modes; + cntx->comp_mask = resp->comp_mask; + dev->pg_size = resp->pg_size; + dev->cqe_size = resp->cqe_size; + dev->max_cq_depth = resp->max_cqd; + + /* mmap shared page. */ + cntx->shpg = mmap(NULL, dev->pg_size, PROT_READ | PROT_WRITE, + MAP_SHARED, cmd_fd, 0); + if (cntx->shpg == MAP_FAILED) { + cntx->shpg = NULL; + goto free; + } + + if (cntx->comp_mask & BNXT_RE_COMP_MASK_UCNTX_DBR_PACING_ENABLED) { + cntx->dbr_page = mmap(NULL, dev->pg_size, PROT_READ, + MAP_SHARED, cmd_fd, BNXT_RE_DBR_PAGE); + if (cntx->dbr_page == MAP_FAILED) { + munmap(cntx->shpg, dev->pg_size); + cntx->shpg = NULL; + cntx->dbr_page = NULL; + goto free; + } + } + + /* check for ENV for single thread */ + bnxt_single_threaded = single_threaded_app(); + if (bnxt_single_threaded) + fprintf(stderr, DEV " Running in Single threaded mode\n"); + bnxt_dyn_debug = enable_dynamic_debug(); + pthread_mutex_init(&cntx->shlock, NULL); + + return 0; + +free: + free(cntx->cctx); +failed: + fprintf(stderr, DEV "Failed to initialize context for device\n"); + return errno; +} + +static void _bnxt_re_uninit_context(struct bnxt_re_dev *dev, + struct bnxt_re_context *cntx) +{ + int ret; + + if (cntx->comp_mask & BNXT_RE_COMP_MASK_UCNTX_DBR_PACING_ENABLED) + munmap(cntx->dbr_page, dev->pg_size); + /* Unmap if anything device specific was + * mapped in init_context. + */ + pthread_mutex_destroy(&cntx->shlock); + if (cntx->shpg) + munmap(cntx->shpg, dev->pg_size); + + /* Un-map DPI only for the first PD that was + * allocated in this context. 
+ */ + if (cntx->udpi.wcdbpg && cntx->udpi.wcdbpg != MAP_FAILED) { + munmap(cntx->udpi.wcdbpg, dev->pg_size); + cntx->udpi.wcdbpg = NULL; + bnxt_re_destroy_pbuf_list(cntx); + } + + if (cntx->udpi.dbpage && cntx->udpi.dbpage != MAP_FAILED) { + munmap(cntx->udpi.dbpage, dev->pg_size); + cntx->udpi.dbpage = NULL; + } + if (_is_db_drop_recovery_enable(cntx)) { + if (cntx->dbr_cq) { + ret = pthread_cancel(cntx->dbr_thread); + if (ret) + fprintf(stderr, DEV "pthread_cancel error %d\n", ret); + + if (cntx->db_recovery_page) + munmap(cntx->db_recovery_page, dev->pg_size); + ret = ibv_destroy_cq(cntx->dbr_cq); + if (ret) + fprintf(stderr, DEV "ibv_destroy_cq error %d\n", ret); + } + + if (cntx->dbr_ev_chan) { + ret = ibv_destroy_comp_channel(cntx->dbr_ev_chan); + if (ret) + fprintf(stderr, + DEV "ibv_destroy_comp_channel error\n"); + } + pthread_spin_destroy(&cntx->qp_dbr_res.lock); + pthread_spin_destroy(&cntx->cq_dbr_res.lock); + pthread_spin_destroy(&cntx->srq_dbr_res.lock); + } + free(cntx->cctx); +} + +/* Context Init functions */ +int bnxt_re_init_context(struct verbs_device *vdev, struct ibv_context *ibvctx, + int cmd_fd) +{ + struct bnxt_re_cntx_resp resp = {}; + struct bnxt_re_cntx_req req = {}; + struct bnxt_re_context *cntx; + struct bnxt_re_dev *rdev; + int ret = 0; + + rdev = to_bnxt_re_dev(&vdev->device); + cntx = to_bnxt_re_context(ibvctx); + ibvctx->cmd_fd = cmd_fd; + + req.comp_mask |= BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT; + req.comp_mask |= BNXT_RE_COMP_MASK_REQ_UCNTX_RSVD_WQE; + ret = ibv_cmd_get_context(ibvctx, &req.cmd, sizeof(req), + &resp.resp, sizeof(resp)); + + if (ret) { + fprintf(stderr, DEV "Failed to get context for device, ret = 0x%x, errno %d\n", ret, errno); + return errno; + } + + ret = _bnxt_re_init_context(rdev, cntx, &resp, cmd_fd); + if (!ret) + ibvctx->ops = bnxt_re_cntx_ops; + + cntx->rdev = rdev; + ret = bnxt_re_query_device_compat(&cntx->ibvctx, &rdev->devattr); + + return ret; +} + +void bnxt_re_uninit_context(struct verbs_device *vdev, + struct ibv_context *ibvctx) +{ + struct bnxt_re_context *cntx; + struct bnxt_re_dev *rdev; + + cntx = to_bnxt_re_context(ibvctx); + rdev = cntx->rdev; + _bnxt_re_uninit_context(rdev, cntx); +} + +static struct verbs_device_ops bnxt_re_dev_ops = { + .init_context = bnxt_re_init_context, + .uninit_context = bnxt_re_uninit_context, +}; + +static struct verbs_device *bnxt_re_driver_init(const char *uverbs_sys_path, + int abi_version) +{ + char value[10]; + struct bnxt_re_dev *dev; + unsigned vendor, device; + int i; + + if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", + value, sizeof(value)) < 0) + return NULL; + vendor = strtol(value, NULL, 16); + + if (ibv_read_sysfs_file(uverbs_sys_path, "device/device", + value, sizeof(value)) < 0) + return NULL; + device = strtol(value, NULL, 16); + + for (i = 0; i < sizeof(cna_table) / sizeof(cna_table[0]); ++i) + if (vendor == cna_table[i].vendor && + device == cna_table[i].device) + goto found; + return NULL; +found: + if (abi_version != BNXT_RE_ABI_VERSION) { + fprintf(stderr, DEV "FATAL: Max supported ABI of %s is %d " + "check for the latest version of kernel driver and" + "user library\n", uverbs_sys_path, abi_version); + return NULL; + } + + dev = calloc(1, sizeof(*dev)); + if (!dev) { + fprintf(stderr, DEV "Failed to allocate device for %s\n", + uverbs_sys_path); + return NULL; + } + + dev->vdev.sz = sizeof(*dev); + dev->vdev.size_of_context = + sizeof(struct bnxt_re_context) - sizeof(struct ibv_context); + dev->vdev.ops = &bnxt_re_dev_ops; + + return &dev->vdev; +} 
+ +static __attribute__((constructor)) void bnxt_re_register_driver(void) +{ + verbs_register_driver("bnxtre", bnxt_re_driver_init); +} diff --git a/contrib/ofed/libbnxtre/main.h b/contrib/ofed/libbnxtre/main.h new file mode 100644 index 000000000000..ea258de22abd --- /dev/null +++ b/contrib/ofed/libbnxtre/main.h @@ -0,0 +1,538 @@ +/* + * Copyright (c) 2024, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef __BNXT_RE_MAIN_H__ +#define __BNXT_RE_MAIN_H__ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "abi.h" +#include "list.h" +#include "memory.h" + +#define DEV "bnxt_re : " +#define BNXT_RE_UD_QP_STALL 0x400000 + +#define CHIP_NUM_57508 0x1750 +#define CHIP_NUM_57504 0x1751 +#define CHIP_NUM_57502 0x1752 +#define CHIP_NUM_58818 0xd818 +#define CHIP_NUM_57608 0x1760 + +#define BNXT_NSEC_PER_SEC 1000000000UL + +struct bnxt_re_chip_ctx { + __u16 chip_num; + __u8 chip_rev; + __u8 chip_metal; + bool chip_is_gen_p5_thor2; +}; + +#define BNXT_RE_MAP_WC 0x1000 +#define BNXT_RE_DBR_PAGE 0x2000 +#define BNXT_RE_DB_RECOVERY_PAGE 0x3000 + +#define BNXT_RE_DB_REPLAY_YIELD_CNT 256 +#define BNXT_RE_DB_KEY_INVALID -1 +#define BNXT_RE_MAX_DO_PACING 0xFFFF +#define bnxt_re_wm_barrier() udma_to_device_barrier() +#define unlikely(x) __builtin_expect(!!(x), 0) +#define likely(x) __builtin_expect(!!(x), 1) + +#define CNA(v, d) \ + { .vendor = PCI_VENDOR_ID_##v, \ + .device = d } +#define BNXT_RE_DEFINE_CNA_TABLE(_name) \ + static const struct { \ + unsigned vendor; \ + unsigned device; \ + } _name[] + +struct bnxt_re_dpi { + __u32 dpindx; + __u32 wcdpi; + __u64 *dbpage; + __u64 *wcdbpg; +}; + +struct bnxt_re_pd { + struct ibv_pd ibvpd; + uint32_t pdid; +}; + +struct xorshift32_state { + uint32_t seed; +}; + +struct bnxt_re_cq { + struct ibv_cq ibvcq; + struct bnxt_re_list_head sfhead; + struct bnxt_re_list_head rfhead; + struct bnxt_re_list_head prev_cq_head; + struct bnxt_re_context *cntx; + struct bnxt_re_queue *cqq; + struct bnxt_re_dpi *udpi; + struct bnxt_re_mem *resize_mem; + struct bnxt_re_mem *mem; + struct bnxt_re_list_node dbnode; + uint64_t 
shadow_db_key; + uint32_t cqe_sz; + uint32_t cqid; + struct xorshift32_state rand; + int deferred_arm_flags; + bool first_arm; + bool deferred_arm; + bool phase; + uint8_t dbr_lock; + void *cq_page; +}; + +struct bnxt_re_push_buffer { + __u64 *pbuf; /*push wc buffer */ + __u64 *wqe; /* hwqe addresses */ + __u64 *ucdb; + uint32_t st_idx; + uint32_t qpid; + uint16_t wcdpi; + uint16_t nbit; + uint32_t tail; +}; + +enum bnxt_re_push_info_mask { + BNXT_RE_PUSH_SIZE_MASK = 0x1FUL, + BNXT_RE_PUSH_SIZE_SHIFT = 0x18UL +}; + +struct bnxt_re_db_ppp_hdr { + struct bnxt_re_db_hdr db_hdr; + __u64 rsv_psz_pidx; +}; + +struct bnxt_re_push_rec { + struct bnxt_re_dpi *udpi; + struct bnxt_re_push_buffer *pbuf; + __u32 pbmap; /* only 16 bits in use */ +}; + +struct bnxt_re_wrid { + uint64_t wrid; + int next_idx; + uint32_t bytes; + uint8_t sig; + uint8_t slots; + uint8_t wc_opcd; +}; + +struct bnxt_re_qpcap { + uint32_t max_swr; + uint32_t max_rwr; + uint32_t max_ssge; + uint32_t max_rsge; + uint32_t max_inline; + uint8_t sqsig; + uint8_t is_atomic_cap; +}; + +struct bnxt_re_srq { + struct ibv_srq ibvsrq; + struct ibv_srq_attr cap; + uint32_t srqid; + struct bnxt_re_context *uctx; + struct bnxt_re_queue *srqq; + struct bnxt_re_wrid *srwrid; + struct bnxt_re_dpi *udpi; + struct bnxt_re_mem *mem; + int start_idx; + int last_idx; + struct bnxt_re_list_node dbnode; + uint64_t shadow_db_key; + struct xorshift32_state rand; + uint8_t dbr_lock; + bool arm_req; +}; + +struct bnxt_re_joint_queue { + struct bnxt_re_context *cntx; + struct bnxt_re_queue *hwque; + struct bnxt_re_wrid *swque; + uint32_t start_idx; + uint32_t last_idx; +}; + +struct bnxt_re_qp { + struct ibv_qp ibvqp; + struct bnxt_re_qpcap cap; + struct bnxt_re_context *cntx; + struct bnxt_re_chip_ctx *cctx; + struct bnxt_re_joint_queue *jsqq; + struct bnxt_re_joint_queue *jrqq; + struct bnxt_re_dpi *udpi; + uint64_t wqe_cnt; + uint16_t mtu; + uint16_t qpst; + uint8_t qptyp; + uint8_t qpmode; + uint8_t push_st_en; + uint8_t ppp_idx; + uint32_t sq_psn; + uint32_t sq_msn; + uint32_t qpid; + uint16_t max_push_sz; + uint8_t sq_dbr_lock; + uint8_t rq_dbr_lock; + struct xorshift32_state rand; + struct bnxt_re_list_node snode; + struct bnxt_re_list_node rnode; + struct bnxt_re_srq *srq; + struct bnxt_re_cq *rcq; + struct bnxt_re_cq *scq; + struct bnxt_re_mem *mem;/* at cl 6 */ + struct bnxt_re_list_node dbnode; + uint64_t sq_shadow_db_key; + uint64_t rq_shadow_db_key; +}; + +struct bnxt_re_mr { + struct ibv_mr vmr; +}; + +struct bnxt_re_ah { + struct ibv_ah ibvah; + struct bnxt_re_pd *pd; + uint32_t avid; +}; + +struct bnxt_re_dev { + struct verbs_device vdev; + struct ibv_device_attr devattr; + uint32_t pg_size; + uint32_t cqe_size; + uint32_t max_cq_depth; + uint8_t abi_version; +}; + +struct bnxt_re_res_list { + struct bnxt_re_list_head head; + pthread_spinlock_t lock; +}; + +struct bnxt_re_context { + struct ibv_context ibvctx; + struct bnxt_re_dev *rdev; + struct bnxt_re_chip_ctx *cctx; + uint64_t comp_mask; + struct bnxt_re_dpi udpi; + uint32_t dev_id; + uint32_t max_qp; + uint32_t max_srq; + uint32_t modes; + void *shpg; + pthread_mutex_t shlock; + struct bnxt_re_push_rec *pbrec; + void *dbr_page; + void *bar_map; + struct bnxt_re_res_list qp_dbr_res; + struct bnxt_re_res_list cq_dbr_res; + struct bnxt_re_res_list srq_dbr_res; + void *db_recovery_page; + struct ibv_comp_channel *dbr_ev_chan; + struct ibv_cq *dbr_cq; + pthread_t dbr_thread; + uint64_t replay_cnt; +}; + +struct bnxt_re_pacing_data { + uint32_t do_pacing; + uint32_t pacing_th; + uint32_t 
dev_err_state; + uint32_t alarm_th; +}; + +/* Chip context related functions */ +bool _is_chip_gen_p5(struct bnxt_re_chip_ctx *cctx); +bool _is_chip_a0(struct bnxt_re_chip_ctx *cctx); +bool _is_chip_thor2(struct bnxt_re_chip_ctx *cctx); +bool _is_chip_gen_p5_thor2(struct bnxt_re_chip_ctx *cctx); + +/* DB ring functions used internally*/ +void bnxt_re_ring_rq_db(struct bnxt_re_qp *qp); +void bnxt_re_ring_sq_db(struct bnxt_re_qp *qp); +void bnxt_re_ring_srq_arm(struct bnxt_re_srq *srq); +void bnxt_re_ring_srq_db(struct bnxt_re_srq *srq); +void bnxt_re_ring_cq_db(struct bnxt_re_cq *cq); +void bnxt_re_ring_cq_arm_db(struct bnxt_re_cq *cq, uint8_t aflag); + +void bnxt_re_ring_pstart_db(struct bnxt_re_qp *qp, + struct bnxt_re_push_buffer *pbuf); +void bnxt_re_ring_pend_db(struct bnxt_re_qp *qp, + struct bnxt_re_push_buffer *pbuf); +void bnxt_re_fill_push_wcb(struct bnxt_re_qp *qp, + struct bnxt_re_push_buffer *pbuf, + uint32_t idx); + +void bnxt_re_fill_ppp(struct bnxt_re_push_buffer *pbuf, + struct bnxt_re_qp *qp, uint8_t len, uint32_t idx); +int bnxt_re_init_pbuf_list(struct bnxt_re_context *cntx); +void bnxt_re_destroy_pbuf_list(struct bnxt_re_context *cntx); +struct bnxt_re_push_buffer *bnxt_re_get_pbuf(uint8_t *push_st_en, + uint8_t ppp_idx, + struct bnxt_re_context *cntx); +void bnxt_re_put_pbuf(struct bnxt_re_context *cntx, + struct bnxt_re_push_buffer *pbuf); + +void bnxt_re_db_recovery(struct bnxt_re_context *cntx); +void *bnxt_re_dbr_thread(void *arg); +bool _is_db_drop_recovery_enable(struct bnxt_re_context *cntx); +int bnxt_re_poll_kernel_cq(struct bnxt_re_cq *cq); +extern int bnxt_single_threaded; +extern int bnxt_dyn_debug; + +#define bnxt_re_trace(fmt, ...) \ +{ \ + if (bnxt_dyn_debug) \ + fprintf(stderr, fmt, ##__VA_ARGS__); \ +} + +/* pointer conversion functions*/ +static inline struct bnxt_re_dev *to_bnxt_re_dev(struct ibv_device *ibvdev) +{ + return container_of(ibvdev, struct bnxt_re_dev, vdev); +} + +static inline struct bnxt_re_context *to_bnxt_re_context( + struct ibv_context *ibvctx) +{ + return container_of(ibvctx, struct bnxt_re_context, ibvctx); +} + +static inline struct bnxt_re_pd *to_bnxt_re_pd(struct ibv_pd *ibvpd) +{ + return container_of(ibvpd, struct bnxt_re_pd, ibvpd); +} + +static inline struct bnxt_re_cq *to_bnxt_re_cq(struct ibv_cq *ibvcq) +{ + return container_of(ibvcq, struct bnxt_re_cq, ibvcq); +} + +static inline struct bnxt_re_qp *to_bnxt_re_qp(struct ibv_qp *ibvqp) +{ + return container_of(ibvqp, struct bnxt_re_qp, ibvqp); +} + +static inline struct bnxt_re_srq *to_bnxt_re_srq(struct ibv_srq *ibvsrq) +{ + return container_of(ibvsrq, struct bnxt_re_srq, ibvsrq); +} + +static inline struct bnxt_re_ah *to_bnxt_re_ah(struct ibv_ah *ibvah) +{ + return container_of(ibvah, struct bnxt_re_ah, ibvah); +} + +/* CQE manipulations */ +#define bnxt_re_get_cqe_sz() (sizeof(struct bnxt_re_req_cqe) + \ + sizeof(struct bnxt_re_bcqe)) +#define bnxt_re_get_sqe_hdr_sz() (sizeof(struct bnxt_re_bsqe) + \ + sizeof(struct bnxt_re_send)) +#define bnxt_re_get_srqe_hdr_sz() (sizeof(struct bnxt_re_brqe) + \ + sizeof(struct bnxt_re_srqe)) +#define bnxt_re_get_srqe_sz() (sizeof(struct bnxt_re_brqe) + \ + sizeof(struct bnxt_re_srqe) + \ + BNXT_RE_MAX_INLINE_SIZE) +#define bnxt_re_is_cqe_valid(valid, phase) \ + (((valid) & BNXT_RE_BCQE_PH_MASK) == (phase)) + +static inline void bnxt_re_change_cq_phase(struct bnxt_re_cq *cq) +{ + if (!cq->cqq->head) + cq->phase = !(cq->phase & BNXT_RE_BCQE_PH_MASK); +} + +static inline void *bnxt_re_get_swqe(struct bnxt_re_joint_queue *jqq, + 
uint32_t *wqe_idx) +{ + if (wqe_idx) + *wqe_idx = jqq->start_idx; + return &jqq->swque[jqq->start_idx]; +} + +static inline void bnxt_re_jqq_mod_start(struct bnxt_re_joint_queue *jqq, + uint32_t idx) +{ + jqq->start_idx = jqq->swque[idx].next_idx; +} + +static inline void bnxt_re_jqq_mod_last(struct bnxt_re_joint_queue *jqq, + uint32_t idx) +{ + jqq->last_idx = jqq->swque[idx].next_idx; +} + +static inline uint32_t bnxt_re_init_depth(uint32_t ent, uint64_t cmask) +{ + return cmask & BNXT_RE_COMP_MASK_UCNTX_POW2_DISABLED ? + ent : roundup_pow_of_two(ent); +} + +static inline uint32_t bnxt_re_get_diff(uint64_t cmask) +{ + return cmask & BNXT_RE_COMP_MASK_UCNTX_RSVD_WQE_DISABLED ? + 0 : BNXT_RE_FULL_FLAG_DELTA; +} + +static inline int bnxt_re_calc_wqe_sz(int nsge) +{ + /* This is used for both sq and rq. In case hdr size differs + * in future move to individual functions. + */ + return sizeof(struct bnxt_re_sge) * nsge + bnxt_re_get_sqe_hdr_sz(); +} + +/* Helper function to copy to push buffers */ +static inline void bnxt_re_copy_data_to_pb(struct bnxt_re_push_buffer *pbuf, + uint8_t offset, uint32_t idx) +{ + __u64 *src; + __u64 *dst; + int indx; + + for (indx = 0; indx < idx; indx++) { + dst = (__u64 *)(pbuf->pbuf + 2 * (indx + offset)); + src = (__u64 *)pbuf->wqe[indx]; + iowrite64(dst, *src); + + dst++; + src++; + iowrite64(dst, *src); + } +} + +static inline int bnxt_re_dp_spin_init(struct bnxt_spinlock *lock, int pshared, int need_lock) +{ + lock->in_use = 0; + lock->need_lock = need_lock; + return pthread_spin_init(&lock->lock, PTHREAD_PROCESS_PRIVATE); +} + +static inline int bnxt_re_dp_spin_destroy(struct bnxt_spinlock *lock) +{ + return pthread_spin_destroy(&lock->lock); +} + +static inline int bnxt_spin_lock(struct bnxt_spinlock *lock) +{ + if (lock->need_lock) + return pthread_spin_lock(&lock->lock); + + if (unlikely(lock->in_use)) { + fprintf(stderr, "*** ERROR: multithreading violation ***\n" + "You are running a multithreaded application but\n" + "you set BNXT_SINGLE_THREADED=1. Please unset it.\n"); + abort(); + } else { + lock->in_use = 1; + /* This fence is not at all correct, but it increases the */ + /* chance that in_use is detected by another thread without */ + /* much runtime cost. 
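+		 * The check is best-effort only: with need_lock cleared there
+		 * is no real mutual exclusion, so this path assumes the
+		 * application honours BNXT_SINGLE_THREADED and keeps a single
+		 * data-path thread.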
*/ + atomic_thread_fence(memory_order_acq_rel); + } + + return 0; +} + +static inline int bnxt_spin_unlock(struct bnxt_spinlock *lock) +{ + if (lock->need_lock) + return pthread_spin_unlock(&lock->lock); + + lock->in_use = 0; + return 0; +} + +static void timespec_sub(const struct timespec *a, const struct timespec *b, + struct timespec *res) +{ + res->tv_sec = a->tv_sec - b->tv_sec; + res->tv_nsec = a->tv_nsec - b->tv_nsec; + if (res->tv_nsec < 0) { + res->tv_sec--; + res->tv_nsec += BNXT_NSEC_PER_SEC; + } +} + +/* + * Function waits in a busy loop for a given nano seconds + * The maximum wait period allowed is less than one second + */ +static inline void bnxt_re_sub_sec_busy_wait(uint32_t nsec) +{ + struct timespec start, cur, res; + + if (nsec >= BNXT_NSEC_PER_SEC) + return; + + if (clock_gettime(CLOCK_REALTIME, &start)) { + fprintf(stderr, "%s: failed to get time : %d", + __func__, errno); + return; + } + + while (1) { + if (clock_gettime(CLOCK_REALTIME, &cur)) { + fprintf(stderr, "%s: failed to get time : %d", + __func__, errno); + return; + } + + timespec_sub(&cur, &start, &res); + if (res.tv_nsec >= nsec) + break; + } +} + +#define BNXT_RE_HW_RETX(a) ((a)->comp_mask & BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED) +#define bnxt_re_dp_spin_lock(lock) bnxt_spin_lock(lock) +#define bnxt_re_dp_spin_unlock(lock) bnxt_spin_unlock(lock) + +#endif diff --git a/contrib/ofed/libbnxtre/memory.c b/contrib/ofed/libbnxtre/memory.c new file mode 100644 index 000000000000..f85435ada272 --- /dev/null +++ b/contrib/ofed/libbnxtre/memory.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include + +#include +#include + +#include "main.h" + +void bnxt_re_free_mem(struct bnxt_re_mem *mem) +{ + if (mem->va_head) { + ibv_dofork_range(mem->va_head, mem->size); + munmap(mem->va_head, mem->size); + } + + free(mem); +} + +void *bnxt_re_alloc_mem(size_t size, uint32_t pg_size) +{ + struct bnxt_re_mem *mem; + + mem = calloc(1, sizeof(*mem)); + if (!mem) + return NULL; + + size = get_aligned(size, pg_size); + mem->size = size; + mem->va_head = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem->va_head == MAP_FAILED) + goto bail; + + if (ibv_dontfork_range(mem->va_head, size)) + goto unmap; + + mem->head = 0; + mem->tail = 0; + mem->va_tail = (void *)((char *)mem->va_head + size); + return mem; +unmap: + munmap(mem->va_head, size); +bail: + free(mem); + return NULL; +} + +void *bnxt_re_get_obj(struct bnxt_re_mem *mem, size_t req) +{ + void *va; + + if ((mem->size - mem->tail - req) < mem->head) + return NULL; + mem->tail += req; + va = (void *)((char *)mem->va_tail - mem->tail); + return va; +} + +void *bnxt_re_get_ring(struct bnxt_re_mem *mem, size_t req) +{ + void *va; + + if ((mem->head + req) > (mem->size - mem->tail)) + return NULL; + va = (void *)((char *)mem->va_head + mem->head); + mem->head += req; + return va; +} diff --git a/contrib/ofed/libbnxtre/memory.h b/contrib/ofed/libbnxtre/memory.h new file mode 100644 index 000000000000..fe18f30ad1a5 --- /dev/null +++ b/contrib/ofed/libbnxtre/memory.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2024, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef __BNXT_RE_MEMORY_H__ +#define __BNXT_RE_MEMORY_H__ + +#include + +#include "main.h" + +struct bnxt_re_mem { + void *va_head; + void *va_tail; + uint32_t head; + uint32_t tail; + uint32_t size; + uint32_t pad; +}; + +#define BNXT_RE_QATTR_SQ_INDX 0 +#define BNXT_RE_QATTR_RQ_INDX 1 +struct bnxt_re_qattr { + uint32_t esize; + uint32_t slots; + uint32_t nwr; + uint32_t sz_ring; + uint32_t sz_shad; +}; + +/* spin lock wrapper struct */ +struct bnxt_spinlock { + pthread_spinlock_t lock; + int in_use; + int need_lock; +}; + +struct bnxt_re_queue { + struct bnxt_spinlock qlock; + uint32_t flags; + uint32_t *dbtail; + void *va; + uint32_t head; + uint32_t depth; /* no of entries */ + void *pad; + uint32_t pad_stride_log2; + uint32_t tail; + uint32_t max_slots; + /* Represents the difference between the real queue depth allocated in + * HW and the user requested queue depth and is used to correctly flag + * queue full condition based on user supplied queue depth. + * This value can vary depending on the type of queue and any HW + * requirements that mandate keeping a fixed gap between the producer + * and the consumer indices in the queue + */ + uint32_t diff; + uint32_t stride; + uint32_t msn; + uint32_t msn_tbl_sz; +}; + +static inline unsigned long get_aligned(uint32_t size, uint32_t al_size) +{ + return (unsigned long) (size + al_size - 1) & ~(al_size - 1); +} + +static inline unsigned long roundup_pow_of_two(unsigned long val) +{ + unsigned long roundup = 1; + + if (val == 1) + return (roundup << 1); + + while (roundup < val) + roundup <<= 1; + + return roundup; +} + +#define iowrite64(dst, val) (*((volatile __u64 *) (dst)) = val) +#define iowrite32(dst, val) (*((volatile __u32 *) (dst)) = val) + +/* Basic queue operation */ +static inline void *bnxt_re_get_hwqe(struct bnxt_re_queue *que, uint32_t idx) +{ + idx += que->tail; + if (idx >= que->depth) + idx -= que->depth; + return (void *)(que->va + (idx << 4)); +} + +static inline void *bnxt_re_get_hwqe_hdr(struct bnxt_re_queue *que) +{ + return (void *)(que->va + ((que->tail) << 4)); +} + +static inline uint32_t bnxt_re_is_que_full(struct bnxt_re_queue *que, + uint32_t slots) +{ + int32_t avail, head, tail; + + head = que->head; + tail = que->tail; + avail = head - tail; + if (head <= tail) + avail += que->depth; + return avail <= (slots + que->diff); +} + +static inline uint32_t bnxt_re_is_que_empty(struct bnxt_re_queue *que) +{ + return que->tail == que->head; +} + +static inline void bnxt_re_incr_tail(struct bnxt_re_queue *que, uint8_t cnt) +{ + que->tail += cnt; + if (que->tail >= que->depth) { + que->tail %= que->depth; + /* Rolled over, Toggle Tail bit in epoch flags */ + que->flags ^= 1UL << BNXT_RE_FLAG_EPOCH_TAIL_SHIFT; + } +} + +static inline void bnxt_re_incr_head(struct bnxt_re_queue *que, uint8_t cnt) +{ + que->head += cnt; + if (que->head >= que->depth) { + que->head %= que->depth; + /* Rolled over, Toggle HEAD bit in epoch flags */ + que->flags ^= 1UL << BNXT_RE_FLAG_EPOCH_HEAD_SHIFT; + } + +} + +void bnxt_re_free_mem(struct bnxt_re_mem *mem); +void *bnxt_re_alloc_mem(size_t size, uint32_t pg_size); +void *bnxt_re_get_obj(struct bnxt_re_mem *mem, size_t req); +void *bnxt_re_get_ring(struct bnxt_re_mem *mem, size_t req); + +#endif diff --git a/contrib/ofed/libbnxtre/verbs.c b/contrib/ofed/libbnxtre/verbs.c new file mode 100644 index 000000000000..7054af34b0c7 --- /dev/null +++ b/contrib/ofed/libbnxtre/verbs.c @@ -0,0 +1,2557 @@ +/* + * Copyright (c) 2024, Broadcom. All rights reserved. 
The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "main.h" +#include "verbs.h" + +static int ibv_to_bnxt_re_wr_opcd[11] = { + BNXT_RE_WR_OPCD_RDMA_WRITE, + BNXT_RE_WR_OPCD_RDMA_WRITE_IMM, + BNXT_RE_WR_OPCD_SEND, + BNXT_RE_WR_OPCD_SEND_IMM, + BNXT_RE_WR_OPCD_RDMA_READ, + BNXT_RE_WR_OPCD_ATOMIC_CS, + BNXT_RE_WR_OPCD_ATOMIC_FA, + BNXT_RE_WR_OPCD_INVAL, + BNXT_RE_WR_OPCD_INVAL, + BNXT_RE_WR_OPCD_INVAL, + BNXT_RE_WR_OPCD_INVAL +}; + +static int ibv_wr_to_wc_opcd[11] = { + IBV_WC_RDMA_WRITE, + IBV_WC_RDMA_WRITE, + IBV_WC_SEND, + IBV_WC_SEND, + IBV_WC_RDMA_READ, + IBV_WC_COMP_SWAP, + IBV_WC_FETCH_ADD, + 0xFF, + 0xFF, + 0xFF, + 0xFF +}; + +static int bnxt_re_req_to_ibv_status [12] = { + IBV_WC_SUCCESS, + IBV_WC_BAD_RESP_ERR, + IBV_WC_LOC_LEN_ERR, + IBV_WC_LOC_QP_OP_ERR, + IBV_WC_LOC_PROT_ERR, + IBV_WC_MW_BIND_ERR, + IBV_WC_REM_INV_REQ_ERR, + IBV_WC_REM_ACCESS_ERR, + IBV_WC_REM_OP_ERR, + IBV_WC_RNR_RETRY_EXC_ERR, + IBV_WC_RETRY_EXC_ERR, + IBV_WC_WR_FLUSH_ERR +}; + +static int bnxt_re_res_to_ibv_status [9] = { + IBV_WC_SUCCESS, + IBV_WC_LOC_ACCESS_ERR, + IBV_WC_LOC_LEN_ERR, + IBV_WC_LOC_PROT_ERR, + IBV_WC_LOC_QP_OP_ERR, + IBV_WC_MW_BIND_ERR, + IBV_WC_REM_INV_REQ_ERR, + IBV_WC_WR_FLUSH_ERR, + IBV_WC_FATAL_ERR +}; + +static int bnxt_re_poll_one(struct bnxt_re_cq *cq, int nwc, struct ibv_wc *wc, + uint32_t *resize); + +int bnxt_single_threaded; +int bnxt_dyn_debug; +int bnxt_re_query_device(struct ibv_context *ibvctx, + struct ibv_device_attr *dev_attr) +{ + struct ibv_query_device cmd = {}; + uint8_t fw_ver[8]; + int status; + + memset(dev_attr, 0, sizeof(struct ibv_device_attr)); + status = ibv_cmd_query_device(ibvctx, dev_attr, (uint64_t *)&fw_ver, + &cmd, sizeof(cmd)); + snprintf(dev_attr->fw_ver, 64, "%d.%d.%d.%d", + fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3]); + + return status; +} + +int bnxt_re_query_device_compat(struct ibv_context *ibvctx, + struct ibv_device_attr *dev_attr) + +{ + int rc = 0; + + rc = bnxt_re_query_device(ibvctx, dev_attr); + + return rc; +} + +int bnxt_re_query_port(struct ibv_context *ibvctx, uint8_t port, + struct ibv_port_attr 
*port_attr) +{ + struct ibv_query_port cmd = {}; + + return ibv_cmd_query_port(ibvctx, port, port_attr, &cmd, sizeof(cmd)); +} + +static inline bool bnxt_re_is_wcdpi_enabled(struct bnxt_re_context *cntx) +{ + return cntx->comp_mask & BNXT_RE_COMP_MASK_UCNTX_WC_DPI_ENABLED; +} + +static int bnxt_re_map_db_page(struct ibv_context *ibvctx, + uint64_t dbr, uint32_t dpi, uint32_t wcdpi) +{ + struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx); + struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvctx->device); + + cntx->udpi.dpindx = dpi; + cntx->udpi.dbpage = mmap(NULL, dev->pg_size, PROT_WRITE, + MAP_SHARED, ibvctx->cmd_fd, dbr); + if (cntx->udpi.dbpage == MAP_FAILED) + return -ENOMEM; + if (wcdpi) { + cntx->udpi.wcdbpg = mmap(NULL, dev->pg_size, PROT_WRITE, + MAP_SHARED, ibvctx->cmd_fd, + BNXT_RE_MAP_WC); + if (cntx->udpi.wcdbpg == MAP_FAILED) + return -ENOMEM; + cntx->udpi.wcdpi = wcdpi; + } + + return 0; +} + +struct ibv_pd *bnxt_re_alloc_pd(struct ibv_context *ibvctx) +{ + struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx); + struct bnxt_re_pd_resp resp = {}; + struct ibv_alloc_pd cmd = {}; + struct bnxt_re_pd *pd; + uint64_t dbr_map; + + pd = calloc(1, sizeof(*pd)); + if (!pd) + return NULL; + + if (ibv_cmd_alloc_pd(ibvctx, &pd->ibvpd, &cmd, sizeof(cmd), + &resp.resp, sizeof(resp))) + goto out; + + pd->pdid = resp.pdid; + /* Map DB page now. */ + if (!cntx->udpi.dbpage) { + uint32_t wcdpi = 0; + + if (bnxt_re_is_wcdpi_enabled(cntx) && + resp.comp_mask & BNXT_RE_COMP_MASK_PD_HAS_WC_DPI) + wcdpi = resp.wcdpi; + if (bnxt_re_map_db_page(ibvctx, resp.dbr, resp.dpi, wcdpi)) + goto fail; + if (cntx->cctx->chip_is_gen_p5_thor2 && cntx->udpi.wcdpi) + bnxt_re_init_pbuf_list(cntx); + } + if (resp.comp_mask & BNXT_RE_COMP_MASK_PD_HAS_DBR_BAR_ADDR) { + dbr_map = resp.dbr_bar_map & 0xFFFFFFFFFFFFF000; + cntx->bar_map = mmap(NULL, 4096, PROT_READ, + MAP_SHARED, ibvctx->cmd_fd, dbr_map); + if (cntx->bar_map == MAP_FAILED) + goto fail; + } + + return &pd->ibvpd; +fail: + ibv_cmd_dealloc_pd(&pd->ibvpd); +out: + free(pd); + return NULL; +} + +int bnxt_re_free_pd(struct ibv_pd *ibvpd) +{ + struct bnxt_re_pd *pd = to_bnxt_re_pd(ibvpd); + int status; + + status = ibv_cmd_dealloc_pd(ibvpd); + if (status) + return status; + /* DPI un-mapping will be done during uninit_ucontext */ + free(pd); + + return 0; +} + +struct ibv_mr *get_ibv_mr_from_bnxt_re_mr(struct bnxt_re_mr *mr) +{ + return &mr->vmr; +} + +struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len, + int access) +{ + struct bnxt_re_mr_resp resp = {}; + struct ibv_reg_mr cmd = {}; + struct bnxt_re_mr *mr; + uint64_t hw_va; + hw_va = (uint64_t) sva; + + mr = calloc(1, sizeof(*mr)); + if (!mr) + return NULL; + + if (ibv_cmd_reg_mr(ibvpd, sva, len, hw_va, access, &mr->vmr, + &cmd, sizeof(cmd), &resp.resp, sizeof(resp))) { + free(mr); + return NULL; + } + + return get_ibv_mr_from_bnxt_re_mr(mr); +} + +int bnxt_re_dereg_mr(VERBS_MR *ibvmr) +{ + struct bnxt_re_mr *mr = (struct bnxt_re_mr *)ibvmr; + int status; + + status = ibv_cmd_dereg_mr(ibvmr); + if (status) + return status; + free(mr); + + return 0; +} + +void *bnxt_re_alloc_cqslab(struct bnxt_re_context *cntx, + uint32_t ncqe, uint32_t cur) +{ + struct bnxt_re_mem *mem; + uint32_t depth, sz; + + depth = bnxt_re_init_depth(ncqe + 1, cntx->comp_mask); + if (depth > cntx->rdev->max_cq_depth + 1) + depth = cntx->rdev->max_cq_depth + 1; + if (depth == cur) + return NULL; + sz = get_aligned((depth * cntx->rdev->cqe_size), cntx->rdev->pg_size); + mem = bnxt_re_alloc_mem(sz, 
cntx->rdev->pg_size); + if (mem) + mem->pad = depth; + return mem; +} + +struct ibv_cq *_bnxt_re_create_cq(struct ibv_context *ibvctx, int ncqe, + struct ibv_comp_channel *channel, int vec, + bool soft_cq) +{ + struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx); + struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvctx->device); + struct bnxt_re_cq_resp resp = {}; + struct bnxt_re_cq_req cmd = {}; + struct bnxt_re_cq *cq; + bool has_dpi; + + if (ncqe > dev->max_cq_depth) + return NULL; + + cq = calloc(1, (sizeof(*cq) + sizeof(struct bnxt_re_queue))); + if (!cq) + return NULL; + cq->cqq = (void *)((char *)cq + sizeof(*cq)); + if (!cq->cqq) + goto mem; + + cq->mem = bnxt_re_alloc_cqslab(cntx, ncqe, 0); + if (!cq->mem) + goto mem; + cq->cqq->depth = cq->mem->pad; + cq->cqq->stride = dev->cqe_size; + /* As an exception no need to call get_ring api we know + * this is the only consumer + */ + cq->cqq->va = cq->mem->va_head; + if (!cq->cqq->va) + goto fail; + + cmd.cq_va = (uint64_t)cq->cqq->va; + cmd.cq_handle = (uint64_t)cq; + if (soft_cq) { + cmd.comp_mask |= BNXT_RE_COMP_MASK_CQ_REQ_HAS_CAP_MASK; + cmd.cq_capab |= BNXT_RE_COMP_MASK_CQ_REQ_CAP_DBR_RECOVERY; + } + if (ibv_cmd_create_cq(ibvctx, ncqe, channel, vec, + &cq->ibvcq, &cmd.cmd, sizeof(cmd), + &resp.resp, sizeof(resp))) + goto fail; + + has_dpi = resp.comp_mask & BNXT_RE_COMP_MASK_CQ_HAS_DB_INFO; + if (!cntx->udpi.dbpage && has_dpi) { + uint32_t wcdpi = 0; + + if (bnxt_re_is_wcdpi_enabled(cntx) && + resp.comp_mask & BNXT_RE_COMP_MASK_CQ_HAS_WC_DPI) + wcdpi = resp.wcdpi; + if (bnxt_re_map_db_page(ibvctx, resp.dbr, resp.dpi, wcdpi)) + goto fail; + if (cntx->cctx->chip_is_gen_p5_thor2 && cntx->udpi.wcdpi) + bnxt_re_init_pbuf_list(cntx); + } + + if (resp.comp_mask & BNXT_RE_COMP_MASK_CQ_HAS_CQ_PAGE) { + cq->cq_page = mmap(NULL, dev->pg_size, PROT_WRITE, MAP_SHARED, + ibvctx->cmd_fd, resp.cq_page); + if (!cq->cq_page) + fprintf(stderr, DEV "Valid cq_page not mapped\n"); + } + + cq->cqid = resp.cqid; + cq->phase = resp.phase; + cq->cqq->tail = resp.tail; + cq->udpi = &cntx->udpi; + cq->first_arm = true; + cq->cntx = cntx; + cq->rand.seed = cq->cqid; + cq->shadow_db_key = BNXT_RE_DB_KEY_INVALID; + bnxt_re_dp_spin_init(&cq->cqq->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded); + INIT_DBLY_LIST_HEAD(&cq->sfhead); + INIT_DBLY_LIST_HEAD(&cq->rfhead); + INIT_DBLY_LIST_HEAD(&cq->prev_cq_head); + if (_is_db_drop_recovery_enable(cntx) && !soft_cq) { + INIT_DBLY_LIST_NODE(&cq->dbnode); + pthread_spin_lock(&cntx->cq_dbr_res.lock); + bnxt_re_list_add_node(&cq->dbnode, &cntx->cq_dbr_res.head); + pthread_spin_unlock(&cntx->cq_dbr_res.lock); + } + + return &cq->ibvcq; +fail: + bnxt_re_free_mem(cq->mem); +mem: + free(cq); + return NULL; +} + +struct ibv_cq *bnxt_re_create_cq(struct ibv_context *ibvctx, int ncqe, + struct ibv_comp_channel *channel, int vec) +{ + struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx); + struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvctx->device); + sigset_t block_sig_set, old_sig_set; + int ret; + + if (_is_db_drop_recovery_enable(cntx) && !cntx->dbr_cq) { + cntx->dbr_ev_chan = + ibv_create_comp_channel(ibvctx); + if (!cntx->dbr_ev_chan) { + fprintf(stderr, + DEV "Failed to create completion channel\n"); + goto free; + } + cntx->dbr_cq = _bnxt_re_create_cq(ibvctx, 1, cntx->dbr_ev_chan, vec, 1); + if (!cntx->dbr_cq) { + fprintf(stderr, DEV "Couldn't create CQ\n"); + goto free; + } + cntx->db_recovery_page = mmap(NULL, dev->pg_size, PROT_READ | + PROT_WRITE, MAP_SHARED, + ibvctx->cmd_fd, BNXT_RE_DB_RECOVERY_PAGE); + if 
(cntx->db_recovery_page == MAP_FAILED) { + fprintf(stderr, DEV "Couldn't map DB recovery page\n"); + goto free; + } + /* Create pthread to handle the doorbell drop events. This thread is + * not going to handle any signals. Before creation block all the + * signals, and after creation restore the old signal mask. + */ + sigfillset(&block_sig_set); + pthread_sigmask(SIG_BLOCK, &block_sig_set, &old_sig_set); + ret = pthread_create(&cntx->dbr_thread, NULL, bnxt_re_dbr_thread, cntx); + if (ret) { + fprintf(stderr, DEV "Couldn't create pthread\n"); + pthread_sigmask(SIG_SETMASK, &old_sig_set, NULL); + goto free; + } + pthread_sigmask(SIG_SETMASK, &old_sig_set, NULL); + INIT_DBLY_LIST_HEAD(&cntx->qp_dbr_res.head); + pthread_spin_init(&cntx->qp_dbr_res.lock, PTHREAD_PROCESS_PRIVATE); + INIT_DBLY_LIST_HEAD(&cntx->cq_dbr_res.head); + pthread_spin_init(&cntx->cq_dbr_res.lock, PTHREAD_PROCESS_PRIVATE); + INIT_DBLY_LIST_HEAD(&cntx->srq_dbr_res.head); + pthread_spin_init(&cntx->srq_dbr_res.lock, PTHREAD_PROCESS_PRIVATE); + } + return(_bnxt_re_create_cq(ibvctx, ncqe, channel, vec, 0)); +free: + if (cntx->dbr_ev_chan) { + ret = ibv_destroy_comp_channel(cntx->dbr_ev_chan); + if (ret) + fprintf(stderr, DEV "ibv_destroy_comp_channel error\n"); + } + + if (cntx->dbr_cq) { + if (cntx->db_recovery_page) + munmap(cntx->db_recovery_page, dev->pg_size); + ret = ibv_destroy_cq(cntx->dbr_cq); + if (ret) + fprintf(stderr, DEV "ibv_destroy_cq error\n"); + } + return NULL; +} + +int bnxt_re_poll_kernel_cq(struct bnxt_re_cq *cq) +{ + struct ibv_wc tmp_wc; + int rc; + + rc = ibv_cmd_poll_cq(&cq->ibvcq, 1, &tmp_wc); + if (unlikely(rc)) + fprintf(stderr, "ibv_cmd_poll_cq failed: %d\n", rc); + return rc; +} + +#define BNXT_RE_QUEUE_START_PHASE 0x01 + +/* + * Function to complete the last steps in CQ resize. Invoke poll function + * in the kernel driver; this serves as a signal to the driver to complete CQ + * resize steps required. Free memory mapped for the original CQ and switch + * over to the memory mapped for CQ with the new size. Finally Ack the Cutoff + * CQE. This function must be called under cq->cqq.lock. + */ +void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq) +{ + struct bnxt_re_context *cntx = to_bnxt_re_context(cq->ibvcq.context); + + bnxt_re_poll_kernel_cq(cq); + bnxt_re_free_mem(cq->mem); + + cq->mem = cq->resize_mem; + cq->resize_mem = NULL; + /* As an exception no need to call get_ring api we know + * this is the only consumer + */ + cq->cqq->va = cq->mem->va_head; + /* + * We don't want to memcpy() the entire cqq structure below; otherwise + * we'd end up overwriting cq->cqq.lock that is held by the caller. + * So we copy the members piecemeal. cqq->head, cqq->tail implicitly + * set to 0 before cutoff_ack DB. 
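+	 * Below, depth and stride are refreshed from the resized slab, the
+	 * phase is restarted and the epoch bits in cqq->flags are cleared,
+	 * after which the cutoff-ack doorbell is rung.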
+ */ + cq->cqq->depth = cq->mem->pad; + cq->cqq->stride = cntx->rdev->cqe_size; + cq->cqq->head = 0; + cq->cqq->tail = 0; + cq->phase = BNXT_RE_QUEUE_START_PHASE; + /* Reset epoch portion of the flags */ + cq->cqq->flags &= ~(BNXT_RE_FLAG_EPOCH_TAIL_MASK | + BNXT_RE_FLAG_EPOCH_HEAD_MASK); + bnxt_re_ring_cq_arm_db(cq, BNXT_RE_QUE_TYPE_CQ_CUT_ACK); +} + +int bnxt_re_resize_cq(struct ibv_cq *ibvcq, int ncqe) +{ + struct bnxt_re_context *cntx = to_bnxt_re_context(ibvcq->context); + struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvcq->context->device); + struct bnxt_re_cq *cq = to_bnxt_re_cq(ibvcq); + struct bnxt_re_resize_cq_req req = {}; + uint32_t exit_cnt = 20; + + struct ibv_resize_cq_resp resp = {}; + int rc = 0; + + if (ncqe > dev->max_cq_depth) + return -EINVAL; + + bnxt_re_dp_spin_lock(&cq->cqq->qlock); + cq->resize_mem = bnxt_re_alloc_cqslab(cntx, ncqe, cq->cqq->depth); + if (unlikely(!cq->resize_mem)) { + rc = -ENOMEM; + goto done; + } + /* As an exception no need to call get_ring api we know + * this is the only consumer + */ + req.cq_va = (uint64_t)cq->resize_mem->va_head; + rc = ibv_cmd_resize_cq(ibvcq, ncqe, &req.cmd, + sizeof(req), &resp, sizeof(resp)); + if (unlikely(rc)) { + bnxt_re_free_mem(cq->resize_mem); + goto done; + } + + while(true) { + struct ibv_wc tmp_wc = {0}; + uint32_t resize = 0; + int dqed = 0; + + struct bnxt_re_work_compl *compl = NULL; + dqed = bnxt_re_poll_one(cq, 1, &tmp_wc, &resize); + if (resize) { + break; + } + if (dqed) { + compl = calloc(1, sizeof(*compl)); + if (unlikely(!compl)) { + fprintf(stderr, "%s: No Memory.. Continue\n", __func__); + break; + } + memcpy(&compl->wc, &tmp_wc, sizeof(tmp_wc)); + bnxt_re_list_add_node(&compl->cnode, &cq->prev_cq_head); + compl = NULL; + memset(&tmp_wc, 0, sizeof(tmp_wc)); + } else { + exit_cnt--; + if (unlikely(!exit_cnt)) { + rc = -EIO; + break; + } else { + /* wait for 100 milli seconds */ + bnxt_re_sub_sec_busy_wait(100 * 1000000); + } + } + } +done: + bnxt_re_dp_spin_unlock(&cq->cqq->qlock); + return rc; +} + +static void bnxt_re_destroy_resize_cq_list(struct bnxt_re_cq *cq) +{ + struct bnxt_re_list_node *cur, *tmp; + struct bnxt_re_work_compl *compl; + + if (bnxt_re_list_empty(&cq->prev_cq_head)) + return; + + list_for_each_node_safe(cur, tmp, &cq->prev_cq_head) { + compl = list_node(cur, struct bnxt_re_work_compl, cnode); + bnxt_re_list_del_node(&compl->cnode, &cq->prev_cq_head); + free(compl); + } + +} + +int bnxt_re_destroy_cq(struct ibv_cq *ibvcq) +{ + struct bnxt_re_cq *cq = to_bnxt_re_cq(ibvcq); + int status; + + if (_is_db_drop_recovery_enable(cq->cntx) && + ibvcq != cq->cntx->dbr_cq) { + pthread_spin_lock(&cq->cntx->cq_dbr_res.lock); + bnxt_re_list_del_node(&cq->dbnode, + &cq->cntx->cq_dbr_res.head); + pthread_spin_unlock(&cq->cntx->cq_dbr_res.lock); + } + status = ibv_cmd_destroy_cq(ibvcq); + if (status) { + if (_is_db_drop_recovery_enable(cq->cntx) && + ibvcq != cq->cntx->dbr_cq) { + pthread_spin_lock(&cq->cntx->cq_dbr_res.lock); + bnxt_re_list_add_node(&cq->dbnode, + &cq->cntx->cq_dbr_res.head); + pthread_spin_unlock(&cq->cntx->cq_dbr_res.lock); + } + return status; + } + bnxt_re_destroy_resize_cq_list(cq); + bnxt_re_free_mem(cq->mem); + free(cq); + return 0; +} + +static uint8_t bnxt_re_poll_err_scqe(struct bnxt_re_qp *qp, + struct ibv_wc *ibvwc, + struct bnxt_re_req_cqe *scqe, + uint32_t flg_val, int *cnt) +{ + struct bnxt_re_queue *sq = qp->jsqq->hwque; + struct bnxt_re_wrid *swrid; + struct bnxt_re_cq *scq; + uint8_t status; + uint32_t head; + + scq = to_bnxt_re_cq(qp->ibvqp.send_cq); + + head = 
qp->jsqq->last_idx; + swrid = &qp->jsqq->swque[head]; + + *cnt = 1; + status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) & + BNXT_RE_BCQE_STATUS_MASK; + ibvwc->status = bnxt_re_req_to_ibv_status[status]; + ibvwc->wc_flags = 0; + ibvwc->wr_id = swrid->wrid; + ibvwc->qp_num = qp->qpid; + ibvwc->opcode = swrid->wc_opcd; + ibvwc->byte_len = 0; + + bnxt_re_incr_head(sq, swrid->slots); + bnxt_re_jqq_mod_last(qp->jsqq, head); + + if (qp->qpst != IBV_QPS_ERR) + qp->qpst = IBV_QPS_ERR; + bnxt_re_list_add_node(&qp->snode, &scq->sfhead); + bnxt_re_trace("%s: qp_num = 0x%x status = %d\n", + __func__, ibvwc->qp_num, ibvwc->status) + + return false; +} + +static uint8_t bnxt_re_poll_success_scqe(struct bnxt_re_qp *qp, + struct ibv_wc *ibvwc, + struct bnxt_re_req_cqe *scqe, int *cnt) +{ + struct bnxt_re_queue *sq = qp->jsqq->hwque; + struct bnxt_re_wrid *swrid; + uint8_t pcqe = false; + uint32_t cindx, head; + + head = qp->jsqq->last_idx; + swrid = &qp->jsqq->swque[head]; + cindx = le32toh(scqe->con_indx) % qp->cap.max_swr; + + if (!(swrid->sig & IBV_SEND_SIGNALED)) { + *cnt = 0; + } else { + ibvwc->status = IBV_WC_SUCCESS; + ibvwc->wc_flags = 0; + ibvwc->qp_num = qp->qpid; + ibvwc->wr_id = swrid->wrid; + ibvwc->opcode = swrid->wc_opcd; + if (ibvwc->opcode == IBV_WC_RDMA_READ || + ibvwc->opcode == IBV_WC_COMP_SWAP || + ibvwc->opcode == IBV_WC_FETCH_ADD) + ibvwc->byte_len = swrid->bytes; + *cnt = 1; + } + bnxt_re_incr_head(sq, swrid->slots); + bnxt_re_jqq_mod_last(qp->jsqq, head); + if (qp->jsqq->last_idx != cindx) + pcqe = true; + + return pcqe; +} + +static uint8_t bnxt_re_poll_scqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc, + void *cqe, uint32_t flg_val, int *cnt) +{ + uint8_t status, pcqe = false; + + status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) & + BNXT_RE_BCQE_STATUS_MASK; + if (status == BNXT_RE_REQ_ST_OK) + pcqe = bnxt_re_poll_success_scqe(qp, ibvwc, cqe, cnt); + else + pcqe = bnxt_re_poll_err_scqe(qp, ibvwc, cqe, flg_val, cnt); + + return pcqe; +} + +static void bnxt_re_release_srqe(struct bnxt_re_srq *srq, int tag) +{ + bnxt_re_dp_spin_lock(&srq->srqq->qlock); + srq->srwrid[srq->last_idx].next_idx = tag; + srq->last_idx = tag; + srq->srwrid[srq->last_idx].next_idx = -1; + bnxt_re_dp_spin_unlock(&srq->srqq->qlock); +} + +static int bnxt_re_poll_err_rcqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc, + struct bnxt_re_bcqe *hdr, + uint32_t flg_val, void *cqe) +{ + struct bnxt_re_wrid *swque; + struct bnxt_re_queue *rq; + struct bnxt_re_cq *rcq; + uint8_t status, cnt; + uint32_t head = 0; + + rcq = to_bnxt_re_cq(qp->ibvqp.recv_cq); + + status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) & + BNXT_RE_BCQE_STATUS_MASK; + /* skip h/w flush errors */ + if (status == BNXT_RE_RSP_ST_HW_FLUSH) + return 0; + + if (!qp->srq) { + rq = qp->jrqq->hwque; + head = qp->jrqq->last_idx; + swque = &qp->jrqq->swque[head]; + ibvwc->wr_id = swque->wrid; + cnt = swque->slots; + } else { + struct bnxt_re_srq *srq; + int tag; + + srq = qp->srq; + rq = srq->srqq; + cnt = 1; + tag = le32toh(hdr->qphi_rwrid) & BNXT_RE_BCQE_RWRID_MASK; + ibvwc->wr_id = srq->srwrid[tag].wrid; + bnxt_re_release_srqe(srq, tag); + } + + ibvwc->status = bnxt_re_res_to_ibv_status[status]; + ibvwc->qp_num = qp->qpid; + ibvwc->opcode = IBV_WC_RECV; + ibvwc->byte_len = 0; + ibvwc->wc_flags = 0; + if (qp->qptyp == IBV_QPT_UD) + ibvwc->src_qp = 0; + + if (!qp->srq) + bnxt_re_jqq_mod_last(qp->jrqq, head); + bnxt_re_incr_head(rq, cnt); + + if (!qp->srq) + bnxt_re_list_add_node(&qp->rnode, &rcq->rfhead); + + bnxt_re_trace("%s: qp_num = 0x%x status = %d\n", + 
__func__, ibvwc->qp_num, ibvwc->status) + return 1; +} + +static void bnxt_re_fill_ud_cqe(struct ibv_wc *ibvwc, + struct bnxt_re_bcqe *hdr, void *cqe, + uint8_t flags) +{ + struct bnxt_re_ud_cqe *ucqe = cqe; + uint32_t qpid; + + qpid = ((le32toh(hdr->qphi_rwrid) >> BNXT_RE_BCQE_SRCQP_SHIFT) & + BNXT_RE_BCQE_SRCQP_SHIFT) << 0x10; /* higher 8 bits of 24 */ + qpid |= (le64toh(ucqe->qplo_mac) >> BNXT_RE_UD_CQE_SRCQPLO_SHIFT) & + BNXT_RE_UD_CQE_SRCQPLO_MASK; /*lower 16 of 24 */ + ibvwc->src_qp = qpid; + ibvwc->wc_flags |= IBV_WC_GRH; + ibvwc->sl = (flags & BNXT_RE_UD_FLAGS_IP_VER_MASK) >> + BNXT_RE_UD_FLAGS_IP_VER_SFT; + /*IB-stack ABI in user do not ask for MAC to be reported. */ +} + +static void bnxt_re_poll_success_rcqe(struct bnxt_re_qp *qp, + struct ibv_wc *ibvwc, + struct bnxt_re_bcqe *hdr, + uint32_t flg_val, void *cqe) +{ + uint8_t flags, is_imm, is_rdma; + struct bnxt_re_rc_cqe *rcqe; + struct bnxt_re_wrid *swque; + struct bnxt_re_queue *rq; + uint32_t head = 0; + uint32_t rcqe_len; + uint8_t cnt; + + rcqe = cqe; + if (!qp->srq) { + rq = qp->jrqq->hwque; + head = qp->jrqq->last_idx; + swque = &qp->jrqq->swque[head]; + cnt = swque->slots; + ibvwc->wr_id = swque->wrid; + } else { + struct bnxt_re_srq *srq; + int tag; + + srq = qp->srq; + rq = srq->srqq; + cnt = 1; + tag = le32toh(hdr->qphi_rwrid) & BNXT_RE_BCQE_RWRID_MASK; + ibvwc->wr_id = srq->srwrid[tag].wrid; + bnxt_re_release_srqe(srq, tag); + } + + ibvwc->status = IBV_WC_SUCCESS; + ibvwc->qp_num = qp->qpid; + rcqe_len = le32toh(rcqe->length); + ibvwc->byte_len = (qp->qptyp == IBV_QPT_UD) ? + rcqe_len & BNXT_RE_UD_CQE_LEN_MASK : rcqe_len; + ibvwc->opcode = IBV_WC_RECV; + + flags = (flg_val >> BNXT_RE_BCQE_FLAGS_SHIFT) & + BNXT_RE_BCQE_FLAGS_MASK; + is_imm = (flags & BNXT_RE_RC_FLAGS_IMM_MASK) >> + BNXT_RE_RC_FLAGS_IMM_SHIFT; + is_rdma = (flags & BNXT_RE_RC_FLAGS_RDMA_MASK) >> + BNXT_RE_RC_FLAGS_RDMA_SHIFT; + ibvwc->wc_flags = 0; + if (is_imm) { + ibvwc->wc_flags |= IBV_WC_WITH_IMM; + /* The HW is returning imm_data in little-endian format, + * swap to Big Endian as expected by application + */ + ibvwc->imm_data = htobe32(le32toh(rcqe->imm_key)); + if (is_rdma) + ibvwc->opcode = IBV_WC_RECV_RDMA_WITH_IMM; + } + + if (qp->qptyp == IBV_QPT_UD) { + bnxt_re_fill_ud_cqe(ibvwc, hdr, cqe, flags); + } + + if (!qp->srq) + bnxt_re_jqq_mod_last(qp->jrqq, head); + bnxt_re_incr_head(rq, cnt); +} + +static uint8_t bnxt_re_poll_rcqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc, + void *cqe, uint32_t flg_val, int *cnt) +{ + struct bnxt_re_bcqe *hdr; + uint8_t status, pcqe = false; + + hdr = cqe + sizeof(struct bnxt_re_rc_cqe); + + status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) & + BNXT_RE_BCQE_STATUS_MASK; + *cnt = 1; + if (status == BNXT_RE_RSP_ST_OK) + bnxt_re_poll_success_rcqe(qp, ibvwc, hdr, flg_val, cqe); + else + *cnt = bnxt_re_poll_err_rcqe(qp, ibvwc, hdr, flg_val, cqe); + + return pcqe; +} + +static void bnxt_re_qp_move_flush_err(struct bnxt_re_qp *qp) +{ + struct bnxt_re_cq *scq, *rcq; + + scq = to_bnxt_re_cq(qp->ibvqp.send_cq); + rcq = to_bnxt_re_cq(qp->ibvqp.recv_cq); + + if (qp->qpst != IBV_QPS_ERR) + qp->qpst = IBV_QPS_ERR; + bnxt_re_list_add_node(&qp->rnode, &rcq->rfhead); + bnxt_re_list_add_node(&qp->snode, &scq->sfhead); +} + +/* Always return false */ +static uint8_t bnxt_re_poll_term_cqe(struct bnxt_re_qp *qp, int *cnt) +{ + /* For now just add the QP to flush list without + * considering the index reported in the CQE. + * Continue reporting flush completions until the + * SQ and RQ are empty. 
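+	 * No work completion is reported for the terminal CQE itself;
+	 * *cnt stays zero and the QP is only queued on the flush lists
+	 * of its send and receive CQs.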
+ */ + *cnt = 0; + if (qp->qpst != IBV_QPS_RESET) + bnxt_re_qp_move_flush_err(qp); + + return false; +} + +static int bnxt_re_poll_one(struct bnxt_re_cq *cq, int nwc, struct ibv_wc *wc, + uint32_t *resize) +{ + int type, cnt = 0, dqed = 0, hw_polled = 0; + struct bnxt_re_queue *cqq = cq->cqq; + struct bnxt_re_req_cqe *scqe; + struct bnxt_re_ud_cqe *rcqe; + uint64_t *qp_handle = NULL; + struct bnxt_re_bcqe *hdr; + struct bnxt_re_qp *qp; + uint8_t pcqe = false; + uint32_t flg_val; + void *cqe; + + while (nwc) { + cqe = cqq->va + cqq->head * bnxt_re_get_cqe_sz(); + hdr = cqe + sizeof(struct bnxt_re_req_cqe); + flg_val = le32toh(hdr->flg_st_typ_ph); + if (unlikely(!bnxt_re_is_cqe_valid(flg_val, cq->phase))) + break; + type = (flg_val >> BNXT_RE_BCQE_TYPE_SHIFT) & + BNXT_RE_BCQE_TYPE_MASK; + switch (type) { + case BNXT_RE_WC_TYPE_SEND: + scqe = cqe; + qp_handle = (uint64_t *)&scqe->qp_handle; + qp = (struct bnxt_re_qp *) + (uintptr_t)le64toh(scqe->qp_handle); + if (!qp) + break; /*stale cqe. should be rung.*/ + pcqe = bnxt_re_poll_scqe(qp, wc, cqe, flg_val, &cnt); + break; + case BNXT_RE_WC_TYPE_RECV_RC: + case BNXT_RE_WC_TYPE_RECV_UD: + rcqe = cqe; + qp_handle = (uint64_t *)&rcqe->qp_handle; + qp = (struct bnxt_re_qp *) + (uintptr_t)le64toh(rcqe->qp_handle); + if (!qp) + break; /*stale cqe. should be rung.*/ + pcqe = bnxt_re_poll_rcqe(qp, wc, cqe, flg_val, &cnt); + break; + case BNXT_RE_WC_TYPE_RECV_RAW: + break; + case BNXT_RE_WC_TYPE_TERM: + scqe = cqe; + qp_handle = (uint64_t *)&scqe->qp_handle; + qp = (struct bnxt_re_qp *) + (uintptr_t)le64toh(scqe->qp_handle); + if (!qp) + break; + pcqe = bnxt_re_poll_term_cqe(qp, &cnt); + break; + case BNXT_RE_WC_TYPE_COFF: + /* Stop further processing and return */ + bnxt_re_resize_cq_complete(cq); + if (unlikely(resize)) + *resize = 1; + return dqed; + default: + break; + }; + + if (pcqe) + goto skipp_real; + + hw_polled++; + if (qp_handle) { + *qp_handle = 0x0ULL; /* mark cqe as read */ + qp_handle = NULL; + } + bnxt_re_incr_head(cq->cqq, 1); + bnxt_re_change_cq_phase(cq); +skipp_real: + if (cnt) { + cnt = 0; + dqed++; + nwc--; + wc++; + } + } + + if (likely(hw_polled)) + bnxt_re_ring_cq_db(cq); + + return dqed; +} + +static int bnxt_re_poll_flush_wcs(struct bnxt_re_joint_queue *jqq, + struct ibv_wc *ibvwc, uint32_t qpid, + int nwc) +{ + struct bnxt_re_queue *que; + struct bnxt_re_wrid *wrid; + uint32_t cnt = 0; + + que = jqq->hwque; + while(nwc) { + if (bnxt_re_is_que_empty(que)) + break; + wrid = &jqq->swque[jqq->last_idx]; + ibvwc->status = IBV_WC_WR_FLUSH_ERR; + ibvwc->opcode = wrid->wc_opcd; + ibvwc->wr_id = wrid->wrid; + ibvwc->qp_num = qpid; + ibvwc->byte_len = 0; + ibvwc->wc_flags = 0; + + bnxt_re_jqq_mod_last(jqq, jqq->last_idx); + bnxt_re_incr_head(que, wrid->slots); + nwc--; + cnt++; + ibvwc++; + } + + return cnt; +} + +static int bnxt_re_poll_flush_wqes(struct bnxt_re_cq *cq, + struct bnxt_re_list_head *lhead, + struct ibv_wc *ibvwc, + uint32_t nwc) +{ + struct bnxt_re_list_node *cur, *tmp; + struct bnxt_re_joint_queue *jqq; + struct bnxt_re_qp *qp; + bool sq_list = false; + uint32_t polled = 0; + + sq_list = (lhead == &cq->sfhead) ? 
true : false; + if (!bnxt_re_list_empty(lhead)) { + list_for_each_node_safe(cur, tmp, lhead) { + if (sq_list) { + qp = list_node(cur, struct bnxt_re_qp, snode); + jqq = qp->jsqq; + } else { + qp = list_node(cur, struct bnxt_re_qp, rnode); + jqq = qp->jrqq; + if (!jqq) /* Using srq no need to flush */ + goto done; + } + + if (bnxt_re_is_que_empty(jqq->hwque)) + continue; + polled += bnxt_re_poll_flush_wcs(jqq, ibvwc + polled, + qp->qpid, nwc - polled); + if (!(nwc - polled)) + break; + } + } +done: + return polled; +} + +static int bnxt_re_poll_flush_lists(struct bnxt_re_cq *cq, uint32_t nwc, + struct ibv_wc *ibvwc) +{ + int left, polled = 0; + + polled = bnxt_re_poll_flush_wqes(cq, &cq->sfhead, ibvwc, nwc); + left = nwc - polled; + + if (!left) + return polled; + + polled += bnxt_re_poll_flush_wqes(cq, &cq->rfhead, + ibvwc + polled, left); + return polled; +} + +static int bnxt_re_poll_resize_cq_list(struct bnxt_re_cq *cq, uint32_t nwc, + struct ibv_wc *ibvwc) +{ + struct bnxt_re_list_node *cur, *tmp; + struct bnxt_re_work_compl *compl; + int left; + + left = nwc; + list_for_each_node_safe(cur, tmp, &cq->prev_cq_head) { + compl = list_node(cur, struct bnxt_re_work_compl, cnode); + if (!left) + break; + memcpy(ibvwc, &compl->wc, sizeof(*ibvwc)); + ibvwc++; + left--; + bnxt_re_list_del_node(&compl->cnode, &cq->prev_cq_head); + free(compl); + } + + return nwc - left; +} + + +int bnxt_re_poll_cq(struct ibv_cq *ibvcq, int nwc, struct ibv_wc *wc) +{ + int dqed = 0, left = 0; + struct bnxt_re_cq *cq; + uint32_t resize = 0; + + cq = container_of(ibvcq, struct bnxt_re_cq, ibvcq); + bnxt_re_dp_spin_lock(&cq->cqq->qlock); + + left = nwc; + /* Check whether we have anything to be completed from prev cq context */ + if (unlikely(!bnxt_re_list_empty(&cq->prev_cq_head))) { + dqed = bnxt_re_poll_resize_cq_list(cq, nwc, wc); + left = nwc - dqed; + if (!left) { + bnxt_re_dp_spin_unlock(&cq->cqq->qlock); + return dqed; + } + } + + dqed += bnxt_re_poll_one(cq, left, wc + dqed, &resize); + /* Check if anything is there to flush. 
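+	 * QPs that have moved to the error state sit on this CQ's
+	 * sfhead/rfhead lists; their outstanding WQEs are drained here
+	 * as IBV_WC_WR_FLUSH_ERR completions.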
*/ + left = nwc - dqed; + if (left && (!bnxt_re_list_empty(&cq->sfhead) || + !bnxt_re_list_empty(&cq->rfhead))) + dqed += bnxt_re_poll_flush_lists(cq, left, (wc + dqed)); + bnxt_re_dp_spin_unlock(&cq->cqq->qlock); + + return dqed; +} + +void bnxt_re_cleanup_cq(struct bnxt_re_qp *qp, struct bnxt_re_cq *cq) +{ + struct bnxt_re_queue *que = cq->cqq; + struct bnxt_re_req_cqe *scqe; + struct bnxt_re_rc_cqe *rcqe; + struct bnxt_re_bcqe *hdr; + int indx, type; + void *cqe; + + + bnxt_re_dp_spin_lock(&que->qlock); + for(indx = 0; indx < que->depth; indx++) { + cqe = que->va + indx * bnxt_re_get_cqe_sz(); + hdr = cqe + sizeof(struct bnxt_re_req_cqe); + type = (hdr->flg_st_typ_ph >> BNXT_RE_BCQE_TYPE_SHIFT) & + BNXT_RE_BCQE_TYPE_MASK; + + if (type == BNXT_RE_WC_TYPE_COFF) + continue; + if (type == BNXT_RE_WC_TYPE_SEND || + type == BNXT_RE_WC_TYPE_TERM) { + scqe = cqe; + if (scqe->qp_handle == (uint64_t)qp) + scqe->qp_handle = 0ULL; + } else { + rcqe = cqe; + if (rcqe->qp_handle == (uint64_t)qp) + rcqe->qp_handle = 0ULL; + } + + } + + if (_is_db_drop_recovery_enable(cq->cntx)) { + pthread_spin_lock(&cq->cntx->cq_dbr_res.lock); + bnxt_re_list_del_node(&cq->dbnode, &cq->cntx->cq_dbr_res.head); + pthread_spin_unlock(&cq->cntx->cq_dbr_res.lock); + } + bnxt_re_list_del_node(&qp->snode, &cq->sfhead); + bnxt_re_list_del_node(&qp->rnode, &cq->rfhead); + bnxt_re_dp_spin_unlock(&que->qlock); +} + +void bnxt_re_cq_event(struct ibv_cq *ibvcq) +{ + +} + +int bnxt_re_arm_cq(struct ibv_cq *ibvcq, int flags) +{ + struct bnxt_re_cq *cq = to_bnxt_re_cq(ibvcq); + + bnxt_re_dp_spin_lock(&cq->cqq->qlock); + flags = !flags ? BNXT_RE_QUE_TYPE_CQ_ARMALL : + BNXT_RE_QUE_TYPE_CQ_ARMSE; + + bnxt_re_ring_cq_arm_db(cq, flags); + bnxt_re_dp_spin_unlock(&cq->cqq->qlock); + + return 0; +} + +static int bnxt_re_check_qp_limits(struct bnxt_re_context *cntx, + struct ibv_qp_init_attr *attr) +{ + struct ibv_device_attr *devattr; + struct bnxt_re_dev *rdev; + + rdev = cntx->rdev; + devattr = &rdev->devattr; + if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) + return EINVAL; + if (attr->cap.max_send_sge > devattr->max_sge) + return EINVAL; + if (attr->cap.max_recv_sge > devattr->max_sge) + return EINVAL; + if (cntx->modes & BNXT_RE_WQE_MODE_VARIABLE) { + if (attr->cap.max_inline_data > BNXT_RE_MAX_INLINE_SIZE_VAR_WQE) + return -EINVAL; + } else if (attr->cap.max_inline_data > BNXT_RE_MAX_INLINE_SIZE) { + return EINVAL; + } + if (attr->cap.max_send_wr > devattr->max_qp_wr) + attr->cap.max_send_wr = devattr->max_qp_wr; + if (attr->cap.max_recv_wr > devattr->max_qp_wr) + attr->cap.max_recv_wr = devattr->max_qp_wr; + + return 0; +} + +static int bnxt_re_get_rq_slots(struct bnxt_re_dev *rdev, uint8_t qpmode, + uint32_t nrwr, uint32_t nsge, uint32_t *esz) +{ + uint32_t max_wqesz; + uint32_t wqe_size; + uint32_t stride; + uint32_t slots; + + stride = sizeof(struct bnxt_re_sge); + max_wqesz = bnxt_re_calc_wqe_sz(rdev->devattr.max_sge); + + wqe_size = bnxt_re_calc_wqe_sz(nsge); + if (wqe_size > max_wqesz) + return -EINVAL; + + if (qpmode == BNXT_RE_WQE_MODE_STATIC) + wqe_size = bnxt_re_calc_wqe_sz(6); + + if (esz) + *esz = wqe_size; + + slots = (nrwr * wqe_size) / stride; + return slots; +} + +static int bnxt_re_get_sq_slots(struct bnxt_re_dev *rdev, + uint8_t qpmode, uint32_t nswr, + uint32_t nsge, uint32_t ils, uint32_t *esize) +{ + uint32_t max_wqesz; + uint32_t wqe_size; + uint32_t cal_ils; + uint32_t stride; + uint32_t ilsize; + uint32_t hdr_sz; + uint32_t slots; + + hdr_sz = bnxt_re_get_sqe_hdr_sz(); + stride = sizeof(struct 
bnxt_re_sge); + max_wqesz = bnxt_re_calc_wqe_sz(rdev->devattr.max_sge); + ilsize = get_aligned(ils, hdr_sz); + + wqe_size = bnxt_re_calc_wqe_sz(nsge); + if (ilsize) { + cal_ils = hdr_sz + ilsize; + wqe_size = MAX(cal_ils, wqe_size); + wqe_size = get_aligned(wqe_size, hdr_sz); + } + if (wqe_size > max_wqesz) + return -EINVAL; + + if (qpmode == BNXT_RE_WQE_MODE_STATIC) + wqe_size = bnxt_re_calc_wqe_sz(6); + + if (esize) + *esize = wqe_size; + slots = (nswr * wqe_size) / stride; + return slots; +} + +static int bnxt_re_get_sqmem_size(struct bnxt_re_context *cntx, + struct ibv_qp_init_attr *attr, + struct bnxt_re_qattr *qattr) +{ + uint32_t nsge, nswr, diff = 0; + size_t bytes = 0; + uint32_t npsn; + uint32_t ils; + uint8_t mode; + uint32_t esz; + int nslots; + + mode = cntx->modes & BNXT_RE_WQE_MODE_VARIABLE; + nsge = attr->cap.max_send_sge; + diff = bnxt_re_get_diff(cntx->comp_mask); + nswr = attr->cap.max_send_wr + 1 + diff; + nswr = bnxt_re_init_depth(nswr, cntx->comp_mask); + ils = attr->cap.max_inline_data; + nslots = bnxt_re_get_sq_slots(cntx->rdev, mode, nswr, + nsge, ils, &esz); + if (nslots < 0) + return nslots; + npsn = bnxt_re_get_npsn(mode, nswr, nslots); + if (BNXT_RE_HW_RETX(cntx)) + npsn = roundup_pow_of_two(npsn); + + qattr->nwr = nswr; + qattr->slots = nslots; + qattr->esize = esz; + + bytes = nslots * sizeof(struct bnxt_re_sge); /* ring */ + bytes += npsn * bnxt_re_get_psne_size(cntx); /* psn */ + qattr->sz_ring = get_aligned(bytes, cntx->rdev->pg_size); + qattr->sz_shad = nswr * sizeof(struct bnxt_re_wrid); /* shadow */ + return 0; +} + +static int bnxt_re_get_rqmem_size(struct bnxt_re_context *cntx, + struct ibv_qp_init_attr *attr, + struct bnxt_re_qattr *qattr) +{ + uint32_t nrwr, nsge; + size_t bytes = 0; + uint32_t esz; + int nslots; + + nsge = attr->cap.max_recv_sge; + nrwr = attr->cap.max_recv_wr + 1; + nrwr = bnxt_re_init_depth(nrwr, cntx->comp_mask); + nslots = bnxt_re_get_rq_slots(cntx->rdev, cntx->modes, + nrwr, nsge, &esz); + if (nslots < 0) + return nslots; + qattr->nwr = nrwr; + qattr->slots = nslots; + qattr->esize = esz; + + bytes = nslots * sizeof(struct bnxt_re_sge); + qattr->sz_ring = get_aligned(bytes, cntx->rdev->pg_size); + qattr->sz_shad = nrwr * sizeof(struct bnxt_re_wrid); + return 0; +} + +static int bnxt_re_get_qpmem_size(struct bnxt_re_context *cntx, + struct ibv_qp_init_attr *attr, + struct bnxt_re_qattr *qattr) +{ + int size = 0; + int tmp; + int rc; + + size = sizeof(struct bnxt_re_qp); + tmp = sizeof(struct bnxt_re_joint_queue); + tmp += sizeof(struct bnxt_re_queue); + size += tmp; + + rc = bnxt_re_get_sqmem_size(cntx, attr, &qattr[BNXT_RE_QATTR_SQ_INDX]); + if (rc < 0) + return -EINVAL; + size += qattr[BNXT_RE_QATTR_SQ_INDX].sz_ring; + size += qattr[BNXT_RE_QATTR_SQ_INDX].sz_shad; + + if (!attr->srq) { + tmp = sizeof(struct bnxt_re_joint_queue); + tmp += sizeof(struct bnxt_re_queue); + size += tmp; + rc = bnxt_re_get_rqmem_size(cntx, attr, + &qattr[BNXT_RE_QATTR_RQ_INDX]); + if (rc < 0) + return -EINVAL; + size += qattr[BNXT_RE_QATTR_RQ_INDX].sz_ring; + size += qattr[BNXT_RE_QATTR_RQ_INDX].sz_shad; + } + return size; +} + +static void *bnxt_re_alloc_qpslab(struct bnxt_re_context *cntx, + struct ibv_qp_init_attr *attr, + struct bnxt_re_qattr *qattr) +{ + int bytes; + + bytes = bnxt_re_get_qpmem_size(cntx, attr, qattr); + if (bytes < 0) + return NULL; + return bnxt_re_alloc_mem(bytes, cntx->rdev->pg_size); +} + +static int bnxt_re_alloc_queue_ptr(struct bnxt_re_qp *qp, + struct ibv_qp_init_attr *attr) +{ + int rc = -ENOMEM; + int jqsz, qsz; + + 
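+	/* Carve the joint-queue and hardware-queue descriptors out of the
+	 * QP slab set up by bnxt_re_alloc_qpslab(); the ring memory itself
+	 * is taken from the same slab later, in bnxt_re_alloc_queues().
+	 */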
jqsz = sizeof(struct bnxt_re_joint_queue); + qsz = sizeof(struct bnxt_re_queue); + qp->jsqq = bnxt_re_get_obj(qp->mem, jqsz); + if (!qp->jsqq) + return rc; + qp->jsqq->hwque = bnxt_re_get_obj(qp->mem, qsz); + if (!qp->jsqq->hwque) + goto fail; + + if (!attr->srq) { + qp->jrqq = bnxt_re_get_obj(qp->mem, jqsz); + if (!qp->jrqq) + goto fail; + qp->jrqq->hwque = bnxt_re_get_obj(qp->mem, qsz); + if (!qp->jrqq->hwque) + goto fail; + } + + return 0; +fail: + return rc; +} + +static int bnxt_re_alloc_init_swque(struct bnxt_re_joint_queue *jqq, + struct bnxt_re_mem *mem, + struct bnxt_re_qattr *qattr) +{ + int indx; + + jqq->swque = bnxt_re_get_obj(mem, qattr->sz_shad); + if (!jqq->swque) + return -ENOMEM; + jqq->start_idx = 0; + jqq->last_idx = qattr->nwr - 1; + for (indx = 0; indx < qattr->nwr; indx++) + jqq->swque[indx].next_idx = indx + 1; + jqq->swque[jqq->last_idx].next_idx = 0; + jqq->last_idx = 0; + + return 0; +} + +static inline int bnxt_log2(int n) +{ + int t; + + if (n <= 0) + return -1; + + t = 0; + while ((1 << t) < n) + ++t; + + return t; +} + +static int bnxt_re_alloc_queues(struct bnxt_re_qp *qp, + struct ibv_qp_init_attr *attr, + struct bnxt_re_qattr *qattr) +{ + struct bnxt_re_context *cntx; + struct bnxt_re_queue *que; + uint32_t psn_size; + uint8_t indx; + int ret; + + cntx = qp->cntx; + + indx = BNXT_RE_QATTR_SQ_INDX; + que = qp->jsqq->hwque; + que->stride = sizeof(struct bnxt_re_sge); + que->depth = qattr[indx].slots; + que->diff = (bnxt_re_get_diff(cntx->comp_mask) * qattr[indx].esize) / + que->stride; + que->va = bnxt_re_get_ring(qp->mem, qattr[indx].sz_ring); + if (!que->va) + return -ENOMEM; + /* PSN-search memory is allocated without checking for + * QP-Type. Kernel driver do not map this memory if it + * is UD-qp. UD-qp use this memory to maintain WC-opcode. + * See definition of bnxt_re_fill_psns() for the use case. + */ + que->pad = (que->va + que->depth * que->stride); + psn_size = bnxt_re_get_psne_size(qp->cntx); + que->pad_stride_log2 = (uint32_t)bnxt_log2((double)psn_size); + + ret = bnxt_re_alloc_init_swque(qp->jsqq, qp->mem, &qattr[indx]); + if (ret) + goto fail; + + qp->cap.max_swr = qattr[indx].nwr; + qp->jsqq->cntx = qp->cntx; + que->dbtail = (qp->qpmode == BNXT_RE_WQE_MODE_VARIABLE) ? + &que->tail : &qp->jsqq->start_idx; + + /* Init and adjust MSN table size according to qp mode */ + if (!BNXT_RE_HW_RETX(qp->cntx)) + goto skip_msn; + que->msn = 0; + que->msn_tbl_sz = 0; + if (qp->qpmode & BNXT_RE_WQE_MODE_VARIABLE) + que->msn_tbl_sz = roundup_pow_of_two(qattr->slots) / 2; + else + que->msn_tbl_sz = roundup_pow_of_two(qattr->nwr); +skip_msn: + bnxt_re_dp_spin_init(&que->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded); + + if (qp->jrqq) { + indx = BNXT_RE_QATTR_RQ_INDX; + que = qp->jrqq->hwque; + que->stride = sizeof(struct bnxt_re_sge); + que->depth = qattr[indx].slots; + que->max_slots = qattr[indx].esize / que->stride; + que->dbtail = &qp->jrqq->start_idx; + que->va = bnxt_re_get_ring(qp->mem, qattr[indx].sz_ring); + if (!que->va) + return -ENOMEM; + /* For RQ only bnxt_re_wri.wrid is used. 
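+		 * (i.e. the wrid field of struct bnxt_re_wrid; unlike the SQ,
+		 * no PSN-search area is reserved behind the RQ ring).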
*/ + ret = bnxt_re_alloc_init_swque(qp->jrqq, qp->mem, &qattr[indx]); + if (ret) + goto fail; + + bnxt_re_dp_spin_init(&que->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded); + qp->cap.max_rwr = qattr[indx].nwr; + qp->jrqq->cntx = qp->cntx; + } + + return 0; +fail: + return ret; +} + +void bnxt_re_async_event(struct ibv_async_event *event) +{ + struct ibv_qp *ibvqp; + struct bnxt_re_qp *qp; + + switch (event->event_type) { + case IBV_EVENT_CQ_ERR: + break; + case IBV_EVENT_SRQ_ERR: + case IBV_EVENT_QP_FATAL: + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_PATH_MIG_ERR: { + ibvqp = event->element.qp; + qp = to_bnxt_re_qp(ibvqp); + bnxt_re_qp_move_flush_err(qp); + break; + } + case IBV_EVENT_SQ_DRAINED: + case IBV_EVENT_PATH_MIG: + case IBV_EVENT_COMM_EST: + case IBV_EVENT_QP_LAST_WQE_REACHED: + case IBV_EVENT_SRQ_LIMIT_REACHED: + case IBV_EVENT_PORT_ACTIVE: + case IBV_EVENT_PORT_ERR: + default: + break; + } +} + +struct ibv_qp *bnxt_re_create_qp(struct ibv_pd *ibvpd, + struct ibv_qp_init_attr *attr) +{ + struct bnxt_re_context *cntx = to_bnxt_re_context(ibvpd->context); + struct bnxt_re_qp_resp resp = {}; + struct ibv_device_attr *devattr; + struct bnxt_re_qp_req req = {}; + struct bnxt_re_qattr qattr[2]; + struct bnxt_re_qpcap *cap; + struct bnxt_re_dev *rdev; + struct bnxt_re_qp *qp; + void *mem; + + if (bnxt_re_check_qp_limits(cntx, attr)) + return NULL; + + memset(qattr, 0, (2 * sizeof(*qattr))); + mem = bnxt_re_alloc_qpslab(cntx, attr, qattr); + if (!mem) + return NULL; + qp = bnxt_re_get_obj(mem, sizeof(*qp)); + if (!qp) + goto fail; + qp->mem = mem; + + qp->cctx = cntx->cctx; + + qp->cntx = cntx; + qp->qpmode = cntx->modes & BNXT_RE_WQE_MODE_VARIABLE; + /* alloc queue pointers */ + if (bnxt_re_alloc_queue_ptr(qp, attr)) + goto fail; + /* alloc queues */ + if (bnxt_re_alloc_queues(qp, attr, qattr)) + goto fail; + /* Fill ibv_cmd */ + cap = &qp->cap; + req.qpsva = (uint64_t)qp->jsqq->hwque->va; + req.qprva = qp->jrqq ? 
(uint64_t)qp->jrqq->hwque->va : 0; + req.qp_handle = (uint64_t)qp; + + if (ibv_cmd_create_qp(ibvpd, &qp->ibvqp, attr, &req.cmd, sizeof(req), + &resp.resp, sizeof(resp))) + goto fail; + + qp->qpid = resp.qpid; + qp->qptyp = attr->qp_type; + qp->qpst = IBV_QPS_RESET; + qp->scq = to_bnxt_re_cq(attr->send_cq); + qp->rcq = to_bnxt_re_cq(attr->recv_cq); + if (attr->srq) + qp->srq = to_bnxt_re_srq(attr->srq); + qp->udpi = &cntx->udpi; + qp->rand.seed = qp->qpid; + qp->sq_shadow_db_key = BNXT_RE_DB_KEY_INVALID; + qp->rq_shadow_db_key = BNXT_RE_DB_KEY_INVALID; + qp->sq_msn = 0; + + rdev = cntx->rdev; + devattr = &rdev->devattr; + cap->max_ssge = attr->cap.max_send_sge; + cap->max_rsge = attr->cap.max_recv_sge; + cap->max_inline = attr->cap.max_inline_data; + cap->sqsig = attr->sq_sig_all; + cap->is_atomic_cap = devattr->atomic_cap; + INIT_DBLY_LIST_NODE(&qp->snode); + INIT_DBLY_LIST_NODE(&qp->rnode); + INIT_DBLY_LIST_NODE(&qp->dbnode); + + /* For SR2, push will be negotiated at modify qp */ + if (_is_chip_gen_p5(qp->cctx) && cntx->udpi.wcdpi) { + qp->push_st_en = 1; + qp->max_push_sz = BNXT_RE_MAX_INLINE_SIZE; + } + + if (_is_db_drop_recovery_enable(cntx)) { + pthread_spin_lock(&cntx->qp_dbr_res.lock); + bnxt_re_list_add_node(&qp->dbnode, &cntx->qp_dbr_res.head); + pthread_spin_unlock(&cntx->qp_dbr_res.lock); + } + return &qp->ibvqp; +fail: + bnxt_re_free_mem(mem); + return NULL; +} + +int bnxt_re_modify_qp(struct ibv_qp *ibvqp, struct ibv_qp_attr *attr, + int attr_mask) +{ + struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp); + int rc; + + struct bnxt_re_modify_ex_resp resp = {}; + struct bnxt_re_modify_ex_req req = {}; + bool can_issue_mqp_ex = false; + + if (bnxt_re_is_mqp_ex_supported(qp->cntx)) { + can_issue_mqp_ex = true; + /* Request for PPP */ + if (can_request_ppp(qp, attr, attr_mask)) { + req.comp_mask |= BNXT_RE_MQP_PPP_REQ_EN; + req.dpi = qp->udpi->wcdpi; + } + if (attr_mask & IBV_QP_PATH_MTU) + req.comp_mask |= BNXT_RE_MQP_PATH_MTU_MASK; + } + rc = ibv_cmd_modify_qp_compat(ibvqp, attr, attr_mask, + can_issue_mqp_ex, &req, &resp); + if (!rc) { + if (attr_mask & IBV_QP_STATE) { + qp->qpst = attr->qp_state; + /* transition to reset */ + if (qp->qpst == IBV_QPS_RESET) { + qp->jsqq->hwque->head = 0; + qp->jsqq->hwque->tail = 0; + *qp->jsqq->hwque->dbtail = 0; + qp->jsqq->start_idx = 0; + qp->jsqq->last_idx = 0; + bnxt_re_cleanup_cq(qp, qp->scq); + if (qp->jrqq) { + qp->jrqq->hwque->head = 0; + qp->jrqq->hwque->tail = 0; + *qp->jrqq->hwque->dbtail = 0; + qp->jrqq->start_idx = 0; + qp->jrqq->last_idx = 0; + bnxt_re_cleanup_cq(qp, qp->rcq); + } + } + /* Copy if PUSH was enabled */ + if (resp.comp_mask & BNXT_RE_MQP_PPP_REQ_EN_MASK) { + qp->push_st_en = BNXT_RE_MQP_PPP_REQ_EN; + /* Set the next posting state + * based on current h/w state + */ + qp->push_st_en |= + !(!!(resp.ppp_st_idx & + BNXT_RE_MQP_PPP_STATE)) << + BNXT_RE_PPP_ST_SHIFT; + qp->ppp_idx = + (resp.ppp_st_idx & + BNXT_RE_MQP_PPP_IDX_MASK); + if (qp->qpmode == BNXT_RE_WQE_MODE_VARIABLE) + qp->max_push_sz = + BNXT_RE_MAX_PUSH_SIZE_VAR_WQE; + else + qp->max_push_sz = + BNXT_RE_MAX_INLINE_SIZE; + } + } + + if (attr_mask & IBV_QP_SQ_PSN) + qp->sq_psn = attr->sq_psn; + + if (resp.comp_mask & BNXT_RE_MQP_PATH_MTU_MASK) + qp->mtu = resp.path_mtu; + else if (attr_mask & IBV_QP_PATH_MTU) + qp->mtu = (0x80 << attr->path_mtu); + } + + return rc; +} + +int bnxt_re_query_qp(struct ibv_qp *ibvqp, struct ibv_qp_attr *attr, + int attr_mask, struct ibv_qp_init_attr *init_attr) +{ + struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp); + struct ibv_query_qp cmd 
= {}; + int rc; + + rc = ibv_cmd_query_qp(ibvqp, attr, attr_mask, init_attr, + &cmd, sizeof(cmd)); + if (!rc) + qp->qpst = ibvqp->state; + + return rc; +} + +int bnxt_re_destroy_qp(struct ibv_qp *ibvqp) +{ + struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp); + struct bnxt_re_mem *mem; + int status; + + qp->qpst = IBV_QPS_RESET; + if (_is_db_drop_recovery_enable(qp->cntx)) { + pthread_spin_lock(&qp->cntx->qp_dbr_res.lock); + bnxt_re_list_del_node(&qp->dbnode, &qp->cntx->qp_dbr_res.head); + pthread_spin_unlock(&qp->cntx->qp_dbr_res.lock); + } + status = ibv_cmd_destroy_qp(ibvqp); + if (status) { + if (_is_db_drop_recovery_enable(qp->cntx)) { + pthread_spin_lock(&qp->cntx->qp_dbr_res.lock); + bnxt_re_list_add_node(&qp->dbnode, + &qp->cntx->qp_dbr_res.head); + pthread_spin_unlock(&qp->cntx->qp_dbr_res.lock); + } + return status; + } + bnxt_re_cleanup_cq(qp, qp->rcq); + bnxt_re_cleanup_cq(qp, qp->scq); + mem = qp->mem; + bnxt_re_free_mem(mem); + return 0; +} + +static void bnxt_re_put_rx_sge(struct bnxt_re_queue *que, uint32_t *idx, + struct ibv_sge *sgl, int nsg) +{ + struct bnxt_re_sge *sge; + int indx; + + for (indx = 0; indx < nsg; indx++) { + sge = bnxt_re_get_hwqe(que, (*idx)++); + sge->pa = htole64(sgl[indx].addr); + sge->lkey = htole32(sgl[indx].lkey); + sge->length = htole32(sgl[indx].length); + } +} + +static int bnxt_re_put_tx_sge(struct bnxt_re_queue *que, uint32_t *idx, + struct ibv_sge *sgl, int nsg) +{ + struct bnxt_re_sge *sge; + int indx; + int len; + + len = 0; + for (indx = 0; indx < nsg; indx++) { + sge = bnxt_re_get_hwqe(que, (*idx)++); + sge->pa = htole64(sgl[indx].addr); + sge->lkey = htole32(sgl[indx].lkey); + sge->length = htole32(sgl[indx].length); + len += sgl[indx].length; + } + return len; +} + +static inline int bnxt_re_calc_inline_len(struct ibv_send_wr *swr) +{ + int illen, indx; + + illen = 0; + for (indx = 0; indx < swr->num_sge; indx++) + illen += swr->sg_list[indx].length; + return get_aligned(illen, sizeof(struct bnxt_re_sge)); +} + +static int bnxt_re_put_inline(struct bnxt_re_queue *que, uint32_t *idx, + struct bnxt_re_push_buffer *pbuf, + struct ibv_sge *sgl, uint32_t nsg, + uint16_t max_ils) +{ + int len, t_len, offt = 0; + int t_cplen = 0, cplen; + bool pull_dst = true; + void *il_dst = NULL; + void *il_src = NULL; + int alsize; + int indx; + + alsize = sizeof(struct bnxt_re_sge); + + t_len = 0; + for (indx = 0; indx < nsg; indx++) { + len = sgl[indx].length; + il_src = (void *)sgl[indx].addr; + t_len += len; + if (t_len > max_ils) + goto bad; + while (len) { + if (pull_dst) { + pull_dst = false; + il_dst = bnxt_re_get_hwqe(que, (*idx)++); + if (pbuf) + pbuf->wqe[*idx - 1] = + (__u64)il_dst; + t_cplen = 0; + offt = 0; + } + cplen = MIN(len, alsize); + cplen = MIN(cplen,(alsize - offt)); + memcpy(il_dst, il_src, cplen); + t_cplen += cplen; + il_src += cplen; + il_dst += cplen; + offt += cplen; + len -= cplen; + if (t_cplen == alsize) + pull_dst = true; + } + } + + return t_len; +bad: + return -ENOMEM; +} + +static int bnxt_re_required_slots(struct bnxt_re_qp *qp, struct ibv_send_wr *wr, + uint32_t *wqe_sz, void **pbuf) +{ + uint32_t wqe_byte; + int ilsize; + + if (wr->send_flags & IBV_SEND_INLINE) { + ilsize = bnxt_re_calc_inline_len(wr); + if (ilsize > qp->cap.max_inline) + return -EINVAL; + if (qp->push_st_en && ilsize <= qp->max_push_sz) + *pbuf = bnxt_re_get_pbuf(&qp->push_st_en, qp->ppp_idx, qp->cntx); + wqe_byte = (ilsize + bnxt_re_get_sqe_hdr_sz()); + } else { + wqe_byte = bnxt_re_calc_wqe_sz(wr->num_sge); + } + + /* que->stride is always 2^4 = 16, 
thus using hard-coding */ + *wqe_sz = wqe_byte >> 4; + if (qp->qpmode == BNXT_RE_WQE_MODE_STATIC) + return 8; + return *wqe_sz; +} + +static inline void bnxt_re_set_hdr_flags(struct bnxt_re_bsqe *hdr, + struct ibv_send_wr *wr, + uint32_t slots, uint8_t sqsig) +{ + uint32_t send_flags; + uint32_t hdrval = 0; + uint8_t opcd; + + send_flags = wr->send_flags; + if (send_flags & IBV_SEND_SIGNALED || sqsig) + hdrval |= ((BNXT_RE_WR_FLAGS_SIGNALED & BNXT_RE_HDR_FLAGS_MASK) + << BNXT_RE_HDR_FLAGS_SHIFT); + if (send_flags & IBV_SEND_FENCE) + hdrval |= ((BNXT_RE_WR_FLAGS_UC_FENCE & BNXT_RE_HDR_FLAGS_MASK) + << BNXT_RE_HDR_FLAGS_SHIFT); + if (send_flags & IBV_SEND_SOLICITED) + hdrval |= ((BNXT_RE_WR_FLAGS_SE & BNXT_RE_HDR_FLAGS_MASK) + << BNXT_RE_HDR_FLAGS_SHIFT); + if (send_flags & IBV_SEND_INLINE) + hdrval |= ((BNXT_RE_WR_FLAGS_INLINE & BNXT_RE_HDR_FLAGS_MASK) + << BNXT_RE_HDR_FLAGS_SHIFT); + hdrval |= (slots & BNXT_RE_HDR_WS_MASK) << BNXT_RE_HDR_WS_SHIFT; + + /* Fill opcode */ + opcd = ibv_to_bnxt_re_wr_opcd[wr->opcode]; + hdrval |= (opcd & BNXT_RE_HDR_WT_MASK); + hdr->rsv_ws_fl_wt = htole32(hdrval); +} + +static int bnxt_re_build_tx_sge(struct bnxt_re_queue *que, uint32_t *idx, + struct bnxt_re_push_buffer *pbuf, + struct ibv_send_wr *wr, + uint16_t max_il) +{ + if (wr->send_flags & IBV_SEND_INLINE) + return bnxt_re_put_inline(que, idx, pbuf, wr->sg_list, wr->num_sge, max_il); + + return bnxt_re_put_tx_sge(que, idx, wr->sg_list, wr->num_sge); +} + +static void *bnxt_re_pull_psn_buff(struct bnxt_re_queue *que, bool hw_retx) +{ + if (hw_retx) + return (void *)(que->pad + ((que->msn) << que->pad_stride_log2)); + return (void *)(que->pad + ((*que->dbtail) << que->pad_stride_log2)); +} + +static void bnxt_re_fill_psns_for_msntbl(struct bnxt_re_qp *qp, uint32_t len, + uint32_t st_idx, uint8_t opcode) +{ + uint32_t npsn = 0, start_psn = 0, next_psn = 0; + struct bnxt_re_msns *msns; + uint32_t pkt_cnt = 0; + + msns = bnxt_re_pull_psn_buff(qp->jsqq->hwque, true); + msns->start_idx_next_psn_start_psn = 0; + + if (qp->qptyp == IBV_QPT_RC) { + start_psn = qp->sq_psn; + pkt_cnt = (len / qp->mtu); + if (len % qp->mtu) + pkt_cnt++; + /* Increment the psn even for 0 len packets + * e.g. for opcode rdma-write-with-imm-data + * with length field = 0 + */ + if (bnxt_re_is_zero_len_pkt(len, opcode)) + pkt_cnt = 1; + /* make it 24 bit */ + next_psn = qp->sq_psn + pkt_cnt; + npsn = next_psn; + qp->sq_psn = next_psn; + msns->start_idx_next_psn_start_psn |= + bnxt_re_update_msn_tbl(st_idx, npsn, start_psn); + qp->jsqq->hwque->msn++; + qp->jsqq->hwque->msn %= qp->jsqq->hwque->msn_tbl_sz; + } +} + +static void bnxt_re_fill_psns(struct bnxt_re_qp *qp, uint32_t len, + uint32_t st_idx, uint8_t opcode) +{ + uint32_t opc_spsn = 0, flg_npsn = 0; + struct bnxt_re_psns_ext *psns_ext; + uint32_t pkt_cnt = 0, nxt_psn = 0; + struct bnxt_re_psns *psns; + + psns = bnxt_re_pull_psn_buff(qp->jsqq->hwque, false); + psns_ext = (struct bnxt_re_psns_ext *)psns; + + if (qp->qptyp == IBV_QPT_RC) { + opc_spsn = qp->sq_psn & BNXT_RE_PSNS_SPSN_MASK; + pkt_cnt = (len / qp->mtu); + if (len % qp->mtu) + pkt_cnt++; + /* Increment the psn even for 0 len packets + * e.g. 
for opcode rdma-write-with-imm-data + * with length field = 0 + */ + if (bnxt_re_is_zero_len_pkt(len, opcode)) + pkt_cnt = 1; + nxt_psn = ((qp->sq_psn + pkt_cnt) & BNXT_RE_PSNS_NPSN_MASK); + flg_npsn = nxt_psn; + qp->sq_psn = nxt_psn; + } + psns->opc_spsn = htole32(opc_spsn); + psns->flg_npsn = htole32(flg_npsn); + /* Update for Thor p5 not Thor2 */ + if (!BNXT_RE_HW_RETX(qp->cntx) && qp->cctx->chip_is_gen_p5_thor2) + psns_ext->st_slot_idx = st_idx; +} + +static int bnxt_re_build_ud_sqe(struct ibv_send_wr *wr, + struct bnxt_re_bsqe *hdr, + struct bnxt_re_send *sqe) +{ + struct bnxt_re_ah *ah; + uint64_t qkey; + + ah = to_bnxt_re_ah(wr->wr.ud.ah); + if (!wr->wr.ud.ah) + return -EINVAL; + qkey = wr->wr.ud.remote_qkey; + hdr->lhdr.qkey_len |= htole64(qkey << 32); + sqe->dst_qp = htole32(wr->wr.ud.remote_qpn); + sqe->avid = htole32(ah->avid & 0xFFFFF); + + return 0; +} + +static void bnxt_re_build_cns_sqe(struct ibv_send_wr *wr, + struct bnxt_re_bsqe *hdr, + void *hdr2) +{ + struct bnxt_re_atomic *sqe = hdr2; + + hdr->key_immd = htole32(wr->wr.atomic.rkey); + hdr->lhdr.rva = htole64(wr->wr.atomic.remote_addr); + sqe->cmp_dt = htole64(wr->wr.atomic.compare_add); + sqe->swp_dt = htole64(wr->wr.atomic.swap); +} + +static void bnxt_re_build_fna_sqe(struct ibv_send_wr *wr, + struct bnxt_re_bsqe *hdr, + void *hdr2) +{ + struct bnxt_re_atomic *sqe = hdr2; + + hdr->key_immd = htole32(wr->wr.atomic.rkey); + hdr->lhdr.rva = htole64(wr->wr.atomic.remote_addr); + sqe->swp_dt = htole64(wr->wr.atomic.compare_add); +} + +void bnxt_re_force_rts2rts(struct bnxt_re_qp *qp) +{ + struct ibv_qp_attr attr = {}; + int attr_mask; + attr_mask = IBV_QP_STATE; + attr.qp_state = IBV_QPS_RTS; + bnxt_re_modify_qp(&qp->ibvqp, &attr, attr_mask); + qp->wqe_cnt = 0; +} + +int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, + struct ibv_send_wr **bad) +{ + struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp); + struct bnxt_re_queue *sq = qp->jsqq->hwque; + struct bnxt_re_push_buffer *pbuf = NULL; + bool chip_is_not_gen_p5_thor2; + int slots, ret = 0, len = 0; + uint32_t swq_idx, wqe_size; + struct bnxt_re_wrid *wrid; + struct bnxt_re_rdma *rsqe; + struct bnxt_re_bsqe *hdr; + struct bnxt_re_send *sqe; + bool ring_db = false; + uint32_t idx; + + bnxt_re_dp_spin_lock(&sq->qlock); + chip_is_not_gen_p5_thor2 = !qp->cctx->chip_is_gen_p5_thor2; + while (wr) { + slots = bnxt_re_required_slots(qp, wr, &wqe_size, (void **)&pbuf); + if (unlikely(slots < 0 || bnxt_re_is_que_full(sq, slots)) || + wr->num_sge > qp->cap.max_ssge) { + *bad = wr; + ret = ENOMEM; + goto bad_wr; + } + if ((wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP || + wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD) && + !qp->cap.is_atomic_cap) { + *bad = wr; + ret = EINVAL; + goto bad_wr; + } + idx = 0; + len = 0; + hdr = bnxt_re_get_hwqe(sq, idx++); + sqe = bnxt_re_get_hwqe(sq, idx++); + /* populate push buffer */ + if (pbuf) { + pbuf->qpid = qp->qpid; + pbuf->wqe[0] = (__u64)hdr; + pbuf->wqe[1] = (__u64)sqe; + pbuf->st_idx = *sq->dbtail; + } + if (wr->num_sge) { + len = bnxt_re_build_tx_sge(sq, &idx, pbuf, wr, qp->cap.max_inline); + if (unlikely(len < 0)) { + ret = ENOMEM; + *bad = wr; + goto bad_wr; + } + } + hdr->lhdr.qkey_len = htole32(len); + bnxt_re_set_hdr_flags(hdr, wr, wqe_size, qp->cap.sqsig); + switch (wr->opcode) { + case IBV_WR_SEND_WITH_IMM: + /* HW is swapping the immediate data before + * sending it out on the wire. To workaround + * this, swap the imm_data value as sent by + * the application so that the value going out + * on the wire is in big-endian format. 
+ */ + hdr->key_immd = htole32(be32toh(wr->imm_data)); + if (qp->qptyp == IBV_QPT_UD) { + if (chip_is_not_gen_p5_thor2 && + qp->wqe_cnt == BNXT_RE_UD_QP_STALL) + bnxt_re_force_rts2rts(qp); + + len = bnxt_re_build_ud_sqe(wr, hdr, sqe); + } + break; + case IBV_WR_SEND: + if (qp->qptyp == IBV_QPT_UD) { + if (chip_is_not_gen_p5_thor2 && + qp->wqe_cnt == BNXT_RE_UD_QP_STALL) + bnxt_re_force_rts2rts(qp); + + len = bnxt_re_build_ud_sqe(wr, hdr, sqe); + } + break; + case IBV_WR_RDMA_WRITE_WITH_IMM: + hdr->key_immd = htole32(be32toh(wr->imm_data)); + case IBV_WR_RDMA_WRITE: + case IBV_WR_RDMA_READ: + rsqe = (struct bnxt_re_rdma *)sqe; + rsqe->rva = htole64(wr->wr.rdma.remote_addr); + rsqe->rkey = htole32(wr->wr.rdma.rkey); + break; + case IBV_WR_ATOMIC_CMP_AND_SWP: + bnxt_re_build_cns_sqe(wr, hdr, sqe); + break; + case IBV_WR_ATOMIC_FETCH_AND_ADD: + bnxt_re_build_fna_sqe(wr, hdr, sqe); + break; + default : + len = -EINVAL; + break; + } + + if (unlikely(len < 0)) { + ret = (len == -EINVAL) ? EINVAL : ENOMEM; + *bad = wr; + break; + } + if (BNXT_RE_HW_RETX(qp->cntx)) + bnxt_re_fill_psns_for_msntbl(qp, len, *sq->dbtail, wr->opcode); + else + bnxt_re_fill_psns(qp, len, *sq->dbtail, wr->opcode); + + wrid = bnxt_re_get_swqe(qp->jsqq, &swq_idx); + wrid->wrid = wr->wr_id; + wrid->bytes = len; + wrid->slots = slots; + wrid->sig = (wr->send_flags & IBV_SEND_SIGNALED || qp->cap.sqsig) ? + IBV_SEND_SIGNALED : 0; + wrid->wc_opcd = ibv_wr_to_wc_opcd[wr->opcode]; + + bnxt_re_incr_tail(sq, slots); + bnxt_re_jqq_mod_start(qp->jsqq, swq_idx); + ring_db = true; + if (pbuf) { + ring_db = false; + pbuf->tail = *sq->dbtail; + if (_is_chip_thor2(qp->cctx)) { + /* WA for SR2 A0, ring additional db */ + ring_db |= _is_chip_a0(qp->cctx); + bnxt_re_fill_ppp(pbuf, qp, len, idx); + } else { + bnxt_re_fill_push_wcb(qp, pbuf, idx); + } + + bnxt_re_put_pbuf(qp->cntx, pbuf); + pbuf = NULL; + } + qp->wqe_cnt++; + qp->sq_msn++; + wr = wr->next; + } + +bad_wr: + if (ring_db) + bnxt_re_ring_sq_db(qp); + + if (pbuf) + bnxt_re_put_pbuf(qp->cntx, pbuf); + + bnxt_re_dp_spin_unlock(&sq->qlock); + return ret; +} + +int bnxt_re_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad) +{ + struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp); + struct bnxt_re_queue *rq = qp->jrqq->hwque; + struct bnxt_re_wrid *swque; + struct bnxt_re_brqe *hdr; + struct bnxt_re_sge *sge; + bool ring_db = false; + uint32_t swq_idx; + uint32_t hdrval; + uint32_t idx; + int rc = 0; + + bnxt_re_dp_spin_lock(&rq->qlock); + while (wr) { + if (unlikely(bnxt_re_is_que_full(rq, rq->max_slots) || + wr->num_sge > qp->cap.max_rsge)) { + *bad = wr; + rc = ENOMEM; + break; + } + swque = bnxt_re_get_swqe(qp->jrqq, &swq_idx); + + /* + * Initialize idx to 2 since the length of header wqe is 32 bytes + * i.e. sizeof(struct bnxt_re_brqe) + sizeof(struct bnxt_re_send) + */ + idx = 2; + hdr = bnxt_re_get_hwqe_hdr(rq); + + if (!wr->num_sge) { + /* + * HW needs at least one SGE for RQ Entries. + * Create an entry if num_sge = 0, + * update the idx and set length of sge to 0. 
+ */ + sge = bnxt_re_get_hwqe(rq, idx++); + sge->length = 0; + } else { + /* Fill SGEs */ + bnxt_re_put_rx_sge(rq, &idx, wr->sg_list, wr->num_sge); + } + hdrval = BNXT_RE_WR_OPCD_RECV; + hdrval |= ((idx & BNXT_RE_HDR_WS_MASK) << BNXT_RE_HDR_WS_SHIFT); + hdr->rsv_ws_fl_wt = htole32(hdrval); + hdr->wrid = htole32(swq_idx); + + swque->wrid = wr->wr_id; + swque->slots = rq->max_slots; + swque->wc_opcd = BNXT_RE_WC_OPCD_RECV; + + bnxt_re_jqq_mod_start(qp->jrqq, swq_idx); + bnxt_re_incr_tail(rq, rq->max_slots); + ring_db = true; + wr = wr->next; + } + if (ring_db) + bnxt_re_ring_rq_db(qp); + bnxt_re_dp_spin_unlock(&rq->qlock); + + return rc; +} + +static size_t bnxt_re_get_srqmem_size(struct bnxt_re_context *cntx, + struct ibv_srq_init_attr *attr, + struct bnxt_re_qattr *qattr) +{ + uint32_t stride, nswr; + size_t size = 0; + + size = sizeof(struct bnxt_re_srq); + size += sizeof(struct bnxt_re_queue); + /* allocate 1 extra to determin full condition */ + nswr = attr->attr.max_wr + 1; + nswr = bnxt_re_init_depth(nswr, cntx->comp_mask); + stride = bnxt_re_get_srqe_sz(); + + qattr->nwr = nswr; + qattr->slots = nswr; + qattr->esize = stride; + + qattr->sz_ring = get_aligned((nswr * stride), cntx->rdev->pg_size); + qattr->sz_shad = nswr * sizeof(struct bnxt_re_wrid); /* shadow */ + + size += qattr->sz_ring; + size += qattr->sz_shad; + return size; +} + +static void *bnxt_re_alloc_srqslab(struct bnxt_re_context *cntx, + struct ibv_srq_init_attr *attr, + struct bnxt_re_qattr *qattr) +{ + size_t bytes; + + bytes = bnxt_re_get_srqmem_size(cntx, attr, qattr); + return bnxt_re_alloc_mem(bytes, cntx->rdev->pg_size); +} + +static struct bnxt_re_srq *bnxt_re_srq_alloc_queue_ptr(struct bnxt_re_mem *mem) +{ + struct bnxt_re_srq *srq; + + srq = bnxt_re_get_obj(mem, sizeof(*srq)); + if (!srq) + return NULL; + srq->srqq = bnxt_re_get_obj(mem, sizeof(struct bnxt_re_queue)); + if (!srq->srqq) + return NULL; + return srq; +} + +static int bnxt_re_srq_alloc_queue(struct bnxt_re_srq *srq, + struct ibv_srq_init_attr *attr, + struct bnxt_re_qattr *qattr) +{ + struct bnxt_re_queue *que; + int ret = -ENOMEM; + int idx; + + que = srq->srqq; + que->depth = qattr->slots; + que->stride = qattr->esize; + que->va = bnxt_re_get_ring(srq->mem, qattr->sz_ring); + if (!que->va) + goto bail; + bnxt_re_dp_spin_init(&que->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded); + /* For SRQ only bnxt_re_wrid.wrid is used. 
*/ + srq->srwrid = bnxt_re_get_obj(srq->mem, qattr->sz_shad); + if (!srq->srwrid) + goto bail; + + srq->start_idx = 0; + srq->last_idx = que->depth - 1; + for (idx = 0; idx < que->depth; idx++) + srq->srwrid[idx].next_idx = idx + 1; + srq->srwrid[srq->last_idx].next_idx = -1; + return 0; +bail: + bnxt_re_dp_spin_destroy(&srq->srqq->qlock); + return ret; +} + +struct ibv_srq *bnxt_re_create_srq(struct ibv_pd *ibvpd, + struct ibv_srq_init_attr *attr) +{ + struct bnxt_re_srq_resp resp = {}; + struct bnxt_re_srq_req cmd = {}; + struct bnxt_re_qattr qattr = {}; + struct bnxt_re_context *uctx; + struct bnxt_re_srq *srq; + void *mem; + int ret; + + uctx = to_bnxt_re_context(ibvpd->context); + mem = bnxt_re_alloc_srqslab(uctx, attr, &qattr); + if (!mem) + return NULL; + + srq = bnxt_re_srq_alloc_queue_ptr(mem); + if (!srq) + goto fail; + srq->uctx = uctx; + srq->mem = mem; + if (bnxt_re_srq_alloc_queue(srq, attr, &qattr)) + goto fail; + + cmd.srqva = (uint64_t)srq->srqq->va; + cmd.srq_handle = (uint64_t)srq; + ret = ibv_cmd_create_srq(ibvpd, &srq->ibvsrq, attr, + &cmd.cmd, sizeof(cmd), + &resp.resp, sizeof(resp)); + if (ret) + goto fail; + + srq->srqid = resp.srqid; + srq->udpi = &uctx->udpi; + srq->cap.max_wr = srq->srqq->depth; + srq->cap.max_sge = attr->attr.max_sge; + srq->cap.srq_limit = attr->attr.srq_limit; + srq->arm_req = false; + srq->rand.seed = srq->srqid; + srq->shadow_db_key = BNXT_RE_DB_KEY_INVALID; + + INIT_DBLY_LIST_NODE(&srq->dbnode); + if (_is_db_drop_recovery_enable(uctx)) { + pthread_spin_lock(&uctx->srq_dbr_res.lock); + bnxt_re_list_add_node(&srq->dbnode, &uctx->srq_dbr_res.head); + pthread_spin_unlock(&uctx->srq_dbr_res.lock); + } + return &srq->ibvsrq; +fail: + bnxt_re_free_mem(mem); + return NULL; +} + +int bnxt_re_modify_srq(struct ibv_srq *ibvsrq, struct ibv_srq_attr *attr, + int attr_mask) +{ + struct bnxt_re_srq *srq = to_bnxt_re_srq(ibvsrq); + struct ibv_modify_srq cmd = {}; + int status = 0; + + status = ibv_cmd_modify_srq(ibvsrq, attr, attr_mask, + &cmd, sizeof(cmd)); + if (!status && ((attr_mask & IBV_SRQ_LIMIT) && + (srq->cap.srq_limit != attr->srq_limit))) { + srq->cap.srq_limit = attr->srq_limit; + } + srq->arm_req = true; + return status; +} + +int bnxt_re_destroy_srq(struct ibv_srq *ibvsrq) +{ + struct bnxt_re_srq *srq = to_bnxt_re_srq(ibvsrq); + struct bnxt_re_mem *mem; + int ret; + + if (_is_db_drop_recovery_enable(srq->uctx)) { + pthread_spin_lock(&srq->uctx->srq_dbr_res.lock); + bnxt_re_list_del_node(&srq->dbnode, &srq->uctx->srq_dbr_res.head); + pthread_spin_unlock(&srq->uctx->srq_dbr_res.lock); + } + ret = ibv_cmd_destroy_srq(ibvsrq); + if (ret) { + if (_is_db_drop_recovery_enable(srq->uctx)) { + pthread_spin_lock(&srq->uctx->srq_dbr_res.lock); + bnxt_re_list_add_node(&srq->dbnode, + &srq->uctx->srq_dbr_res.head); + pthread_spin_unlock(&srq->uctx->srq_dbr_res.lock); + } + return ret; + } + bnxt_re_dp_spin_destroy(&srq->srqq->qlock); + mem = srq->mem; + bnxt_re_free_mem(mem); + return 0; +} + +int bnxt_re_query_srq(struct ibv_srq *ibvsrq, struct ibv_srq_attr *attr) +{ + struct ibv_query_srq cmd = {}; + + return ibv_cmd_query_srq(ibvsrq, attr, &cmd, sizeof cmd); +} + +static int bnxt_re_build_srqe(struct bnxt_re_srq *srq, + struct ibv_recv_wr *wr, void *srqe) +{ + struct bnxt_re_brqe *hdr = srqe; + struct bnxt_re_wrid *wrid; + struct bnxt_re_sge *sge; + int wqe_sz, len, next; + uint32_t hdrval = 0; + int indx; + + sge = (srqe + bnxt_re_get_srqe_hdr_sz()); + next = srq->start_idx; + wrid = &srq->srwrid[next]; + + len = 0; + for (indx = 0; indx < 
wr->num_sge; indx++, sge++) { + sge->pa = htole64(wr->sg_list[indx].addr); + sge->lkey = htole32(wr->sg_list[indx].lkey); + sge->length = htole32(wr->sg_list[indx].length); + len += wr->sg_list[indx].length; + } + + hdrval = BNXT_RE_WR_OPCD_RECV; + wqe_sz = wr->num_sge + (bnxt_re_get_srqe_hdr_sz() >> 4); /* 16B align */ + /* HW needs at least one SGE for SRQ Entries. + * Increment SRQ WQE size if num_sge = 0 to + * include the extra SGE. Set the sge length to + * zero. + */ + if (!wr->num_sge) { + wqe_sz++; + sge->length = 0; + } + hdrval |= ((wqe_sz & BNXT_RE_HDR_WS_MASK) << BNXT_RE_HDR_WS_SHIFT); + hdr->rsv_ws_fl_wt = htole32(hdrval); + hdr->wrid = htole32((uint32_t)next); + + /* Fill wrid */ + wrid->wrid = wr->wr_id; + wrid->bytes = len; /* N.A. for RQE */ + wrid->sig = 0; /* N.A. for RQE */ + + return len; +} + +int bnxt_re_post_srq_recv(struct ibv_srq *ibvsrq, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad) +{ + struct bnxt_re_srq *srq = to_bnxt_re_srq(ibvsrq); + struct bnxt_re_queue *rq = srq->srqq; + int ret, count = 0; + void *srqe; + + bnxt_re_dp_spin_lock(&rq->qlock); + count = rq->tail > rq->head ? rq->tail - rq->head : + rq->depth - rq->head + rq->tail; + while (wr) { + if (srq->start_idx == srq->last_idx || + wr->num_sge > srq->cap.max_sge) { + *bad = wr; + bnxt_re_dp_spin_unlock(&rq->qlock); + return ENOMEM; + } + + srqe = (void *) (rq->va + (rq->tail * rq->stride)); + memset(srqe, 0, bnxt_re_get_srqe_sz()); + ret = bnxt_re_build_srqe(srq, wr, srqe); + if (ret < 0) { + bnxt_re_dp_spin_unlock(&rq->qlock); + *bad = wr; + return ENOMEM; + } + + srq->start_idx = srq->srwrid[srq->start_idx].next_idx; + bnxt_re_incr_tail(rq, 1); + wr = wr->next; + bnxt_re_ring_srq_db(srq); + count++; + if (srq->arm_req == true && count > srq->cap.srq_limit) { + srq->arm_req = false; + bnxt_re_ring_srq_arm(srq); + } + } + bnxt_re_dp_spin_unlock(&rq->qlock); + + return 0; +} + +struct ibv_ah *bnxt_re_create_ah(struct ibv_pd *ibvpd, struct ibv_ah_attr *attr) +{ + struct bnxt_re_context *uctx; + struct bnxt_re_pd *pd; + struct bnxt_re_ah *ah; + int status; + struct ibv_create_ah_resp resp = {}; + + pd = to_bnxt_re_pd(ibvpd); + uctx = to_bnxt_re_context(ibvpd->context); + + ah = calloc(1, sizeof(struct bnxt_re_ah)); + if (!ah) { + goto failed; + } + + ah->pd = pd; + pthread_mutex_lock(&uctx->shlock); + status = ibv_cmd_create_ah(ibvpd, &ah->ibvah, attr, + &resp, sizeof(resp)); + + if (status) + { + pthread_mutex_unlock(&uctx->shlock); + free(ah); + goto failed; + } + /* read AV ID now. */ + ah->avid = *(uint32_t *)(uctx->shpg + BNXT_RE_SHPG_AVID_OFFT); + pthread_mutex_unlock(&uctx->shlock); + + return &ah->ibvah; +failed: + return NULL; +} + +int bnxt_re_destroy_ah(struct ibv_ah *ibvah) +{ + struct bnxt_re_ah *ah; + int status; + + ah = to_bnxt_re_ah(ibvah); + status = ibv_cmd_destroy_ah(ibvah); + if (status) + return status; + free(ah); + + return 0; +} diff --git a/contrib/ofed/libbnxtre/verbs.h b/contrib/ofed/libbnxtre/verbs.h new file mode 100644 index 000000000000..249b23e4433a --- /dev/null +++ b/contrib/ofed/libbnxtre/verbs.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2024, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __BNXT_RE_VERBS_H__ +#define __BNXT_RE_VERBS_H__ + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int bnxt_re_query_device(struct ibv_context *ibvctx, + struct ibv_device_attr *dev_attr); + +int bnxt_re_query_device_compat(struct ibv_context *ibvctx, + struct ibv_device_attr *dev_attr); + +int bnxt_re_query_port(struct ibv_context *, uint8_t, struct ibv_port_attr *); + +struct ibv_pd *bnxt_re_alloc_pd(struct ibv_context *); +int bnxt_re_free_pd(struct ibv_pd *); + +typedef struct ibv_mr VERBS_MR; + +struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *, void *, size_t, + int ibv_access_flags); +int bnxt_re_dereg_mr(VERBS_MR*); + +struct ibv_cq *bnxt_re_create_cq(struct ibv_context *, int, + struct ibv_comp_channel *, int); +int bnxt_re_resize_cq(struct ibv_cq *, int); +int bnxt_re_destroy_cq(struct ibv_cq *); +int bnxt_re_poll_cq(struct ibv_cq *, int, struct ibv_wc *); +void bnxt_re_cq_event(struct ibv_cq *); +int bnxt_re_arm_cq(struct ibv_cq *, int); + +struct ibv_qp *bnxt_re_create_qp(struct ibv_pd *, struct ibv_qp_init_attr *); +int bnxt_re_modify_qp(struct ibv_qp *, struct ibv_qp_attr *, + int ibv_qp_attr_mask); +int bnxt_re_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int attr_mask, struct ibv_qp_init_attr *init_attr); +int bnxt_re_destroy_qp(struct ibv_qp *); +int bnxt_re_post_send(struct ibv_qp *, struct ibv_send_wr *, + struct ibv_send_wr **); +int bnxt_re_post_recv(struct ibv_qp *, struct ibv_recv_wr *, + struct ibv_recv_wr **); + +struct ibv_srq *bnxt_re_create_srq(struct ibv_pd *, + struct ibv_srq_init_attr *); +int bnxt_re_modify_srq(struct ibv_srq *, struct ibv_srq_attr *, int); +int bnxt_re_destroy_srq(struct ibv_srq *); +int bnxt_re_query_srq(struct ibv_srq *ibsrq, struct ibv_srq_attr *attr); +int bnxt_re_post_srq_recv(struct ibv_srq *, struct ibv_recv_wr *, + struct ibv_recv_wr **); + +struct ibv_ah *bnxt_re_create_ah(struct ibv_pd *, struct ibv_ah_attr *); +int bnxt_re_destroy_ah(struct ibv_ah *); + +int bnxt_re_attach_mcast(struct ibv_qp *, const union ibv_gid *, uint16_t); +int bnxt_re_detach_mcast(struct ibv_qp *, const union ibv_gid *, uint16_t); + +void bnxt_re_async_event(struct ibv_async_event *event); + +struct bnxt_re_work_compl { + struct bnxt_re_list_node cnode; + struct ibv_wc wc; +}; + +static inline uint8_t bnxt_re_get_psne_size(struct bnxt_re_context *cntx) +{ + return (BNXT_RE_HW_RETX(cntx)) ? sizeof(struct bnxt_re_msns) : + (cntx->cctx->chip_is_gen_p5_thor2) ? 
+ sizeof(struct bnxt_re_psns_ext) : + sizeof(struct bnxt_re_psns); +} + +static inline uint32_t bnxt_re_get_npsn(uint8_t mode, uint32_t nwr, + uint32_t slots) +{ + return mode == BNXT_RE_WQE_MODE_VARIABLE ? slots : nwr; +} + +static inline bool bnxt_re_is_mqp_ex_supported(struct bnxt_re_context *cntx) +{ + return cntx->comp_mask & BNXT_RE_COMP_MASK_UCNTX_MQP_EX_SUPPORTED; +} + +static inline bool can_request_ppp(struct bnxt_re_qp *re_qp, + struct ibv_qp_attr *attr, int attr_mask) +{ + struct bnxt_re_context *cntx; + struct bnxt_re_qp *qp; + bool request = false; + + qp = re_qp; + cntx = qp->cntx; + if (!qp->push_st_en && cntx->udpi.wcdpi && (attr_mask & IBV_QP_STATE) && + qp->qpst == IBV_QPS_RESET && attr->qp_state == IBV_QPS_INIT) { + request = true; + } + return request; +} + +static inline uint64_t bnxt_re_update_msn_tbl(uint32_t st_idx, uint32_t npsn, uint32_t start_psn) +{ + return htole64((((uint64_t)(st_idx) << BNXT_RE_SQ_MSN_SEARCH_START_IDX_SHIFT) & + BNXT_RE_SQ_MSN_SEARCH_START_IDX_MASK) | + (((uint64_t)(npsn) << BNXT_RE_SQ_MSN_SEARCH_NEXT_PSN_SHIFT) & + BNXT_RE_SQ_MSN_SEARCH_NEXT_PSN_MASK) | + (((start_psn) << BNXT_RE_SQ_MSN_SEARCH_START_PSN_SHIFT) & + BNXT_RE_SQ_MSN_SEARCH_START_PSN_MASK)); +} + +static inline int ibv_cmd_modify_qp_compat(struct ibv_qp *ibvqp, + struct ibv_qp_attr *attr, + int attr_mask, bool issue_mqp_ex, + struct bnxt_re_modify_ex_req *mreq, + struct bnxt_re_modify_ex_resp *mresp) +{ + int rc; + + if (issue_mqp_ex) { + struct bnxt_re_modify_ex_resp *resp; + struct bnxt_re_modify_ex_req *req; + + req = mreq; + resp = mresp; + rc = ibv_cmd_modify_qp_ex(ibvqp, attr, attr_mask, &req->cmd, + sizeof(req->cmd), sizeof(*req), + &resp->resp, sizeof(resp->resp), + sizeof(*resp)); + } else { + struct ibv_modify_qp cmd = {}; + + rc = ibv_cmd_modify_qp(ibvqp, attr, attr_mask, + &cmd, sizeof(cmd)); + } + return rc; +} + +#define bnxt_re_is_zero_len_pkt(len, opcd) (len == 0) +#define BNXT_RE_MSN_IDX(m) (((m) & BNXT_RE_SQ_MSN_SEARCH_START_IDX_MASK) >> \ + BNXT_RE_SQ_MSN_SEARCH_START_IDX_SHIFT) +#define BNXT_RE_MSN_NPSN(m) (((m) & BNXT_RE_SQ_MSN_SEARCH_NEXT_PSN_MASK) >> \ + BNXT_RE_SQ_MSN_SEARCH_NEXT_PSN_SHIFT) +#define BNXT_RE_MSN_SPSN(m) (((m) & BNXT_RE_SQ_MSN_SEARCH_START_PSN_MASK) >> \ + BNXT_RE_SQ_MSN_SEARCH_START_PSN_SHIFT) + +#endif /* __BNXT_RE_VERBS_H__ */ diff --git a/contrib/ofed/libbnxtre/version.h b/contrib/ofed/libbnxtre/version.h new file mode 100644 index 000000000000..f77e8835141f --- /dev/null +++ b/contrib/ofed/libbnxtre/version.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2024, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __BNXT_RE_VERSION_H__ +#define __BNXT_RE_VERSION_H__ + +#define MAJOR_VERSION "230" +#define MINOR_VERSION "0" +#define SUB_MAJOR_VERSION "133" +#define SUB_MINOR_VERSION "0" + +#define LIBBNXT_RE_VERSION(a, b, c, d) a"."b"."c"."d + +#define LIBBNXT_RE_REL_VERSION LIBBNXT_RE_VERSION(MAJOR_VERSION,\ + MINOR_VERSION,\ + SUB_MAJOR_VERSION,\ + SUB_MINOR_VERSION) +#define LIBBNXT_RE_BUILD_VERSION 230.0.133.0 + +#endif /* __BNXT_RE_VERSION_H__ */ diff --git a/lib/ofed/libbnxtre/Makefile b/lib/ofed/libbnxtre/Makefile new file mode 100755 index 000000000000..e78f2a93ba44 --- /dev/null +++ b/lib/ofed/libbnxtre/Makefile @@ -0,0 +1,14 @@ +SPATH= ${SRCTOP}/contrib/ofed/libbnxtre +.PATH: ${SPATH} + +SHLIBDIR?= /lib +LIB= bnxtre +SHLIB_MAJOR= 1 +MK_PROFILE= no + +SRCS= main.c db.c memory.c verbs.c + +LIBADD= ibverbs pthread +CFLAGS+= -I${SPATH} + +.include
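
For reference: applications do not call the provider entry points declared in
verbs.h (bnxt_re_query_device(), bnxt_re_alloc_pd(), bnxt_re_create_cq(), ...)
directly; they reach them through the standard libibverbs API once libbnxtre is
loaded as a verbs provider. The following is a minimal illustrative sketch and
is not part of the patch: it assumes a bnxt_re device is present, picks the
first device in the list purely for simplicity, and abbreviates error handling.
It links with -libverbs, matching the LIBADD= ibverbs dependency in the
Makefile above.

#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <infiniband/verbs.h>

int
main(void)
{
	struct ibv_device **dev_list;
	struct ibv_device_attr dev_attr;
	struct ibv_context *ctx;
	struct ibv_pd *pd;
	struct ibv_cq *cq;
	int num;

	/* Enumerate verbs devices; bnxt_re devices show up in this list. */
	dev_list = ibv_get_device_list(&num);
	if (dev_list == NULL || num == 0)
		errx(1, "no RDMA devices found");

	/* Open the first device; this sets up the provider's user context. */
	ctx = ibv_open_device(dev_list[0]);
	if (ctx == NULL)
		errx(1, "failed to open %s", ibv_get_device_name(dev_list[0]));

	/* ibv_query_device() is served by the provider's query_device op. */
	if (ibv_query_device(ctx, &dev_attr))
		errx(1, "query_device failed");
	printf("%s: max_qp=%d max_cqe=%d\n", ibv_get_device_name(dev_list[0]),
	    dev_attr.max_qp, dev_attr.max_cqe);

	/* PD and CQ creation map to the provider's alloc_pd/create_cq ops. */
	pd = ibv_alloc_pd(ctx);
	cq = ibv_create_cq(ctx, 16, NULL, NULL, 0);
	if (pd == NULL || cq == NULL)
		errx(1, "pd/cq allocation failed");

	ibv_destroy_cq(cq);
	ibv_dealloc_pd(pd);
	ibv_close_device(ctx);
	ibv_free_device_list(dev_list);
	return (0);
}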