* [PATCH for-next 1/4] RDMA/bnxt_re: Add support for optimized modify QP
2024-10-11 5:03 [PATCH for-next 0/4] RDMA/bnxt_re: driver update Selvin Xavier
@ 2024-10-11 5:03 ` Selvin Xavier
2024-10-14 15:09 ` kernel test robot
2024-10-11 5:03 ` [PATCH for-next 2/4] RDMA/bnxt_re: Add support for CQ rx coalescing Selvin Xavier
` (2 subsequent siblings)
3 siblings, 1 reply; 6+ messages in thread
From: Selvin Xavier @ 2024-10-11 5:03 UTC (permalink / raw)
To: leon, jgg
Cc: linux-rdma, andrew.gospodarek, kalesh-anakkur.purayil,
Selvin Xavier
From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Modify QP improvements are for state transitions
from INIT -> RTR and RTR -> RTS.
In order to support the Modify QP Optimization feature,
the driver is expected to check for the feature support
in the CMDQ_QUERY_FUNC and register its support for this
feature with the FW in CMDQ_INITIALIZE_FIRMWARE.
Additionally, the driver is required to specify the new
fields and attribute masks for the transitions as follows:
1. INIT -> RTR:
- New fields: srq_used, type.
- enable srq_used when RC QP is configured to use SRQ.
- set the type based on the QP type.
- Mandatory masks:
- RC: CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS,
CMDQ_MODIFY_QP_MODIFY_MASK_PKEY
- UD QP and QP1: CMDQ_MODIFY_QP_MODIFY_MASK_PKEY,
CMDQ_MODIFY_QP_MODIFY_MASK_QKEY
2. RTR -> RTS:
- New fields: type
- set the type based on the QP type.
- Mandatory masks:
- RC: CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS
- UD QP and QP1: CMDQ_MODIFY_QP_MODIFY_MASK_QKEY
Reviewed-by: Saravanan Vajravel <saravanan.vajravel@broadcom.com>
Reviewed-by: Tushar Rane <tushar.rane@broadcom.com>
Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
drivers/infiniband/hw/bnxt_re/qplib_fp.c | 40 ++++++++++++++++++++++++++++++
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 6 ++++-
drivers/infiniband/hw/bnxt_re/qplib_res.h | 5 ++++
drivers/infiniband/hw/bnxt_re/roce_hsi.h | 3 +++
4 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 42e98e5..775604f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -1277,6 +1277,40 @@ static void __filter_modify_flags(struct bnxt_qplib_qp *qp)
}
}
+static void bnxt_set_mandatory_attributes(struct bnxt_qplib_qp *qp,
+ struct cmdq_modify_qp *req)
+{
+ u32 mandatory_flags = 0;
+
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC)
+ mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS;
+
+ if (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) {
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC && qp->srq)
+ req->flags = CMDQ_MODIFY_QP_FLAGS_SRQ_USED;
+ mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
+ }
+
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_UD ||
+ qp->type == CMDQ_MODIFY_QP_QP_TYPE_GSI)
+ mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY;
+
+ qp->modify_flags |= mandatory_flags;
+ req->qp_type = qp->type;
+}
+
+static bool is_optimized_state_transition(struct bnxt_qplib_qp *qp)
+{
+ if ((qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) ||
+ (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_RTR &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTS))
+ return true;
+
+ return false;
+}
+
int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
@@ -1293,6 +1327,12 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
/* Filter out the qp_attr_mask based on the state->new transition */
__filter_modify_flags(qp);
+ if (qp->modify_flags & CMDQ_MODIFY_QP_MODIFY_MASK_STATE) {
+ /* Set mandatory attributes for INIT -> RTR and RTR -> RTS transition */
+ if (_is_optimize_modify_qp_supported(res->dattr->dev_cap_flags2) &&
+ is_optimized_state_transition(qp))
+ bnxt_set_mandatory_attributes(qp, &req);
+ }
bmask = qp->modify_flags;
req.modify_mask = cpu_to_le32(qp->modify_flags);
req.qp_cid = cpu_to_le32(qp->id);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 3ffaef0c..f66c5e4 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -832,6 +832,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
struct creq_initialize_fw_resp resp = {};
struct cmdq_initialize_fw req = {};
struct bnxt_qplib_cmdqmsg msg = {};
+ u16 flags = 0;
u8 pgsz, lvl;
int rc;
@@ -906,7 +907,10 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
skip_ctx_setup:
if (BNXT_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags))
- req.flags |= cpu_to_le16(CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED);
+ flags |= cpu_to_le16(CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED);
+ if (_is_optimize_modify_qp_supported(rcfw->res->dattr->dev_cap_flags2))
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED;
+ req.flags |= cpu_to_le16(flags);
req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index c2f7103..ef198a6 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -576,4 +576,9 @@ static inline bool _is_relaxed_ordering_supported(u16 dev_cap_ext_flags2)
return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_MEMORY_REGION_RO_SUPPORTED;
}
+static inline bool _is_optimize_modify_qp_supported(u16 dev_cap_ext_flags2)
+{
+ return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED;
+}
+
#endif /* __BNXT_QPLIB_RES_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 3ec8952..69d50d7 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -216,6 +216,8 @@ struct cmdq_initialize_fw {
__le16 flags;
#define CMDQ_INITIALIZE_FW_FLAGS_MRAV_RESERVATION_SPLIT 0x1UL
#define CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED 0x2UL
+ #define CMDQ_INITIALIZE_FW_FLAGS_DRV_VERSION 0x4UL
+ #define CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED 0x8UL
__le16 cookie;
u8 resp_size;
u8 reserved8;
@@ -559,6 +561,7 @@ struct cmdq_modify_qp {
#define CMDQ_MODIFY_QP_OPCODE_LAST CMDQ_MODIFY_QP_OPCODE_MODIFY_QP
u8 cmd_size;
__le16 flags;
+ #define CMDQ_MODIFY_QP_FLAGS_SRQ_USED 0x1UL
__le16 cookie;
u8 resp_size;
u8 qp_type;
--
2.5.5
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH for-next 1/4] RDMA/bnxt_re: Add support for optimized modify QP
2024-10-11 5:03 ` [PATCH for-next 1/4] RDMA/bnxt_re: Add support for optimized modify QP Selvin Xavier
@ 2024-10-14 15:09 ` kernel test robot
0 siblings, 0 replies; 6+ messages in thread
From: kernel test robot @ 2024-10-14 15:09 UTC (permalink / raw)
To: Selvin Xavier, leon, jgg
Cc: oe-kbuild-all, linux-rdma, andrew.gospodarek,
kalesh-anakkur.purayil, Selvin Xavier
Hi Selvin,
kernel test robot noticed the following build warnings:
[auto build test WARNING on rdma/for-next]
[also build test WARNING on linus/master v6.12-rc3 next-20241014]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Selvin-Xavier/RDMA-bnxt_re-Add-support-for-optimized-modify-QP/20241011-133536
base: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
patch link: https://lore.kernel.org/r/1728623035-30657-2-git-send-email-selvin.xavier%40broadcom.com
patch subject: [PATCH for-next 1/4] RDMA/bnxt_re: Add support for optimized modify QP
config: x86_64-randconfig-123-20241014 (https://download.01.org/0day-ci/archive/20241014/202410142256.J3j7bGkA-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241014/202410142256.J3j7bGkA-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410142256.J3j7bGkA-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
>> drivers/infiniband/hw/bnxt_re/qplib_rcfw.c:910:23: sparse: sparse: invalid assignment: |=
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c:910:23: sparse: left side has type unsigned short
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c:910:23: sparse: right side has type restricted __le16
--
>> drivers/infiniband/hw/bnxt_re/qplib_fp.c:1291:36: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted __le16 [usertype] flags @@ got unsigned long @@
drivers/infiniband/hw/bnxt_re/qplib_fp.c:1291:36: sparse: expected restricted __le16 [usertype] flags
drivers/infiniband/hw/bnxt_re/qplib_fp.c:1291:36: sparse: got unsigned long
drivers/infiniband/hw/bnxt_re/qplib_fp.c: note: in included file (through include/linux/smp.h, include/linux/alloc_tag.h, include/linux/percpu.h, ...):
include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
vim +910 drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
828
829 int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
830 struct bnxt_qplib_ctx *ctx, int is_virtfn)
831 {
832 struct creq_initialize_fw_resp resp = {};
833 struct cmdq_initialize_fw req = {};
834 struct bnxt_qplib_cmdqmsg msg = {};
835 u16 flags = 0;
836 u8 pgsz, lvl;
837 int rc;
838
839 bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
840 CMDQ_BASE_OPCODE_INITIALIZE_FW,
841 sizeof(req));
842 /* Supply (log-base-2-of-host-page-size - base-page-shift)
843 * to bono to adjust the doorbell page sizes.
844 */
845 req.log2_dbr_pg_size = cpu_to_le16(PAGE_SHIFT -
846 RCFW_DBR_BASE_PAGE_SHIFT);
847 /*
848 * Gen P5 devices doesn't require this allocation
849 * as the L2 driver does the same for RoCE also.
850 * Also, VFs need not setup the HW context area, PF
851 * shall setup this area for VF. Skipping the
852 * HW programming
853 */
854 if (is_virtfn)
855 goto skip_ctx_setup;
856 if (bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx))
857 goto config_vf_res;
858
859 lvl = ctx->qpc_tbl.level;
860 pgsz = bnxt_qplib_base_pg_size(&ctx->qpc_tbl);
861 req.qpc_pg_size_qpc_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
862 lvl;
863 lvl = ctx->mrw_tbl.level;
864 pgsz = bnxt_qplib_base_pg_size(&ctx->mrw_tbl);
865 req.mrw_pg_size_mrw_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
866 lvl;
867 lvl = ctx->srqc_tbl.level;
868 pgsz = bnxt_qplib_base_pg_size(&ctx->srqc_tbl);
869 req.srq_pg_size_srq_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
870 lvl;
871 lvl = ctx->cq_tbl.level;
872 pgsz = bnxt_qplib_base_pg_size(&ctx->cq_tbl);
873 req.cq_pg_size_cq_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
874 lvl;
875 lvl = ctx->tim_tbl.level;
876 pgsz = bnxt_qplib_base_pg_size(&ctx->tim_tbl);
877 req.tim_pg_size_tim_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
878 lvl;
879 lvl = ctx->tqm_ctx.pde.level;
880 pgsz = bnxt_qplib_base_pg_size(&ctx->tqm_ctx.pde);
881 req.tqm_pg_size_tqm_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
882 lvl;
883 req.qpc_page_dir =
884 cpu_to_le64(ctx->qpc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
885 req.mrw_page_dir =
886 cpu_to_le64(ctx->mrw_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
887 req.srq_page_dir =
888 cpu_to_le64(ctx->srqc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
889 req.cq_page_dir =
890 cpu_to_le64(ctx->cq_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
891 req.tim_page_dir =
892 cpu_to_le64(ctx->tim_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
893 req.tqm_page_dir =
894 cpu_to_le64(ctx->tqm_ctx.pde.pbl[PBL_LVL_0].pg_map_arr[0]);
895
896 req.number_of_qp = cpu_to_le32(ctx->qpc_tbl.max_elements);
897 req.number_of_mrw = cpu_to_le32(ctx->mrw_tbl.max_elements);
898 req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements);
899 req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements);
900
901 config_vf_res:
902 req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf);
903 req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf);
904 req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf);
905 req.max_cq_per_vf = cpu_to_le32(ctx->vf_res.max_cq_per_vf);
906 req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf);
907
908 skip_ctx_setup:
909 if (BNXT_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags))
> 910 flags |= cpu_to_le16(CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED);
911 if (_is_optimize_modify_qp_supported(rcfw->res->dattr->dev_cap_flags2))
912 flags |= CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED;
913 req.flags |= cpu_to_le16(flags);
914 req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
915 bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
916 rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
917 if (rc)
918 return rc;
919 set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags);
920 return 0;
921 }
922
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH for-next 2/4] RDMA/bnxt_re: Add support for CQ rx coalescing
2024-10-11 5:03 [PATCH for-next 0/4] RDMA/bnxt_re: driver update Selvin Xavier
2024-10-11 5:03 ` [PATCH for-next 1/4] RDMA/bnxt_re: Add support for optimized modify QP Selvin Xavier
@ 2024-10-11 5:03 ` Selvin Xavier
2024-10-11 5:03 ` [PATCH for-next 3/4] RDMA/bnxt_re: Add support for modify_device hook Selvin Xavier
2024-10-11 5:03 ` [PATCH for-next 4/4] RDMA/ bnxt_re: Fix access flags for MR and QP modify Selvin Xavier
3 siblings, 0 replies; 6+ messages in thread
From: Selvin Xavier @ 2024-10-11 5:03 UTC (permalink / raw)
To: leon, jgg
Cc: linux-rdma, andrew.gospodarek, kalesh-anakkur.purayil,
Chandramohan Akula, Selvin Xavier
From: Chandramohan Akula <chandramohan.akula@broadcom.com>
RoCE message rate performance is heavily degraded
without the use of cq coalescing. With proper coalescing,
message rates get better. Furthermore, coalescing
significantly reduces contention on the PCIe Root
Complex/Memory subsystems.
Add the changes to configure CQ rx colascing parameters
based on adapter revision when CQ is created.
Signed-off-by: Chandramohan Akula <chandramohan.akula@broadcom.com>
Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
drivers/infiniband/hw/bnxt_re/bnxt_re.h | 8 ++++++++
drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1 +
drivers/infiniband/hw/bnxt_re/main.c | 9 +++++++++
drivers/infiniband/hw/bnxt_re/qplib_fp.c | 20 ++++++++++++++++++++
drivers/infiniband/hw/bnxt_re/qplib_fp.h | 20 ++++++++++++++++++++
drivers/infiniband/hw/bnxt_re/qplib_res.h | 5 +++++
drivers/infiniband/hw/bnxt_re/roce_hsi.h | 14 +++++++++++++-
7 files changed, 76 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index e94518b..bb28a1f 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -156,6 +156,13 @@ struct bnxt_re_pacing {
#define MAX_CQ_HASH_BITS (16)
#define MAX_SRQ_HASH_BITS (16)
+
+static inline bool bnxt_re_chip_gen_p7(u16 chip_num)
+{
+ return (chip_num == CHIP_NUM_58818 ||
+ chip_num == CHIP_NUM_57608);
+}
+
struct bnxt_re_dev {
struct ib_device ibdev;
struct list_head list;
@@ -195,6 +202,7 @@ struct bnxt_re_dev {
struct bnxt_qplib_ctx qplib_ctx;
struct bnxt_qplib_res qplib_res;
struct bnxt_qplib_dpi dpi_privileged;
+ struct bnxt_qplib_cq_coal_param cq_coalescing;
struct mutex qp_lock; /* protect qp list */
struct list_head qp_list;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 460f339..55a3cc8 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -3065,6 +3065,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->qplib_cq.max_wqe = entries;
cq->qplib_cq.cnq_hw_ring_id = nq->ring_id;
cq->qplib_cq.nq = nq;
+ cq->qplib_cq.coalescing = &rdev->cq_coalescing;
rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq);
if (rc) {
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 777068d..3a01818 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -986,6 +986,15 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct bnxt_aux_priv *aux_priv,
atomic_set(&rdev->stats.res.pd_count, 0);
rdev->cosq[0] = 0xFFFF;
rdev->cosq[1] = 0xFFFF;
+ rdev->cq_coalescing.buf_maxtime = BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME;
+ if (bnxt_re_chip_gen_p7(en_dev->chip_num)) {
+ rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7;
+ rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7;
+ } else {
+ rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5;
+ rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5;
+ }
+ rdev->cq_coalescing.en_ring_idle_mode = BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE;
return rdev;
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 775604f..ffa7634 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -2182,6 +2182,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
struct bnxt_qplib_cmdqmsg msg = {};
struct cmdq_create_cq req = {};
struct bnxt_qplib_pbl *pbl;
+ u32 coalescing = 0;
u32 pg_sz_lvl;
int rc;
@@ -2208,6 +2209,25 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
req.dpi = cpu_to_le32(cq->dpi->dpi);
req.cq_handle = cpu_to_le64(cq->cq_handle);
req.cq_size = cpu_to_le32(cq->max_wqe);
+
+ if (_is_cq_coalescing_supported(res->dattr->dev_cap_flags2)) {
+ req.flags |= cpu_to_le16(CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID);
+ coalescing |= ((cq->coalescing->buf_maxtime <<
+ CMDQ_CREATE_CQ_BUF_MAXTIME_SFT) &
+ CMDQ_CREATE_CQ_BUF_MAXTIME_MASK);
+ coalescing |= ((cq->coalescing->normal_maxbuf <<
+ CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT) &
+ CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK);
+ coalescing |= ((cq->coalescing->during_maxbuf <<
+ CMDQ_CREATE_CQ_DURING_MAXBUF_SFT) &
+ CMDQ_CREATE_CQ_DURING_MAXBUF_MASK);
+ if (cq->coalescing->en_ring_idle_mode)
+ coalescing |= CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE;
+ else
+ coalescing &= ~CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE;
+ req.coalescing = cpu_to_le32(coalescing);
+ }
+
pbl = &cq->hwq.pbl[PBL_LVL_0];
pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) <<
CMDQ_CREATE_CQ_PG_SIZE_SFT);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index b62df87..fb01576 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -383,6 +383,25 @@ static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que,
return avail <= slots;
}
+/* CQ coalescing parameters */
+struct bnxt_qplib_cq_coal_param {
+ u16 buf_maxtime;
+ u8 normal_maxbuf;
+ u8 during_maxbuf;
+ u8 en_ring_idle_mode;
+};
+
+#define BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME 0x1
+#define BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7 0x8
+#define BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7 0x8
+#define BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5 0x1
+#define BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5 0x1
+#define BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE 0x1
+#define BNXT_QPLIB_CQ_COAL_MAX_BUF_MAXTIME 0x1bf
+#define BNXT_QPLIB_CQ_COAL_MAX_NORMAL_MAXBUF 0x1f
+#define BNXT_QPLIB_CQ_COAL_MAX_DURING_MAXBUF 0x1f
+#define BNXT_QPLIB_CQ_COAL_MAX_EN_RING_IDLE_MODE 0x1
+
struct bnxt_qplib_cqe {
u8 status;
u8 type;
@@ -445,6 +464,7 @@ struct bnxt_qplib_cq {
*/
spinlock_t flush_lock; /* QP flush management */
u16 cnq_events;
+ struct bnxt_qplib_cq_coal_param *coalescing;
};
#define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index ef198a6..115910c 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -581,4 +581,9 @@ static inline bool _is_optimize_modify_qp_supported(u16 dev_cap_ext_flags2)
return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED;
}
+static inline bool _is_cq_coalescing_supported(u16 dev_cap_ext_flags2)
+{
+ return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED;
+}
+
#endif /* __BNXT_QPLIB_RES_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 69d50d7..58df876 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -1140,6 +1140,7 @@ struct cmdq_create_cq {
#define CMDQ_CREATE_CQ_FLAGS_DISABLE_CQ_OVERFLOW_DETECTION 0x1UL
#define CMDQ_CREATE_CQ_FLAGS_STEERING_TAG_VALID 0x2UL
#define CMDQ_CREATE_CQ_FLAGS_INFINITE_CQ_MODE 0x4UL
+ #define CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID 0x8UL
__le16 cookie;
u8 resp_size;
u8 reserved8;
@@ -1172,7 +1173,18 @@ struct cmdq_create_cq {
__le32 cq_size;
__le64 pbl;
__le16 steering_tag;
- u8 reserved48[6];
+ u8 reserved48[2];
+ __le32 coalescing;
+ #define CMDQ_CREATE_CQ_BUF_MAXTIME_MASK 0x1ffUL
+ #define CMDQ_CREATE_CQ_BUF_MAXTIME_SFT 0
+ #define CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK 0x3e00UL
+ #define CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT 9
+ #define CMDQ_CREATE_CQ_DURING_MAXBUF_MASK 0x7c000UL
+ #define CMDQ_CREATE_CQ_DURING_MAXBUF_SFT 14
+ #define CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE 0x80000UL
+ #define CMDQ_CREATE_CQ_UNUSED12_MASK 0xfff00000UL
+ #define CMDQ_CREATE_CQ_UNUSED12_SFT 20
+ __le64 reserved64;
};
/* creq_create_cq_resp (size:128b/16B) */
--
2.5.5
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH for-next 3/4] RDMA/bnxt_re: Add support for modify_device hook
2024-10-11 5:03 [PATCH for-next 0/4] RDMA/bnxt_re: driver update Selvin Xavier
2024-10-11 5:03 ` [PATCH for-next 1/4] RDMA/bnxt_re: Add support for optimized modify QP Selvin Xavier
2024-10-11 5:03 ` [PATCH for-next 2/4] RDMA/bnxt_re: Add support for CQ rx coalescing Selvin Xavier
@ 2024-10-11 5:03 ` Selvin Xavier
2024-10-11 5:03 ` [PATCH for-next 4/4] RDMA/ bnxt_re: Fix access flags for MR and QP modify Selvin Xavier
3 siblings, 0 replies; 6+ messages in thread
From: Selvin Xavier @ 2024-10-11 5:03 UTC (permalink / raw)
To: leon, jgg
Cc: linux-rdma, andrew.gospodarek, kalesh-anakkur.purayil,
Selvin Xavier
From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Adds support for modify_device in the driver
for node desc changes.
Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
drivers/infiniband/hw/bnxt_re/ib_verbs.c | 16 ++++++++++++++++
drivers/infiniband/hw/bnxt_re/ib_verbs.h | 3 +++
drivers/infiniband/hw/bnxt_re/main.c | 1 +
3 files changed, 20 insertions(+)
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 55a3cc8..2a21a90 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -211,6 +211,22 @@ int bnxt_re_query_device(struct ib_device *ibdev,
return 0;
}
+int bnxt_re_modify_device(struct ib_device *ibdev,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify)
+{
+ ibdev_dbg(ibdev, "Modify device with mask 0x%x", device_modify_mask);
+
+ if (device_modify_mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (!(device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC))
+ return 0;
+
+ memcpy(ibdev->node_desc, device_modify->node_desc, IB_DEVICE_NODE_DESC_MAX);
+ return 0;
+}
+
/* Port */
int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
struct ib_port_attr *port_attr)
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index b789e47..83a584e 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -196,6 +196,9 @@ static inline bool bnxt_re_is_var_size_supported(struct bnxt_re_dev *rdev,
int bnxt_re_query_device(struct ib_device *ibdev,
struct ib_device_attr *ib_attr,
struct ib_udata *udata);
+int bnxt_re_modify_device(struct ib_device *ibdev,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify);
int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
struct ib_port_attr *port_attr);
int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 3a01818..d825eda 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -911,6 +911,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.post_srq_recv = bnxt_re_post_srq_recv,
.query_ah = bnxt_re_query_ah,
.query_device = bnxt_re_query_device,
+ .modify_device = bnxt_re_modify_device,
.query_pkey = bnxt_re_query_pkey,
.query_port = bnxt_re_query_port,
.query_qp = bnxt_re_query_qp,
--
2.5.5
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH for-next 4/4] RDMA/ bnxt_re: Fix access flags for MR and QP modify
2024-10-11 5:03 [PATCH for-next 0/4] RDMA/bnxt_re: driver update Selvin Xavier
` (2 preceding siblings ...)
2024-10-11 5:03 ` [PATCH for-next 3/4] RDMA/bnxt_re: Add support for modify_device hook Selvin Xavier
@ 2024-10-11 5:03 ` Selvin Xavier
3 siblings, 0 replies; 6+ messages in thread
From: Selvin Xavier @ 2024-10-11 5:03 UTC (permalink / raw)
To: leon, jgg
Cc: linux-rdma, andrew.gospodarek, kalesh-anakkur.purayil,
Hongguang Gao, Selvin Xavier
From: Hongguang Gao <hongguang.gao@broadcom.com>
Access flag definition in MR and QP is different
in FW. Currently both reg/bind MR and modify/query QP uses
the same flags. Add a different function to map
the QP access flags for newer adapters.
Signed-off-by: Hongguang Gao <hongguang.gao@broadcom.com>
Reviewed-by: Damodharam Ammepalli <damodharam.ammepalli@broadcom.com>
Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
drivers/infiniband/hw/bnxt_re/ib_verbs.c | 59 +++++++++++++++++++++++++++-----
1 file changed, 50 insertions(+), 9 deletions(-)
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 2a21a90..5008c28 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -94,9 +94,9 @@ static int __from_ib_access_flags(int iflags)
return qflags;
};
-static enum ib_access_flags __to_ib_access_flags(int qflags)
+static int __to_ib_access_flags(int qflags)
{
- enum ib_access_flags iflags = 0;
+ int iflags = 0;
if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
iflags |= IB_ACCESS_LOCAL_WRITE;
@@ -113,7 +113,49 @@ static enum ib_access_flags __to_ib_access_flags(int qflags)
if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
iflags |= IB_ACCESS_ON_DEMAND;
return iflags;
-};
+}
+
+static u8 __qp_access_flags_from_ib(struct bnxt_qplib_chip_ctx *cctx, int iflags)
+{
+ u8 qflags = 0;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(cctx))
+ /* For Wh+ */
+ return (u8)__from_ib_access_flags(iflags);
+
+ /* For P5, P7 and later chips */
+ if (iflags & IB_ACCESS_LOCAL_WRITE)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_WRITE)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_READ)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ;
+ if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC;
+
+ return qflags;
+}
+
+static int __qp_access_flags_to_ib(struct bnxt_qplib_chip_ctx *cctx, u8 qflags)
+{
+ int iflags = 0;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(cctx))
+ /* For Wh+ */
+ return __to_ib_access_flags(qflags);
+
+ /* For P5, P7 and later chips */
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE)
+ iflags |= IB_ACCESS_LOCAL_WRITE;
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE)
+ iflags |= IB_ACCESS_REMOTE_WRITE;
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_READ)
+ iflags |= IB_ACCESS_REMOTE_READ;
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC)
+ iflags |= IB_ACCESS_REMOTE_ATOMIC;
+
+ return iflags;
+}
static void bnxt_re_check_and_set_relaxed_ordering(struct bnxt_re_dev *rdev,
struct bnxt_qplib_mrw *qplib_mr)
@@ -2053,12 +2095,10 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS;
qp->qplib_qp.access =
- __from_ib_access_flags(qp_attr->qp_access_flags);
+ __qp_access_flags_from_ib(qp->qplib_qp.cctx,
+ qp_attr->qp_access_flags);
/* LOCAL_WRITE access must be set to allow RC receive */
- qp->qplib_qp.access |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
- /* Temp: Set all params on QP as of now */
- qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE;
- qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ;
+ qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE;
}
if (qp_attr_mask & IB_QP_PKEY_INDEX) {
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
@@ -2263,7 +2303,8 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state);
qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state);
qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0;
- qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access);
+ qp_attr->qp_access_flags = __qp_access_flags_to_ib(qplib_qp->cctx,
+ qplib_qp->access);
qp_attr->pkey_index = qplib_qp->pkey_index;
qp_attr->qkey = qplib_qp->qkey;
qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
--
2.5.5
^ permalink raw reply related [flat|nested] 6+ messages in thread