* [PATCH for-rc v3 0/2] RDMA/bnxt_re: Bug fixes for 6.12 kernel
@ 2024-10-14 13:36 Selvin Xavier
2024-10-14 13:36 ` [PATCH for-rc v3 1/2] RDMA/bnxt_re: Fix the usage of control path spin locks Selvin Xavier
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Selvin Xavier @ 2024-10-14 13:36 UTC
To: leon, jgg
Cc: linux-rdma, andrew.gospodarek, kalesh-anakkur.purayil, Zhu Yanjun,
Selvin Xavier
Hi,
Posting v3 of the series, containing the two patches that were not
merged from the previous posting. Please review and apply if the
series looks good.
Thanks,
Selvin Xavier
v2 -> v3:
- Include only the patches that were deferred from the previous
  posting of this series
- Address Jason's comment by dropping the lockdep annotation for
  the new spin lock, as it is not a nested lock

v1 -> v2:
- Add a patch that removes the irq variant of the spin lock and
  uses spin_lock_bh, since control path processing happens in
  tasklet context
- Address Zhu Yanjun's comment by initializing the newly added
  spin lock
- Include one more fix in the series
Selvin Xavier (2):
RDMA/bnxt_re: Fix the usage of control path spin locks
RDMA/bnxt_re: synchronize the qp-handle table array
drivers/infiniband/hw/bnxt_re/qplib_fp.c | 4 ++++
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 38 +++++++++++++++---------------
drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 2 ++
3 files changed, 25 insertions(+), 19 deletions(-)
--
2.5.5
* [PATCH for-rc v3 1/2] RDMA/bnxt_re: Fix the usage of control path spin locks
2024-10-14 13:36 [PATCH for-rc v3 0/2] RDMA/bnxt_re: Bug fixes for 6.12 kernel Selvin Xavier
@ 2024-10-14 13:36 ` Selvin Xavier
2024-10-14 13:36 ` [PATCH for-rc v3 2/2] RDMA/bnxt_re: synchronize the qp-handle table array Selvin Xavier
2024-10-21 16:34 ` [PATCH for-rc v3 0/2] RDMA/bnxt_re: Bug fixes for 6.12 kernel Jason Gunthorpe
2 siblings, 0 replies; 4+ messages in thread
From: Selvin Xavier @ 2024-10-14 13:36 UTC
To: leon, jgg
Cc: linux-rdma, andrew.gospodarek, kalesh-anakkur.purayil, Zhu Yanjun,
Selvin Xavier
Control path completion processing always runs in
tasklet (softirq) context. To synchronize with the
posting thread, the irq variant of the spin lock is
not required. Use spin_lock_bh instead.
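
For illustration, a minimal sketch of the resulting locking
pattern; my_dev, my_post_cmd and my_tasklet_fn are made-up names
for this sketch, not the driver's actual symbols:

#include <linux/spinlock.h>
#include <linux/interrupt.h>

struct my_dev {
        spinlock_t lock;                /* guards the command queue */
        struct tasklet_struct tasklet;
};

/* Process-context poster: spin_lock_bh() disables softirqs, so the
 * completion tasklet cannot run on this CPU while the lock is held.
 * Nothing takes this lock from hardirq context, so the irqsave
 * variant buys nothing here.
 */
static void my_post_cmd(struct my_dev *dev)
{
        spin_lock_bh(&dev->lock);
        /* ... write the command into the queue ... */
        spin_unlock_bh(&dev->lock);
}

/* Tasklet consumer: already runs in BH (softirq) context, so a
 * plain spin_lock() would also do; _bh is kept for uniformity.
 */
static void my_tasklet_fn(struct tasklet_struct *t)
{
        struct my_dev *dev = from_tasklet(dev, t, tasklet);

        spin_lock_bh(&dev->lock);
        /* ... consume completions ... */
        spin_unlock_bh(&dev->lock);
}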
Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver")
Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 25 ++++++++++---------------
1 file changed, 10 insertions(+), 15 deletions(-)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 7294221..ca26b88 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -290,7 +290,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_hwq *hwq;
u32 sw_prod, cmdq_prod;
struct pci_dev *pdev;
- unsigned long flags;
u16 cookie;
u8 *preq;
@@ -301,7 +300,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
/* Cmdq are in 16-byte units, each request can consume 1 or more
* cmdqe
*/
- spin_lock_irqsave(&hwq->lock, flags);
+ spin_lock_bh(&hwq->lock);
required_slots = bnxt_qplib_get_cmd_slots(msg->req);
free_slots = HWQ_FREE_SLOTS(hwq);
cookie = cmdq->seq_num & RCFW_MAX_COOKIE_VALUE;
@@ -311,7 +310,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
dev_info_ratelimited(&pdev->dev,
"CMDQ is full req/free %d/%d!",
required_slots, free_slots);
- spin_unlock_irqrestore(&hwq->lock, flags);
+ spin_unlock_bh(&hwq->lock);
return -EAGAIN;
}
if (msg->block)
@@ -367,7 +366,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
wmb();
writel(cmdq_prod, cmdq->cmdq_mbox.prod);
writel(RCFW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db);
- spin_unlock_irqrestore(&hwq->lock, flags);
+ spin_unlock_bh(&hwq->lock);
/* Return the CREQ response pointer */
return 0;
}
@@ -486,7 +485,6 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
{
struct creq_qp_event *evnt = (struct creq_qp_event *)msg->resp;
struct bnxt_qplib_crsqe *crsqe;
- unsigned long flags;
u16 cookie;
int rc;
u8 opcode;
@@ -512,12 +510,12 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
rc = __poll_for_resp(rcfw, cookie);
if (rc) {
- spin_lock_irqsave(&rcfw->cmdq.hwq.lock, flags);
+ spin_lock_bh(&rcfw->cmdq.hwq.lock);
crsqe = &rcfw->crsqe_tbl[cookie];
crsqe->is_waiter_alive = false;
if (rc == -ENODEV)
set_bit(FIRMWARE_STALL_DETECTED, &rcfw->cmdq.flags);
- spin_unlock_irqrestore(&rcfw->cmdq.hwq.lock, flags);
+ spin_unlock_bh(&rcfw->cmdq.hwq.lock);
return -ETIMEDOUT;
}
@@ -628,7 +626,6 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
u16 cookie, blocked = 0;
bool is_waiter_alive;
struct pci_dev *pdev;
- unsigned long flags;
u32 wait_cmds = 0;
int rc = 0;
@@ -659,8 +656,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
*
*/
- spin_lock_irqsave_nested(&hwq->lock, flags,
- SINGLE_DEPTH_NESTING);
+ spin_lock_nested(&hwq->lock, SINGLE_DEPTH_NESTING);
cookie = le16_to_cpu(qp_event->cookie);
blocked = cookie & RCFW_CMD_IS_BLOCKING;
cookie &= RCFW_MAX_COOKIE_VALUE;
@@ -672,7 +668,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
dev_info(&pdev->dev,
"rcfw timedout: cookie = %#x, free_slots = %d",
cookie, crsqe->free_slots);
- spin_unlock_irqrestore(&hwq->lock, flags);
+ spin_unlock(&hwq->lock);
return rc;
}
@@ -720,7 +716,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
__destroy_timedout_ah(rcfw,
(struct creq_create_ah_resp *)
qp_event);
- spin_unlock_irqrestore(&hwq->lock, flags);
+ spin_unlock(&hwq->lock);
}
*num_wait += wait_cmds;
return rc;
@@ -734,12 +730,11 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
struct bnxt_qplib_hwq *hwq = &creq->hwq;
struct creq_base *creqe;
- unsigned long flags;
u32 num_wakeup = 0;
u32 hw_polled = 0;
/* Service the CREQ until budget is over */
- spin_lock_irqsave(&hwq->lock, flags);
+ spin_lock_bh(&hwq->lock);
while (budget > 0) {
creqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL);
if (!CREQ_CMP_VALID(creqe, creq->creq_db.dbinfo.flags))
@@ -782,7 +777,7 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
if (hw_polled)
bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo,
rcfw->res->cctx, true);
- spin_unlock_irqrestore(&hwq->lock, flags);
+ spin_unlock_bh(&hwq->lock);
if (num_wakeup)
wake_up_nr(&rcfw->cmdq.waitq, num_wakeup);
}
--
2.5.5
* [PATCH for-rc v3 2/2] RDMA/bnxt_re: synchronize the qp-handle table array
2024-10-14 13:36 [PATCH for-rc v3 0/2] RDMA/bnxt_re: Bug fixes for 6.12 kernel Selvin Xavier
2024-10-14 13:36 ` [PATCH for-rc v3 1/2] RDMA/bnxt_re: Fix the usage of control path spin locks Selvin Xavier
@ 2024-10-14 13:36 ` Selvin Xavier
2024-10-21 16:34 ` [PATCH for-rc v3 0/2] RDMA/bnxt_re: Bug fixes for 6.12 kernel Jason Gunthorpe
2 siblings, 0 replies; 4+ messages in thread
From: Selvin Xavier @ 2024-10-14 13:36 UTC
To: leon, jgg
Cc: linux-rdma, andrew.gospodarek, kalesh-anakkur.purayil, Zhu Yanjun,
Selvin Xavier
There is a race between the CREQ tasklet and the
destroy-QP path when accessing the qp-handle table:
the CREQ tasklet handler can read a still-valid
qp-handle while the QP is already being destroyed.
Fix this race by introducing a table lock to
synchronize the access.
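
For illustration, a minimal sketch of the scheme implemented by the
hunks below; idx and handle_qp_error() are placeholders for this
sketch, not the driver's actual symbols:

/* Destroy path (process context): invalidate the handle under the
 * table lock before issuing DESTROY_QP, so the tasklet can no
 * longer look this QP up.
 */
spin_lock_bh(&rcfw->tbl_lock);
rcfw->qp_tbl[idx].qp_id = BNXT_QPLIB_QP_ID_INVALID;
rcfw->qp_tbl[idx].qp_handle = NULL;
spin_unlock_bh(&rcfw->tbl_lock);

/* CREQ tasklet (softirq context): look up and use the handle under
 * the same lock, so a concurrent destroy cannot free the QP between
 * the NULL check and the use.
 */
spin_lock(&rcfw->tbl_lock);
qp = rcfw->qp_tbl[idx].qp_handle;
if (qp)
        handle_qp_error(qp);    /* stands in for the aeq handler */
spin_unlock(&rcfw->tbl_lock);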
Fixes: f218d67ef004 ("RDMA/bnxt_re: Allow posting when QPs are in error")
Fixes: 84cf229f4001 ("RDMA/bnxt_re: Fix the qp table indexing")
Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
drivers/infiniband/hw/bnxt_re/qplib_fp.c | 4 ++++
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 13 +++++++++----
drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 2 ++
3 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 2ebcb2d..7ad8356 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -1532,9 +1532,11 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
u32 tbl_indx;
int rc;
+ spin_lock_bh(&rcfw->tbl_lock);
tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw);
rcfw->qp_tbl[tbl_indx].qp_id = BNXT_QPLIB_QP_ID_INVALID;
rcfw->qp_tbl[tbl_indx].qp_handle = NULL;
+ spin_unlock_bh(&rcfw->tbl_lock);
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_DESTROY_QP,
@@ -1545,8 +1547,10 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc) {
+ spin_lock_bh(&rcfw->tbl_lock);
rcfw->qp_tbl[tbl_indx].qp_id = qp->id;
rcfw->qp_tbl[tbl_indx].qp_handle = qp;
+ spin_unlock_bh(&rcfw->tbl_lock);
return rc;
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index ca26b88..e82bd37 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -634,17 +634,21 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
err_event = (struct creq_qp_error_notification *)qp_event;
qp_id = le32_to_cpu(err_event->xid);
+ spin_lock(&rcfw->tbl_lock);
tbl_indx = map_qp_id_to_tbl_indx(qp_id, rcfw);
qp = rcfw->qp_tbl[tbl_indx].qp_handle;
+ if (!qp) {
+ spin_unlock(&rcfw->tbl_lock);
+ break;
+ }
+ bnxt_qplib_mark_qp_error(qp);
+ rc = rcfw->creq.aeq_handler(rcfw, qp_event, qp);
+ spin_unlock(&rcfw->tbl_lock);
dev_dbg(&pdev->dev, "Received QP error notification\n");
dev_dbg(&pdev->dev,
"qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
qp_id, err_event->req_err_state_reason,
err_event->res_err_state_reason);
- if (!qp)
- break;
- bnxt_qplib_mark_qp_error(qp);
- rc = rcfw->creq.aeq_handler(rcfw, qp_event, qp);
break;
default:
/*
@@ -973,6 +977,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
GFP_KERNEL);
if (!rcfw->qp_tbl)
goto fail;
+ spin_lock_init(&rcfw->tbl_lock);
rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 45996e6..07779ae 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -224,6 +224,8 @@ struct bnxt_qplib_rcfw {
struct bnxt_qplib_crsqe *crsqe_tbl;
int qp_tbl_size;
struct bnxt_qplib_qp_node *qp_tbl;
+ /* To synchronize the qp-handle hash table */
+ spinlock_t tbl_lock;
u64 oos_prev;
u32 init_oos_stats;
u32 cmdq_depth;
--
2.5.5
* Re: [PATCH for-rc v3 0/2] RDMA/bnxt_re: Bug fixes for 6.12 kernel
2024-10-14 13:36 [PATCH for-rc v3 0/2] RDMA/bnxt_re: Bug fixes for 6.12 kernel Selvin Xavier
2024-10-14 13:36 ` [PATCH for-rc v3 1/2] RDMA/bnxt_re: Fix the usage of control path spin locks Selvin Xavier
2024-10-14 13:36 ` [PATCH for-rc v3 2/2] RDMA/bnxt_re: synchronize the qp-handle table array Selvin Xavier
@ 2024-10-21 16:34 ` Jason Gunthorpe
2 siblings, 0 replies; 4+ messages in thread
From: Jason Gunthorpe @ 2024-10-21 16:34 UTC
To: Selvin Xavier
Cc: leon, linux-rdma, andrew.gospodarek, kalesh-anakkur.purayil,
Zhu Yanjun
On Mon, Oct 14, 2024 at 06:36:13AM -0700, Selvin Xavier wrote:
> Hi,
> Posting v3 of the series, containing the two patches that were not
> merged from the previous posting. Please review and apply if the
> series looks good.
>
> Thanks,
> Selvin Xavier
>
> v2 -> v3:
> - Include only the patches that were deferred from the previous
>   posting of this series
> - Address Jason's comment by dropping the lockdep annotation for
>   the new spin lock, as it is not a nested lock
>
> v1 -> v2:
> - Add a patch that removes the irq variant of the spin lock and
>   uses spin_lock_bh, since control path processing happens in
>   tasklet context
> - Address Zhu Yanjun's comment by initializing the newly added
>   spin lock
> - Include one more fix in the series
>
> Selvin Xavier (2):
> RDMA/bnxt_re: Fix the usage of control path spin locks
> RDMA/bnxt_re: synchronize the qp-handle table array
Applied to for-rc, thanks
Jason