* [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c
@ 2024-07-11 1:40 Honggang LI
2024-07-11 3:06 ` Greg Sword
0 siblings, 1 reply; 7+ messages in thread
From: Honggang LI @ 2024-07-11 1:40 UTC (permalink / raw)
To: zyjzyj2000; +Cc: jgg, leon, rpearsonhpe, linux-rdma, Honggang LI
If ib_req_notify_cq() was called in complete handler, deadlock occurs
in receive path.
rxe_req_notify_cq+0x21/0x70 [rdma_rxe]
krping_cq_event_handler+0x26f/0x2c0 [rdma_krping]
rxe_cq_post+0x128/0x1d0 [rdma_rxe]
? copy_data+0xe0/0x230 [rdma_rxe]
rxe_responder+0xbe8/0x23a0 [rdma_rxe]
do_task+0x68/0x1e0 [rdma_rxe]
? __pfx_rxe_udp_encap_recv+0x10/0x10 [rdma_rxe]
rxe_udp_encap_recv+0x79/0xe0 [rdma_rxe]
udp_queue_rcv_one_skb+0x272/0x540
udp_unicast_rcv_skb+0x76/0x90
__udp4_lib_rcv+0xab2/0xd60
? raw_local_deliver+0xd2/0x2a0
ip_protocol_deliver_rcu+0xd1/0x1b0
ip_local_deliver_finish+0x76/0xa0
ip_local_deliver+0x68/0x100
? ip_rcv_finish_core.isra.0+0xc2/0x430
ip_sublist_rcv+0x2a0/0x340
ip_list_rcv+0x13b/0x170
__netif_receive_skb_list_core+0x2bb/0x2e0
netif_receive_skb_list_internal+0x1cd/0x300
napi_complete_done+0x72/0x200
e1000e_poll+0xcf/0x320 [e1000e]
__napi_poll+0x2b/0x160
net_rx_action+0x2c6/0x3b0
? enqueue_hrtimer+0x42/0xa0
handle_softirqs+0xf7/0x340
? sched_clock_cpu+0xf/0x1f0
__irq_exit_rcu+0x97/0xb0
common_interrupt+0x85/0xa0
Fixes: 0c7e314a6352 ("RDMA/rxe: Fix rxe_cq_post")
Fixes: 78b26a335310 ("RDMA/rxe: Remove tasklet call from rxe_cq.c")
Signed-off-by: Honggang LI <honggangli@163.com>
---
drivers/infiniband/sw/rxe/rxe_cq.c | 35 ++++++++++++++++++++++++---
drivers/infiniband/sw/rxe/rxe_loc.h | 2 ++
drivers/infiniband/sw/rxe/rxe_verbs.c | 2 ++
drivers/infiniband/sw/rxe/rxe_verbs.h | 2 ++
4 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index fec87c9030ab..97a537994aee 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -39,6 +39,21 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
return -EINVAL;
}
+static void rxe_send_complete(struct tasklet_struct *t)
+{
+ struct rxe_cq *cq = from_tasklet(cq, t, comp_task);
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ if (cq->is_dying) {
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
int comp_vector, struct ib_udata *udata,
struct rxe_create_cq_resp __user *uresp)
@@ -64,6 +79,10 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
cq->is_user = uresp;
+ cq->is_dying = false;
+
+ tasklet_setup(&cq->comp_task, rxe_send_complete);
+
spin_lock_init(&cq->cq_lock);
cq->ibcq.cqe = cqe;
return 0;
@@ -84,7 +103,6 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe,
return err;
}
-/* caller holds reference to cq */
int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
{
struct ib_event ev;
@@ -113,17 +131,26 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
if ((cq->notify & IB_CQ_NEXT_COMP) ||
(cq->notify & IB_CQ_SOLICITED && solicited)) {
cq->notify = 0;
- cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+ tasklet_schedule(&cq->comp_task);
}
- spin_unlock_irqrestore(&cq->cq_lock, flags);
-
return 0;
}
+void rxe_cq_disable(struct rxe_cq *cq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ cq->is_dying = true;
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+}
+
void rxe_cq_cleanup(struct rxe_pool_elem *elem)
{
struct rxe_cq *cq = container_of(elem, typeof(*cq), elem);
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index ded46119151b..ba84f780aa3d 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -31,6 +31,8 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe,
int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
+void rxe_cq_disable(struct rxe_cq *cq);
+
void rxe_cq_cleanup(struct rxe_pool_elem *elem);
/* rxe_mcast.c */
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index de6238ee4379..a964d86789f6 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1205,6 +1205,8 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
goto err_out;
}
+ rxe_cq_disable(cq);
+
err = rxe_cleanup(cq);
if (err)
rxe_err_cq(cq, "cleanup failed, err = %d\n", err);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 3c1354f82283..03df97e83570 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -63,7 +63,9 @@ struct rxe_cq {
struct rxe_queue *queue;
spinlock_t cq_lock;
u8 notify;
+ bool is_dying;
bool is_user;
+ struct tasklet_struct comp_task;
atomic_t num_wq;
};
--
2.45.2
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c
2024-07-11 1:40 [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c Honggang LI
@ 2024-07-11 3:06 ` Greg Sword
2024-07-11 7:01 ` Honggang LI
0 siblings, 1 reply; 7+ messages in thread
From: Greg Sword @ 2024-07-11 3:06 UTC (permalink / raw)
To: Honggang LI; +Cc: zyjzyj2000, jgg, leon, rpearsonhpe, linux-rdma
On Thu, Jul 11, 2024 at 9:41 AM Honggang LI <honggangli@163.com> wrote:
>
> If ib_req_notify_cq() was called in complete handler, deadlock occurs
> in receive path.
>
> rxe_req_notify_cq+0x21/0x70 [rdma_rxe]
> krping_cq_event_handler+0x26f/0x2c0 [rdma_krping]
What is rdma_krping? What is the deadlock?
Please explain the deadlock in details.
> rxe_cq_post+0x128/0x1d0 [rdma_rxe]
> ? copy_data+0xe0/0x230 [rdma_rxe]
> rxe_responder+0xbe8/0x23a0 [rdma_rxe]
> do_task+0x68/0x1e0 [rdma_rxe]
> ? __pfx_rxe_udp_encap_recv+0x10/0x10 [rdma_rxe]
> rxe_udp_encap_recv+0x79/0xe0 [rdma_rxe]
> udp_queue_rcv_one_skb+0x272/0x540
> udp_unicast_rcv_skb+0x76/0x90
> __udp4_lib_rcv+0xab2/0xd60
> ? raw_local_deliver+0xd2/0x2a0
> ip_protocol_deliver_rcu+0xd1/0x1b0
> ip_local_deliver_finish+0x76/0xa0
> ip_local_deliver+0x68/0x100
> ? ip_rcv_finish_core.isra.0+0xc2/0x430
> ip_sublist_rcv+0x2a0/0x340
> ip_list_rcv+0x13b/0x170
> __netif_receive_skb_list_core+0x2bb/0x2e0
> netif_receive_skb_list_internal+0x1cd/0x300
> napi_complete_done+0x72/0x200
> e1000e_poll+0xcf/0x320 [e1000e]
> __napi_poll+0x2b/0x160
> net_rx_action+0x2c6/0x3b0
> ? enqueue_hrtimer+0x42/0xa0
> handle_softirqs+0xf7/0x340
> ? sched_clock_cpu+0xf/0x1f0
> __irq_exit_rcu+0x97/0xb0
> common_interrupt+0x85/0xa0
>
> Fixes: 0c7e314a6352 ("RDMA/rxe: Fix rxe_cq_post")
> Fixes: 78b26a335310 ("RDMA/rxe: Remove tasklet call from rxe_cq.c")
> Signed-off-by: Honggang LI <honggangli@163.com>
> ---
> drivers/infiniband/sw/rxe/rxe_cq.c | 35 ++++++++++++++++++++++++---
> drivers/infiniband/sw/rxe/rxe_loc.h | 2 ++
> drivers/infiniband/sw/rxe/rxe_verbs.c | 2 ++
> drivers/infiniband/sw/rxe/rxe_verbs.h | 2 ++
> 4 files changed, 37 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
> index fec87c9030ab..97a537994aee 100644
> --- a/drivers/infiniband/sw/rxe/rxe_cq.c
> +++ b/drivers/infiniband/sw/rxe/rxe_cq.c
> @@ -39,6 +39,21 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
> return -EINVAL;
> }
>
> +static void rxe_send_complete(struct tasklet_struct *t)
> +{
> + struct rxe_cq *cq = from_tasklet(cq, t, comp_task);
> + unsigned long flags;
> +
> + spin_lock_irqsave(&cq->cq_lock, flags);
> + if (cq->is_dying) {
> + spin_unlock_irqrestore(&cq->cq_lock, flags);
> + return;
> + }
> + spin_unlock_irqrestore(&cq->cq_lock, flags);
> +
> + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
> +}
> +
> int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
> int comp_vector, struct ib_udata *udata,
> struct rxe_create_cq_resp __user *uresp)
> @@ -64,6 +79,10 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
>
> cq->is_user = uresp;
>
> + cq->is_dying = false;
> +
> + tasklet_setup(&cq->comp_task, rxe_send_complete);
> +
> spin_lock_init(&cq->cq_lock);
> cq->ibcq.cqe = cqe;
> return 0;
> @@ -84,7 +103,6 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe,
> return err;
> }
>
> -/* caller holds reference to cq */
> int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
> {
> struct ib_event ev;
> @@ -113,17 +131,26 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
>
> queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
>
> + spin_unlock_irqrestore(&cq->cq_lock, flags);
> +
> if ((cq->notify & IB_CQ_NEXT_COMP) ||
> (cq->notify & IB_CQ_SOLICITED && solicited)) {
> cq->notify = 0;
> - cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
> + tasklet_schedule(&cq->comp_task);
> }
>
> - spin_unlock_irqrestore(&cq->cq_lock, flags);
> -
> return 0;
> }
>
> +void rxe_cq_disable(struct rxe_cq *cq)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&cq->cq_lock, flags);
> + cq->is_dying = true;
> + spin_unlock_irqrestore(&cq->cq_lock, flags);
> +}
> +
> void rxe_cq_cleanup(struct rxe_pool_elem *elem)
> {
> struct rxe_cq *cq = container_of(elem, typeof(*cq), elem);
> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
> index ded46119151b..ba84f780aa3d 100644
> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
> @@ -31,6 +31,8 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe,
>
> int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
>
> +void rxe_cq_disable(struct rxe_cq *cq);
> +
> void rxe_cq_cleanup(struct rxe_pool_elem *elem);
>
> /* rxe_mcast.c */
> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
> index de6238ee4379..a964d86789f6 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.c
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
> @@ -1205,6 +1205,8 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
> goto err_out;
> }
>
> + rxe_cq_disable(cq);
> +
> err = rxe_cleanup(cq);
> if (err)
> rxe_err_cq(cq, "cleanup failed, err = %d\n", err);
> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
> index 3c1354f82283..03df97e83570 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> @@ -63,7 +63,9 @@ struct rxe_cq {
> struct rxe_queue *queue;
> spinlock_t cq_lock;
> u8 notify;
> + bool is_dying;
> bool is_user;
> + struct tasklet_struct comp_task;
> atomic_t num_wq;
> };
>
> --
> 2.45.2
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c
2024-07-11 3:06 ` Greg Sword
@ 2024-07-11 7:01 ` Honggang LI
2024-07-11 13:46 ` Zhu Yanjun
2024-07-11 23:25 ` Zhu Yanjun
0 siblings, 2 replies; 7+ messages in thread
From: Honggang LI @ 2024-07-11 7:01 UTC (permalink / raw)
To: Greg Sword; +Cc: zyjzyj2000, jgg, leon, rpearsonhpe, linux-rdma
On Thu, Jul 11, 2024 at 11:06:06AM +0800, Greg Sword wrote:
> Subject: Re: [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c
> From: Greg Sword <gregsword0@gmail.com>
> Date: Thu, 11 Jul 2024 11:06:06 +0800
>
> On Thu, Jul 11, 2024 at 9:41 AM Honggang LI <honggangli@163.com> wrote:
> >
> > If ib_req_notify_cq() was called in complete handler, deadlock occurs
> > in receive path.
> >
> > rxe_req_notify_cq+0x21/0x70 [rdma_rxe]
> > krping_cq_event_handler+0x26f/0x2c0 [rdma_krping]
>
> What is rdma_krping? What is the deadlock?
https://github.com/larrystevenwise/krping.git
> Please explain the deadlock in details.
88 int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
89 {
90 struct ib_event ev;
91 int full;
92 void *addr;
93 unsigned long flags;
94
95 spin_lock_irqsave(&cq->cq_lock, flags); // Lock!
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
96
97 full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT);
98 if (unlikely(full)) {
99 rxe_err_cq(cq, "queue full\n");
100 spin_unlock_irqrestore(&cq->cq_lock, flags);
101 if (cq->ibcq.event_handler) {
102 ev.device = cq->ibcq.device;
103 ev.element.cq = &cq->ibcq;
104 ev.event = IB_EVENT_CQ_ERR;
105 cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
106 }
107
108 return -EBUSY;
109 }
110
111 addr = queue_producer_addr(cq->queue, QUEUE_TYPE_TO_CLIENT);
112 memcpy(addr, cqe, sizeof(*cqe));
113
114 queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
115
116 if ((cq->notify & IB_CQ_NEXT_COMP) ||
117 (cq->notify & IB_CQ_SOLICITED && solicited)) {
118 cq->notify = 0;
119 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
call the complete handler krping_cq_event_handler()
120 }
121
122 spin_unlock_irqrestore(&cq->cq_lock, flags);
static void krping_cq_event_handler(struct ib_cq *cq, void *ctx)
{
struct krping_cb *cb = ctx;
struct ib_wc wc;
const struct ib_recv_wr *bad_wr;
int ret;
BUG_ON(cb->cq != cq);
if (cb->state == ERROR) {
printk(KERN_ERR PFX "cq completion in ERROR state\n");
return;
}
if (cb->frtest) {
printk(KERN_ERR PFX "cq completion event in frtest!\n");
return;
}
if (!cb->wlat && !cb->rlat && !cb->bw)
ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
if (wc.status) {
static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct rxe_cq *cq = to_rcq(ibcq);
int ret = 0;
int empty;
unsigned long irq_flags;
spin_lock_irqsave(&cq->cq_lock, irq_flags);
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Deadlock
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c
2024-07-11 7:01 ` Honggang LI
@ 2024-07-11 13:46 ` Zhu Yanjun
2024-07-12 2:46 ` Honggang LI
2024-07-11 23:25 ` Zhu Yanjun
1 sibling, 1 reply; 7+ messages in thread
From: Zhu Yanjun @ 2024-07-11 13:46 UTC (permalink / raw)
To: Honggang LI, Greg Sword; +Cc: zyjzyj2000, jgg, leon, rpearsonhpe, linux-rdma
在 2024/7/11 9:01, Honggang LI 写道:
> On Thu, Jul 11, 2024 at 11:06:06AM +0800, Greg Sword wrote:
>> Subject: Re: [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c
>> From: Greg Sword <gregsword0@gmail.com>
>> Date: Thu, 11 Jul 2024 11:06:06 +0800
>>
>> On Thu, Jul 11, 2024 at 9:41 AM Honggang LI <honggangli@163.com> wrote:
>>>
>>> If ib_req_notify_cq() was called in complete handler, deadlock occurs
>>> in receive path.
>>>
>>> rxe_req_notify_cq+0x21/0x70 [rdma_rxe]
>>> krping_cq_event_handler+0x26f/0x2c0 [rdma_krping]
>>
>> What is rdma_krping? What is the deadlock?
>
> https://github.com/larrystevenwise/krping.git
>
>> Please explain the deadlock in details.
I read the discussion carefully. I have the following:
1). This problem is not from RXE. It seems to be related to the krping
module. As such, the root cause is not in RXE. It is not good to fix
this problem in RXE.
2). In the kernel upstream, tasklet is marked obsolete and has some
design flaws. So replacing workqueue with tasklet in RXE does not keep
up with the kernel upstream.
https://patchwork.kernel.org/project/linux-rdma/cover/20240621050525.3720069-1-allen.lkml@gmail.com/
In this link, there are some work to replace tasklet with BH workqueue.
As such, it is not good to replace workqueue with tasklet.
From the above, to now I can not agree with you. This is just my 2-cent
suggestions.
I am not sure if others have better suggestions about this commit or not.
Zhu Yanjun
>
> 88 int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
> 89 {
> 90 struct ib_event ev;
> 91 int full;
> 92 void *addr;
> 93 unsigned long flags;
> 94
> 95 spin_lock_irqsave(&cq->cq_lock, flags); // Lock!
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 96
> 97 full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT);
> 98 if (unlikely(full)) {
> 99 rxe_err_cq(cq, "queue full\n");
> 100 spin_unlock_irqrestore(&cq->cq_lock, flags);
> 101 if (cq->ibcq.event_handler) {
> 102 ev.device = cq->ibcq.device;
> 103 ev.element.cq = &cq->ibcq;
> 104 ev.event = IB_EVENT_CQ_ERR;
> 105 cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
> 106 }
> 107
> 108 return -EBUSY;
> 109 }
> 110
> 111 addr = queue_producer_addr(cq->queue, QUEUE_TYPE_TO_CLIENT);
> 112 memcpy(addr, cqe, sizeof(*cqe));
> 113
> 114 queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
> 115
> 116 if ((cq->notify & IB_CQ_NEXT_COMP) ||
> 117 (cq->notify & IB_CQ_SOLICITED && solicited)) {
> 118 cq->notify = 0;
> 119 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> call the complete handler krping_cq_event_handler()
> 120 }
> 121
> 122 spin_unlock_irqrestore(&cq->cq_lock, flags);
>
>
>
> static void krping_cq_event_handler(struct ib_cq *cq, void *ctx)
> {
> struct krping_cb *cb = ctx;
> struct ib_wc wc;
> const struct ib_recv_wr *bad_wr;
> int ret;
>
> BUG_ON(cb->cq != cq);
> if (cb->state == ERROR) {
> printk(KERN_ERR PFX "cq completion in ERROR state\n");
> return;
> }
> if (cb->frtest) {
> printk(KERN_ERR PFX "cq completion event in frtest!\n");
> return;
> }
> if (!cb->wlat && !cb->rlat && !cb->bw)
> ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
> if (wc.status) {
>
> static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
> {
> struct rxe_cq *cq = to_rcq(ibcq);
> int ret = 0;
> int empty;
> unsigned long irq_flags;
>
> spin_lock_irqsave(&cq->cq_lock, irq_flags);
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Deadlock
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c
2024-07-11 7:01 ` Honggang LI
2024-07-11 13:46 ` Zhu Yanjun
@ 2024-07-11 23:25 ` Zhu Yanjun
2024-09-15 12:26 ` Zhu Yanjun
1 sibling, 1 reply; 7+ messages in thread
From: Zhu Yanjun @ 2024-07-11 23:25 UTC (permalink / raw)
To: Honggang LI, Greg Sword; +Cc: zyjzyj2000, jgg, leon, rpearsonhpe, linux-rdma
在 2024/7/11 9:01, Honggang LI 写道:
> On Thu, Jul 11, 2024 at 11:06:06AM +0800, Greg Sword wrote:
>> Subject: Re: [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c
>> From: Greg Sword <gregsword0@gmail.com>
>> Date: Thu, 11 Jul 2024 11:06:06 +0800
>>
>> On Thu, Jul 11, 2024 at 9:41 AM Honggang LI <honggangli@163.com> wrote:
>>>
>>> If ib_req_notify_cq() was called in complete handler, deadlock occurs
>>> in receive path.
>>>
>>> rxe_req_notify_cq+0x21/0x70 [rdma_rxe]
>>> krping_cq_event_handler+0x26f/0x2c0 [rdma_krping]
>>
>> What is rdma_krping? What is the deadlock?
>
> https://github.com/larrystevenwise/krping.git
>
>> Please explain the deadlock in details.
>
> 88 int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
> 89 {
> 90 struct ib_event ev;
> 91 int full;
> 92 void *addr;
> 93 unsigned long flags;
> 94
> 95 spin_lock_irqsave(&cq->cq_lock, flags); // Lock!
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 96
> 97 full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT);
> 98 if (unlikely(full)) {
> 99 rxe_err_cq(cq, "queue full\n");
> 100 spin_unlock_irqrestore(&cq->cq_lock, flags);
> 101 if (cq->ibcq.event_handler) {
> 102 ev.device = cq->ibcq.device;
> 103 ev.element.cq = &cq->ibcq;
> 104 ev.event = IB_EVENT_CQ_ERR;
> 105 cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
> 106 }
> 107
> 108 return -EBUSY;
> 109 }
> 110
> 111 addr = queue_producer_addr(cq->queue, QUEUE_TYPE_TO_CLIENT);
> 112 memcpy(addr, cqe, sizeof(*cqe));
> 113
> 114 queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
> 115
> 116 if ((cq->notify & IB_CQ_NEXT_COMP) ||
> 117 (cq->notify & IB_CQ_SOLICITED && solicited)) {
> 118 cq->notify = 0;
> 119 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> call the complete handler krping_cq_event_handler()
> 120 }
> 121
> 122 spin_unlock_irqrestore(&cq->cq_lock, flags);
>
>
>
> static void krping_cq_event_handler(struct ib_cq *cq, void *ctx)
> {
> struct krping_cb *cb = ctx;
> struct ib_wc wc;
> const struct ib_recv_wr *bad_wr;
> int ret;
>
> BUG_ON(cb->cq != cq);
> if (cb->state == ERROR) {
> printk(KERN_ERR PFX "cq completion in ERROR state\n");
> return;
> }
> if (cb->frtest) {
> printk(KERN_ERR PFX "cq completion event in frtest!\n");
> return;
> }
> if (!cb->wlat && !cb->rlat && !cb->bw)
> ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
IMO, here, we can use a BH workqueue to execute this notification? Or
add an event handler?
Please reference other ULP kernel modules.
Thanks,
Zhu Yanjun
> while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
> if (wc.status) {
>
> static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
> {
> struct rxe_cq *cq = to_rcq(ibcq);
> int ret = 0;
> int empty;
> unsigned long irq_flags;
>
> spin_lock_irqsave(&cq->cq_lock, irq_flags);
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Deadlock
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c
2024-07-11 13:46 ` Zhu Yanjun
@ 2024-07-12 2:46 ` Honggang LI
0 siblings, 0 replies; 7+ messages in thread
From: Honggang LI @ 2024-07-12 2:46 UTC (permalink / raw)
To: Zhu Yanjun; +Cc: Greg Sword, zyjzyj2000, jgg, leon, rpearsonhpe, linux-rdma
On Thu, Jul 11, 2024 at 03:46:24PM +0200, Zhu Yanjun wrote:
> Subject: Re: [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c
> >
> > > Please explain the deadlock in details.
>
> I read the discussion carefully. I have the following:
> 1). This problem is not from RXE. It seems to be related with krping
> modules. As such, the root cause is not in RXE. It is not good to fix this
> problem in RXE.
Can't say it is a problem in krping, as krping works over irdma/mlx5/siw.
The deadlock only happens with rxe.
>
> 2). In the kernel upstream, tasklet is marked obsolete and has some design
> flaws. So replacing workqueue with tasklet in RXE does not keep up with the
> kernel upstream.
>
> https://patchwork.kernel.org/project/linux-rdma/cover/20240621050525.3720069-1-allen.lkml@gmail.com/
> In this link, there are some work to replace tasklet with BH workqueue.
> As such, it is not good to replace workqueue with tasklet.
Yes, you are right. Tasklet is obsolete. We need a better solution for
this issue. Please feel free to drop the patch.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c
2024-07-11 23:25 ` Zhu Yanjun
@ 2024-09-15 12:26 ` Zhu Yanjun
0 siblings, 0 replies; 7+ messages in thread
From: Zhu Yanjun @ 2024-09-15 12:26 UTC (permalink / raw)
To: Honggang LI, Greg Sword; +Cc: zyjzyj2000, jgg, leon, rpearsonhpe, linux-rdma
在 2024/7/12 7:25, Zhu Yanjun 写道:
> 在 2024/7/11 9:01, Honggang LI 写道:
>> On Thu, Jul 11, 2024 at 11:06:06AM +0800, Greg Sword wrote:
>>> Subject: Re: [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c
>>> From: Greg Sword <gregsword0@gmail.com>
>>> Date: Thu, 11 Jul 2024 11:06:06 +0800
>>>
>>> On Thu, Jul 11, 2024 at 9:41 AM Honggang LI <honggangli@163.com> wrote:
>>>>
>>>> If ib_req_notify_cq() was called in complete handler, deadlock occurs
>>>> in receive path.
>>>>
>>>> rxe_req_notify_cq+0x21/0x70 [rdma_rxe]
>>>> krping_cq_event_handler+0x26f/0x2c0 [rdma_krping]
>>>
>>> What is rdma_krping? What is the deadlock?
>>
>> https://github.com/larrystevenwise/krping.git
>>
>>> Please explain the deadlock in details.
>>
>> 88 int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int
>> solicited)
>> 89 {
>> 90 struct ib_event ev;
>> 91 int full;
>> 92 void *addr;
>> 93 unsigned long flags;
>> 94
>> 95 spin_lock_irqsave(&cq->cq_lock, flags); // Lock!
>> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>> 96
>> 97 full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT);
>> 98 if (unlikely(full)) {
>> 99 rxe_err_cq(cq, "queue full\n");
>> 100 spin_unlock_irqrestore(&cq->cq_lock, flags);
>> 101 if (cq->ibcq.event_handler) {
>> 102 ev.device = cq->ibcq.device;
>> 103 ev.element.cq = &cq->ibcq;
>> 104 ev.event = IB_EVENT_CQ_ERR;
>> 105 cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
>> 106 }
>> 107
>> 108 return -EBUSY;
>> 109 }
>> 110
>> 111 addr = queue_producer_addr(cq->queue,
>> QUEUE_TYPE_TO_CLIENT);
>> 112 memcpy(addr, cqe, sizeof(*cqe));
>> 113
>> 114 queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
>> 115
>> 116 if ((cq->notify & IB_CQ_NEXT_COMP) ||
>> 117 (cq->notify & IB_CQ_SOLICITED && solicited)) {
>> 118 cq->notify = 0;
>> 119 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
>> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>> call the complete handler krping_cq_event_handler()
Today I moved krping_cq_event_handler into a workqueue. And this krping
can work well in SoftRoCE. That is, I modified the source code of
That is, I modifed the source code of krping and moved this
krping_cq_event_handler into the system workqueue.
Then I run the krping tests on SoftRoCE. The krping tests work well on
SoftRoCE.
Zhu Yanjun
>> 120 }
>> 121
>> 122 spin_unlock_irqrestore(&cq->cq_lock, flags);
>>
>>
>>
>> static void krping_cq_event_handler(struct ib_cq *cq, void *ctx)
>> {
>> struct krping_cb *cb = ctx;
>> struct ib_wc wc;
>> const struct ib_recv_wr *bad_wr;
>> int ret;
>>
>> BUG_ON(cb->cq != cq);
>> if (cb->state == ERROR) {
>> printk(KERN_ERR PFX "cq completion in ERROR state\n");
>> return;
>> }
>> if (cb->frtest) {
>> printk(KERN_ERR PFX "cq completion event in
>> frtest!\n");
>> return;
>> }
>> if (!cb->wlat && !cb->rlat && !cb->bw)
>> ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
>> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>
> IMO, here, we can use a BH workqueue to execute this notification? Or
> add an event handler?
>
> Please reference other ULP kernel modules.
>
> Thanks,
> Zhu Yanjun
>
>> while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
>> if (wc.status) {
>>
>> static int rxe_req_notify_cq(struct ib_cq *ibcq, enum
>> ib_cq_notify_flags flags)
>> {
>> struct rxe_cq *cq = to_rcq(ibcq);
>> int ret = 0;
>> int empty;
>> unsigned long irq_flags;
>>
>> spin_lock_irqsave(&cq->cq_lock, irq_flags);
>> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Deadlock
>>
>
--
Best Regards,
Yanjun.Zhu
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2024-09-15 12:27 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-11 1:40 [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c Honggang LI
2024-07-11 3:06 ` Greg Sword
2024-07-11 7:01 ` Honggang LI
2024-07-11 13:46 ` Zhu Yanjun
2024-07-12 2:46 ` Honggang LI
2024-07-11 23:25 ` Zhu Yanjun
2024-09-15 12:26 ` Zhu Yanjun
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).