* [PATCH rdma-next 1/1] RDMA/core: Fix WARNING in gid_table_release_one
@ 2025-11-04 2:08 Zhu Yanjun
2025-11-04 13:00 ` Jason Gunthorpe
0 siblings, 1 reply; 4+ messages in thread
From: Zhu Yanjun @ 2025-11-04 2:08 UTC (permalink / raw)
To: jgg, leon, linux-rdma; +Cc: Zhu Yanjun, syzbot+b0da83a6c0e2e2bddbd4
From: "Zhu Yanjun" <yanjun.zhu@linux.dev>
GID entry ref leak for dev syz1 index 2 ref=615
...
Call Trace:
<TASK>
ib_device_release+0xd2/0x1c0 drivers/infiniband/core/device.c:509
device_release+0x99/0x1c0 drivers/base/core.c:-1
kobject_cleanup lib/kobject.c:689 [inline]
kobject_release lib/kobject.c:720 [inline]
kref_put include/linux/kref.h:65 [inline]
kobject_put+0x228/0x480 lib/kobject.c:737
process_one_work kernel/workqueue.c:3263 [inline]
process_scheduled_works+0xae1/0x17b0 kernel/workqueue.c:3346
worker_thread+0x8a0/0xda0 kernel/workqueue.c:3427
kthread+0x711/0x8a0 kernel/kthread.c:463
ret_from_fork+0x47c/0x820 arch/x86/kernel/process.c:158
ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
</TASK>
When a GID entry is in the GID_TABLE_ENTRY_PENDING_DEL state, its
release is already pending. Therefore, a reference that is still held
at this point is not necessarily a leak.
With this patch, release_gid_table() also waits for a short period (up to
200 x 10 ms) for the entry's reference count to drop; if the GID still
exists after that, a possible kref leak warning is printed.
Fixes: b150c3862d21 ("IB/core: Introduce GID entry reference counts")
Reported-by: syzbot+b0da83a6c0e2e2bddbd4@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=b0da83a6c0e2e2bddbd4
Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
---
drivers/infiniband/core/cache.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 81cf3c902e81..451325ce6fa5 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -800,13 +800,24 @@ static void release_gid_table(struct ib_device *device,
return;
for (i = 0; i < table->sz; i++) {
+ int cnt = 200;
+
if (is_gid_entry_free(table->data_vec[i]))
continue;
- WARN_ONCE(true,
- "GID entry ref leak for dev %s index %d ref=%u\n",
+ WARN_ONCE(table->data_vec[i]->state != GID_TABLE_ENTRY_PENDING_DEL,
+ "GID entry ref leak for dev %s index %d ref=%u, state: %d\n",
dev_name(&device->dev), i,
- kref_read(&table->data_vec[i]->kref));
+ kref_read(&table->data_vec[i]->kref), table->data_vec[i]->state);
+
+ while ((kref_read(&table->data_vec[i]->kref) > 0) && (cnt > 0)) {
+ cnt--;
+ msleep(10);
+ }
+
+ if (cnt <= 0)
+ pr_warn_ratelimited("Possibly kref leak, ref:%u, state: %d\n",
+ kref_read(&table->data_vec[i]->kref), table->data_vec[i]->state);
}
mutex_destroy(&table->lock);
--
2.51.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH rdma-next 1/1] RDMA/core: Fix WARNING in gid_table_release_one
2025-11-04 2:08 [PATCH rdma-next 1/1] RDMA/core: Fix WARNING in gid_table_release_one Zhu Yanjun
@ 2025-11-04 13:00 ` Jason Gunthorpe
2025-11-04 15:39 ` Zhu Yanjun
2025-11-05 13:02 ` Leon Romanovsky
0 siblings, 2 replies; 4+ messages in thread
From: Jason Gunthorpe @ 2025-11-04 13:00 UTC (permalink / raw)
To: Zhu Yanjun; +Cc: leon, linux-rdma, syzbot+b0da83a6c0e2e2bddbd4
On Mon, Nov 03, 2025 at 06:08:45PM -0800, Zhu Yanjun wrote:
> @@ -800,13 +800,24 @@ static void release_gid_table(struct ib_device *device,
> return;
>
> for (i = 0; i < table->sz; i++) {
> + int cnt = 200;
> +
> if (is_gid_entry_free(table->data_vec[i]))
> continue;
>
> - WARN_ONCE(true,
> - "GID entry ref leak for dev %s index %d ref=%u\n",
> + WARN_ONCE(table->data_vec[i]->state != GID_TABLE_ENTRY_PENDING_DEL,
> + "GID entry ref leak for dev %s index %d ref=%u, state: %d\n",
> dev_name(&device->dev), i,
> - kref_read(&table->data_vec[i]->kref));
> + kref_read(&table->data_vec[i]->kref), table->data_vec[i]->state);
> +
> + while ((kref_read(&table->data_vec[i]->kref) > 0) && (cnt > 0)) {
> + cnt--;
> + msleep(10);
> + }
Definitely don't want to see this looping.
If it is waiting for the work queue then maybe this should flush the
work queue.
Something like this?
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -799,7 +799,19 @@ static void release_gid_table(struct ib_device *device,
if (!table)
return;
+ mutex_lock(&table->lock);
for (i = 0; i < table->sz; i++) {
+ if (is_gid_entry_free(table->data_vec[i]))
+ continue;
+
+ /*
+ * The entry may be sitting in the WQ waiting for
+ * free_gid_work(), flush it to try to clean it.
+ */
+ mutex_unlock(&table->lock);
+ flush_workqueue(ib_wq);
+ mutex_lock(&table->lock);
+
if (is_gid_entry_free(table->data_vec[i]))
continue;
@@ -808,6 +820,7 @@ static void release_gid_table(struct ib_device *device,
dev_name(&device->dev), i,
kref_read(&table->data_vec[i]->kref));
}
+ mutex_unlock(&table->lock);
mutex_destroy(&table->lock);
kfree(table->data_vec);
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH rdma-next 1/1] RDMA/core: Fix WARNING in gid_table_release_one
2025-11-04 13:00 ` Jason Gunthorpe
@ 2025-11-04 15:39 ` Zhu Yanjun
2025-11-05 13:02 ` Leon Romanovsky
1 sibling, 0 replies; 4+ messages in thread
From: Zhu Yanjun @ 2025-11-04 15:39 UTC (permalink / raw)
To: Jason Gunthorpe; +Cc: leon, linux-rdma, syzbot+b0da83a6c0e2e2bddbd4
在 2025/11/4 5:00, Jason Gunthorpe 写道:
> On Mon, Nov 03, 2025 at 06:08:45PM -0800, Zhu Yanjun wrote:
>> @@ -800,13 +800,24 @@ static void release_gid_table(struct ib_device *device,
>> return;
>>
>> for (i = 0; i < table->sz; i++) {
>> + int cnt = 200;
>> +
>> if (is_gid_entry_free(table->data_vec[i]))
>> continue;
>>
>> - WARN_ONCE(true,
>> - "GID entry ref leak for dev %s index %d ref=%u\n",
>> + WARN_ONCE(table->data_vec[i]->state != GID_TABLE_ENTRY_PENDING_DEL,
>> + "GID entry ref leak for dev %s index %d ref=%u, state: %d\n",
>> dev_name(&device->dev), i,
>> - kref_read(&table->data_vec[i]->kref));
>> + kref_read(&table->data_vec[i]->kref), table->data_vec[i]->state);
>> +
>> + while ((kref_read(&table->data_vec[i]->kref) > 0) && (cnt > 0)) {
>> + cnt--;
>> + msleep(10);
>> + }
> Definitely don't want to see this looping.
>
> If it is waiting for the work queue then maybe this should flush the
> work queue.
Thanks a lot, Jason. I will delve into your suggestions and follow your
advice to send a new patch for this problem.
Yanjun.Zhu
>
> Something like this?
>
> --- a/drivers/infiniband/core/cache.c
> +++ b/drivers/infiniband/core/cache.c
> @@ -799,7 +799,19 @@ static void release_gid_table(struct ib_device *device,
> if (!table)
> return;
>
> + mutex_lock(&table->lock);
> for (i = 0; i < table->sz; i++) {
> + if (is_gid_entry_free(table->data_vec[i]))
> + continue;
> +
> + /*
> + * The entry may be sitting in the WQ waiting for
> + * free_gid_work(), flush it to try to clean it.
> + */
> + mutex_unlock(&table->lock);
> + flush_workqueue(ib_wq);
> + mutex_lock(&table->lock);
> +
> if (is_gid_entry_free(table->data_vec[i]))
> continue;
>
> @@ -808,6 +820,7 @@ static void release_gid_table(struct ib_device *device,
> dev_name(&device->dev), i,
> kref_read(&table->data_vec[i]->kref));
> }
> + mutex_unlock(&table->lock);
>
> mutex_destroy(&table->lock);
> kfree(table->data_vec);
>
>
--
Best Regards,
Yanjun.Zhu
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH rdma-next 1/1] RDMA/core: Fix WARNING in gid_table_release_one
2025-11-04 13:00 ` Jason Gunthorpe
2025-11-04 15:39 ` Zhu Yanjun
@ 2025-11-05 13:02 ` Leon Romanovsky
1 sibling, 0 replies; 4+ messages in thread
From: Leon Romanovsky @ 2025-11-05 13:02 UTC (permalink / raw)
To: Jason Gunthorpe; +Cc: Zhu Yanjun, linux-rdma, syzbot+b0da83a6c0e2e2bddbd4
On Tue, Nov 04, 2025 at 09:00:01AM -0400, Jason Gunthorpe wrote:
> On Mon, Nov 03, 2025 at 06:08:45PM -0800, Zhu Yanjun wrote:
> > @@ -800,13 +800,24 @@ static void release_gid_table(struct ib_device *device,
> > return;
> >
> > for (i = 0; i < table->sz; i++) {
> > + int cnt = 200;
> > +
> > if (is_gid_entry_free(table->data_vec[i]))
> > continue;
> >
> > - WARN_ONCE(true,
> > - "GID entry ref leak for dev %s index %d ref=%u\n",
> > + WARN_ONCE(table->data_vec[i]->state != GID_TABLE_ENTRY_PENDING_DEL,
> > + "GID entry ref leak for dev %s index %d ref=%u, state: %d\n",
> > dev_name(&device->dev), i,
> > - kref_read(&table->data_vec[i]->kref));
> > + kref_read(&table->data_vec[i]->kref), table->data_vec[i]->state);
> > +
> > + while ((kref_read(&table->data_vec[i]->kref) > 0) && (cnt > 0)) {
> > + cnt--;
> > + msleep(10);
> > + }
>
> Definitely don't want to see this looping.
>
> If it is waiting for the work queue then maybe this should flush the
> work queue.
>
> Something like this?
>
> --- a/drivers/infiniband/core/cache.c
> +++ b/drivers/infiniband/core/cache.c
> @@ -799,7 +799,19 @@ static void release_gid_table(struct ib_device *device,
> if (!table)
> return;
>
> + mutex_lock(&table->lock);
> for (i = 0; i < table->sz; i++) {
> + if (is_gid_entry_free(table->data_vec[i]))
> + continue;
> +
> + /*
> + * The entry may be sitting in the WQ waiting for
> + * free_gid_work(), flush it to try to clean it.
> + */
> + mutex_unlock(&table->lock);
> + flush_workqueue(ib_wq);
> + mutex_lock(&table->lock);
I don't think that this is the right thing to do. If you want, you can call
flush_workqueue(ib_wq) in ib_cache_release_one().
Thanks
> +
> if (is_gid_entry_free(table->data_vec[i]))
> continue;
>
> @@ -808,6 +820,7 @@ static void release_gid_table(struct ib_device *device,
> dev_name(&device->dev), i,
> kref_read(&table->data_vec[i]->kref));
> }
> + mutex_unlock(&table->lock);
>
> mutex_destroy(&table->lock);
> kfree(table->data_vec);
>
>
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-11-05 13:02 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-04 2:08 [PATCH rdma-next 1/1] RDMA/core: Fix WARNING in gid_table_release_one Zhu Yanjun
2025-11-04 13:00 ` Jason Gunthorpe
2025-11-04 15:39 ` Zhu Yanjun
2025-11-05 13:02 ` Leon Romanovsky
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).