From: Michael Gur <michaelgur@nvidia.com>
To: jgg@ziepe.ca, leon@kernel.org, linux-rdma@vger.kernel.org
Cc: Edward Srouji <edwards@nvidia.com>,
Yishai Hadas <yishaih@nvidia.com>,
Patrisious Haddad <phaddad@nvidia.com>,
Michael Guralnik <michaelgur@nvidia.com>
Subject: [PATCH rdma-next 4/9] RDMA/core: Fix FRMR aging push to queue error flow
Date: Wed, 10 Jun 2026 03:01:40 +0300 [thread overview]
Message-ID: <20260610000145.820592-5-michaelgur@nvidia.com> (raw)
In-Reply-To: <20260610000145.820592-1-michaelgur@nvidia.com>
From: Michael Guralnik <michaelgur@nvidia.com>
Aging a pool with pinned handles requires moving handles from the
active queue to a non-empty inactive queue, and each push might fail on
a new page allocation. We currently do not handle that failure and leak
any mkey that fails the push.
Fix by introducing splice_frmr_queue_locked(), which fills the
destination's partial tail page from the source and then splices the
remaining source pages onto the destination, performing no allocation.
Replace the per-handle move loop in age_pinned_pool() and the
open-coded splice in pool_aging_work() with calls to the helper.
The helper cannot fail under memory pressure, which removes a class of
GFP_ATOMIC allocations under the pool lock and simplifies the error
flow.
Fixes: 020d189d16a6 ("RDMA/core: Add pinned handles to FRMR pools")
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
---
drivers/infiniband/core/frmr_pools.c | 53 ++++++++++++++++++++--------
1 file changed, 38 insertions(+), 15 deletions(-)
diff --git a/drivers/infiniband/core/frmr_pools.c b/drivers/infiniband/core/frmr_pools.c
index 6170466ea958..927642c06f3a 100644
--- a/drivers/infiniband/core/frmr_pools.c
+++ b/drivers/infiniband/core/frmr_pools.c
@@ -97,13 +97,44 @@ static void destroy_all_handles_in_queue(struct ib_device *device,
}
}
+/*
+ * Bulk-move all handles from @src into @dst without allocating new pages.
+ * @dst: queue that receives the handles.
+ * @src: queue that is drained; @src->ci is 0 on return.
+ *
+ * If @dst has a partial tail page, fill it handle-by-handle from @src first
+ * to preserve the invariant that only the tail page is partial, then splice
+ * the remaining @src pages onto @dst and fold @src's page and handle counts
+ * into @dst. On return @src is empty.
+ *
+ * NOTE(review): the "no allocation" guarantee relies on
+ * push_handle_to_queue_locked() not needing a new page while the tail page
+ * of @dst still has free slots - confirm against that helper.
+ *
+ * Caller must hold the lock protecting both queues.
+ */
+static void splice_frmr_queue_locked(struct frmr_queue *dst,
+ struct frmr_queue *src)
+{
+ /* Starts out as the number of handles already in @dst's tail page. */
+ u32 free_in_tail = dst->ci % NUM_HANDLES_PER_PAGE;
+ u32 handle;
+
+ if (free_in_tail) {
+ /* Convert the used-slot count into the number of free slots. */
+ free_in_tail = NUM_HANDLES_PER_PAGE - free_in_tail;
+ /* Stop once the tail page is full or @src runs out of handles. */
+ while (free_in_tail && src->ci) {
+ handle = pop_handle_from_queue_locked(src);
+ /*
+ * Result is not checked; presumably the push cannot fail
+ * while the tail page has room - TODO confirm.
+ */
+ push_handle_to_queue_locked(dst, handle);
+ free_in_tail--;
+ }
+ }
+
+ /* Whatever is left in @src is moved page-wise, with no per-handle work. */
+ if (src->ci > 0) {
+ list_splice_tail_init(&src->pages_list, &dst->pages_list);
+ /* Accumulate rather than overwrite: @dst may already hold pages. */
+ dst->num_pages += src->num_pages;
+ dst->ci += src->ci;
+ src->num_pages = 0;
+ src->ci = 0;
+ }
+}
+
static bool age_pinned_pool(struct ib_device *device, struct ib_frmr_pool *pool)
{
struct ib_frmr_pools *pools = device->frmr_pools;
u32 total, to_destroy, destroyed = 0;
bool has_work = false;
u32 *handles;
- u32 handle;
spin_lock(&pool->lock);
total = pool->queue.ci + pool->inactive_queue.ci + pool->in_use;
@@ -112,7 +143,7 @@ static bool age_pinned_pool(struct ib_device *device, struct ib_frmr_pool *pool)
return false;
}
- to_destroy = total - pool->pinned_handles;
+ to_destroy = min(total - pool->pinned_handles, pool->inactive_queue.ci);
handles = kcalloc(to_destroy, sizeof(*handles), GFP_ATOMIC);
if (!handles) {
@@ -121,15 +152,13 @@ static bool age_pinned_pool(struct ib_device *device, struct ib_frmr_pool *pool)
}
/* Destroy all excess handles in the inactive queue */
- while (pool->inactive_queue.ci && destroyed < to_destroy) {
- handles[destroyed++] = pop_handle_from_queue_locked(
+ for (; destroyed < to_destroy; destroyed++)
+ handles[destroyed] = pop_handle_from_queue_locked(
&pool->inactive_queue);
- }
/* Move all handles from regular queue to inactive queue */
- while (pool->queue.ci) {
- handle = pop_handle_from_queue_locked(&pool->queue);
- push_handle_to_queue_locked(&pool->inactive_queue, handle);
+ if (pool->queue.ci > 0) {
+ splice_frmr_queue_locked(&pool->inactive_queue, &pool->queue);
has_work = true;
}
@@ -158,13 +187,7 @@ static void pool_aging_work(struct work_struct *work)
/* Move all pages from regular queue to inactive queue */
spin_lock(&pool->lock);
if (pool->queue.ci > 0) {
- list_splice_tail_init(&pool->queue.pages_list,
- &pool->inactive_queue.pages_list);
- pool->inactive_queue.num_pages = pool->queue.num_pages;
- pool->inactive_queue.ci = pool->queue.ci;
-
- pool->queue.num_pages = 0;
- pool->queue.ci = 0;
+ splice_frmr_queue_locked(&pool->inactive_queue, &pool->queue);
has_work = true;
}
spin_unlock(&pool->lock);
--
2.52.0
next prev parent reply other threads:[~2026-06-10 0:03 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-10 0:01 [PATCH rdma-next 0/9] FRMR pools fixes Michael Gur
2026-06-10 0:01 ` [PATCH rdma-next 1/9] RDMA/mlx5: Fix mkey creation error flow rollback Michael Gur
2026-06-10 0:01 ` [PATCH rdma-next 2/9] RDMA/mlx5: Fix TPH extraction in FRMR pool key Michael Gur
2026-06-10 0:01 ` [PATCH rdma-next 3/9] RDMA/core: Fix skipped usage for driver built FRMR key Michael Gur
2026-06-10 0:01 ` Michael Gur [this message]
2026-06-10 0:01 ` [PATCH rdma-next 5/9] RDMA/core: Fix FRMR set pinned push error path Michael Gur
2026-06-10 0:01 ` [PATCH rdma-next 6/9] RDMA/core: Avoid NULL dereference on FRMR bad usage Michael Gur
2026-06-10 0:01 ` [PATCH rdma-next 7/9] RDMA/core: Fix FRMR handle leak on push failure Michael Gur
2026-06-10 0:01 ` [PATCH rdma-next 8/9] RDMA/core: Add ib_frmr_pool_drop for unrecoverable handles Michael Gur
2026-06-10 0:01 ` [PATCH rdma-next 9/9] RDMA/mlx5: Drop FRMR pool handle on UMR revoke failure Michael Gur
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260610000145.820592-5-michaelgur@nvidia.com \
--to=michaelgur@nvidia.com \
--cc=edwards@nvidia.com \
--cc=jgg@ziepe.ca \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=phaddad@nvidia.com \
--cc=yishaih@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox