All of lore.kernel.org
 help / color / mirror / Atom feed
From: Michael Gur <michaelgur@nvidia.com>
To: jgg@ziepe.ca, leon@kernel.org, linux-rdma@vger.kernel.org
Cc: Edward Srouji <edwards@nvidia.com>,
	Yishai Hadas <yishaih@nvidia.com>,
	Patrisious Haddad <phaddad@nvidia.com>,
	Michael Guralnik <michaelgur@nvidia.com>
Subject: [PATCH rdma-next 4/9] RDMA/core: Fix FRMR aging push to queue error flow
Date: Wed, 10 Jun 2026 03:01:40 +0300	[thread overview]
Message-ID: <20260610000145.820592-5-michaelgur@nvidia.com> (raw)
In-Reply-To: <20260610000145.820592-1-michaelgur@nvidia.com>

From: Michael Guralnik <michaelgur@nvidia.com>

Aging pools with pinned handles requires moving handles from the
active queue to a non-empty inactive queue, which might fail on new page
allocation. We currently do not handle this failure and leak any mkey
that fails the push.

Fix by introducing splice_frmr_queue_locked(), which fills the
destination's partial tail page from the source and then splices the
remaining source pages onto the destination, performing no allocation.

Replace the per-handle move loop in age_pinned_pool() and the
open-coded splice in pool_aging_work() with calls to the helper.
The helper cannot fail under memory pressure, which removes a class of
GFP_ATOMIC allocations under the pool lock and simplifies the error
flow.

Fixes: 020d189d16a6 ("RDMA/core: Add pinned handles to FRMR pools")
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
---
 drivers/infiniband/core/frmr_pools.c | 53 ++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/core/frmr_pools.c b/drivers/infiniband/core/frmr_pools.c
index 6170466ea958..927642c06f3a 100644
--- a/drivers/infiniband/core/frmr_pools.c
+++ b/drivers/infiniband/core/frmr_pools.c
@@ -97,13 +97,44 @@ static void destroy_all_handles_in_queue(struct ib_device *device,
 	}
 }
 
+/*
+ * Bulk-move all handles from @src into @dst without allocating new pages.
+ * If @dst has a partial tail page, fill it handle-by-handle from @src first
+ * to preserve the invariant that only the tail page is partial, then splice
+ * the remaining @src pages onto @dst. On return @src is empty.
+ *
+ * Caller must hold the lock protecting both queues.
+ */
+static void splice_frmr_queue_locked(struct frmr_queue *dst,
+				     struct frmr_queue *src)
+{
+	u32 free_in_tail = dst->ci % NUM_HANDLES_PER_PAGE;
+	u32 handle;
+
+	if (free_in_tail) {
+		free_in_tail = NUM_HANDLES_PER_PAGE - free_in_tail;
+		while (free_in_tail && src->ci) {
+			handle = pop_handle_from_queue_locked(src);
+			push_handle_to_queue_locked(dst, handle);
+			free_in_tail--;
+		}
+	}
+
+	if (src->ci > 0) {
+		list_splice_tail_init(&src->pages_list, &dst->pages_list);
+		dst->num_pages += src->num_pages;
+		dst->ci += src->ci;
+		src->num_pages = 0;
+		src->ci = 0;
+	}
+}
+
 static bool age_pinned_pool(struct ib_device *device, struct ib_frmr_pool *pool)
 {
 	struct ib_frmr_pools *pools = device->frmr_pools;
 	u32 total, to_destroy, destroyed = 0;
 	bool has_work = false;
 	u32 *handles;
-	u32 handle;
 
 	spin_lock(&pool->lock);
 	total = pool->queue.ci + pool->inactive_queue.ci + pool->in_use;
@@ -112,7 +143,7 @@ static bool age_pinned_pool(struct ib_device *device, struct ib_frmr_pool *pool)
 		return false;
 	}
 
-	to_destroy = total - pool->pinned_handles;
+	to_destroy = min(total - pool->pinned_handles, pool->inactive_queue.ci);
 
 	handles = kcalloc(to_destroy, sizeof(*handles), GFP_ATOMIC);
 	if (!handles) {
@@ -121,15 +152,13 @@ static bool age_pinned_pool(struct ib_device *device, struct ib_frmr_pool *pool)
 	}
 
 	/* Destroy all excess handles in the inactive queue */
-	while (pool->inactive_queue.ci && destroyed < to_destroy) {
-		handles[destroyed++] = pop_handle_from_queue_locked(
+	for (; destroyed < to_destroy; destroyed++)
+		handles[destroyed] = pop_handle_from_queue_locked(
 			&pool->inactive_queue);
-	}
 
 	/* Move all handles from regular queue to inactive queue */
-	while (pool->queue.ci) {
-		handle = pop_handle_from_queue_locked(&pool->queue);
-		push_handle_to_queue_locked(&pool->inactive_queue, handle);
+	if (pool->queue.ci > 0) {
+		splice_frmr_queue_locked(&pool->inactive_queue, &pool->queue);
 		has_work = true;
 	}
 
@@ -158,13 +187,7 @@ static void pool_aging_work(struct work_struct *work)
 	/* Move all pages from regular queue to inactive queue */
 	spin_lock(&pool->lock);
 	if (pool->queue.ci > 0) {
-		list_splice_tail_init(&pool->queue.pages_list,
-				      &pool->inactive_queue.pages_list);
-		pool->inactive_queue.num_pages = pool->queue.num_pages;
-		pool->inactive_queue.ci = pool->queue.ci;
-
-		pool->queue.num_pages = 0;
-		pool->queue.ci = 0;
+		splice_frmr_queue_locked(&pool->inactive_queue, &pool->queue);
 		has_work = true;
 	}
 	spin_unlock(&pool->lock);
-- 
2.52.0


  parent reply	other threads:[~2026-06-10  0:03 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-10  0:01 [PATCH rdma-next 0/9] FRMR pools fixes Michael Gur
2026-06-10  0:01 ` [PATCH rdma-next 1/9] RDMA/mlx5: Fix mkey creation error flow rollback Michael Gur
2026-06-10  0:01 ` [PATCH rdma-next 2/9] RDMA/mlx5: Fix TPH extraction in FRMR pool key Michael Gur
2026-06-10  0:01 ` [PATCH rdma-next 3/9] RDMA/core: Fix skipped usage for driver built FRMR key Michael Gur
2026-06-10  0:01 ` Michael Gur [this message]
2026-06-10  0:01 ` [PATCH rdma-next 5/9] RDMA/core: Fix FRMR set pinned push error path Michael Gur
2026-06-11  6:06   ` Tao Cui
2026-06-10  0:01 ` [PATCH rdma-next 6/9] RDMA/core: Avoid NULL dereference on FRMR bad usage Michael Gur
2026-06-10  0:01 ` [PATCH rdma-next 7/9] RDMA/core: Fix FRMR handle leak on push failure Michael Gur
2026-06-10  0:01 ` [PATCH rdma-next 8/9] RDMA/core: Add ib_frmr_pool_drop for unrecoverable handles Michael Gur
2026-06-10  0:01 ` [PATCH rdma-next 9/9] RDMA/mlx5: Drop FRMR pool handle on UMR revoke failure Michael Gur
2026-06-10 17:45 ` [PATCH rdma-next 0/9] FRMR pools fixes Jason Gunthorpe
2026-06-10 17:57   ` Michael Gur

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260610000145.820592-5-michaelgur@nvidia.com \
    --to=michaelgur@nvidia.com \
    --cc=edwards@nvidia.com \
    --cc=jgg@ziepe.ca \
    --cc=leon@kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=phaddad@nvidia.com \
    --cc=yishaih@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.