From: Edward Srouji <edwards@nvidia.com>
To: Jason Gunthorpe <jgg@ziepe.ca>, Leon Romanovsky <leon@kernel.org>,
"Saeed Mahameed" <saeedm@nvidia.com>,
Tariq Toukan <tariqt@nvidia.com>, Mark Bloch <mbloch@nvidia.com>,
Andrew Lunn <andrew+netdev@lunn.ch>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: <linux-kernel@vger.kernel.org>, <linux-rdma@vger.kernel.org>,
<netdev@vger.kernel.org>,
Michael Guralnik <michaelgur@nvidia.com>,
"Edward Srouji" <edwards@nvidia.com>,
Yishai Hadas <yishaih@nvidia.com>
Subject: [PATCH rdma-next v3 03/11] RDMA/core: Add aging to FRMR pools
Date: Mon, 2 Feb 2026 17:59:55 +0200 [thread overview]
Message-ID: <20260202-frmr_pools-v3-3-b8405ed9deba@nvidia.com> (raw)
In-Reply-To: <20260202-frmr_pools-v3-0-b8405ed9deba@nvidia.com>
From: Michael Guralnik <michaelgur@nvidia.com>
Add aging mechanism to handles of FRMR pools.
Keep the handles stored in the FRMR pools for at least 1 minute so that
applications can reuse them, and destroy all handles that were not reused.
Add a new queue to each pool to accomplish that.
Upon aging trigger, destroy all FRMR handles from the new 'inactive'
queue and move all handles from the 'active' queue to the 'inactive' queue.
This ensures all destroyed handles were not reused for at least one aging
time period and were not held longer than 2 aging time periods.
Handles from the inactive queue will be popped only if the active queue is
empty.
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Edward Srouji <edwards@nvidia.com>
---
drivers/infiniband/core/frmr_pools.c | 84 ++++++++++++++++++++++++++++++++----
drivers/infiniband/core/frmr_pools.h | 7 +++
2 files changed, 82 insertions(+), 9 deletions(-)
diff --git a/drivers/infiniband/core/frmr_pools.c b/drivers/infiniband/core/frmr_pools.c
index eae15894a3b2..c0b2770df8bf 100644
--- a/drivers/infiniband/core/frmr_pools.c
+++ b/drivers/infiniband/core/frmr_pools.c
@@ -8,9 +8,12 @@
#include <linux/sort.h>
#include <linux/spinlock.h>
#include <rdma/ib_verbs.h>
+#include <linux/timer.h>
#include "frmr_pools.h"
+#define FRMR_POOLS_DEFAULT_AGING_PERIOD_SECS 60
+
static int push_handle_to_queue_locked(struct frmr_queue *queue, u32 handle)
{
u32 tmp = queue->ci % NUM_HANDLES_PER_PAGE;
@@ -80,19 +83,58 @@ static bool pop_frmr_handles_page(struct ib_frmr_pool *pool,
return true;
}
-static void destroy_frmr_pool(struct ib_device *device,
- struct ib_frmr_pool *pool)
+static void destroy_all_handles_in_queue(struct ib_device *device,
+ struct ib_frmr_pool *pool,
+ struct frmr_queue *queue)
{
struct ib_frmr_pools *pools = device->frmr_pools;
struct frmr_handles_page *page;
u32 count;
- while (pop_frmr_handles_page(pool, &pool->queue, &page, &count)) {
+ while (pop_frmr_handles_page(pool, queue, &page, &count)) {
pools->pool_ops->destroy_frmrs(device, page->handles, count);
kfree(page);
}
+}
+
+static void pool_aging_work(struct work_struct *work)
+{
+ struct ib_frmr_pool *pool = container_of(
+ to_delayed_work(work), struct ib_frmr_pool, aging_work);
+ struct ib_frmr_pools *pools = pool->device->frmr_pools;
+ bool has_work = false;
+
+ destroy_all_handles_in_queue(pool->device, pool, &pool->inactive_queue);
+
+ /* Move all pages from regular queue to inactive queue */
+ spin_lock(&pool->lock);
+ if (pool->queue.ci > 0) {
+ list_splice_tail_init(&pool->queue.pages_list,
+ &pool->inactive_queue.pages_list);
+ pool->inactive_queue.num_pages = pool->queue.num_pages;
+ pool->inactive_queue.ci = pool->queue.ci;
+
+ pool->queue.num_pages = 0;
+ pool->queue.ci = 0;
+ has_work = true;
+ }
+ spin_unlock(&pool->lock);
- rb_erase(&pool->node, &pools->rb_root);
+ /* Reschedule if there are handles to age in next aging period */
+ if (has_work)
+ queue_delayed_work(
+ pools->aging_wq, &pool->aging_work,
+ secs_to_jiffies(FRMR_POOLS_DEFAULT_AGING_PERIOD_SECS));
+}
+
+static void destroy_frmr_pool(struct ib_device *device,
+ struct ib_frmr_pool *pool)
+{
+ cancel_delayed_work_sync(&pool->aging_work);
+ destroy_all_handles_in_queue(device, pool, &pool->queue);
+ destroy_all_handles_in_queue(device, pool, &pool->inactive_queue);
+
+ rb_erase(&pool->node, &device->frmr_pools->rb_root);
kfree(pool);
}
@@ -116,6 +158,11 @@ int ib_frmr_pools_init(struct ib_device *device,
pools->rb_root = RB_ROOT;
rwlock_init(&pools->rb_lock);
pools->pool_ops = pool_ops;
+ pools->aging_wq = create_singlethread_workqueue("frmr_aging_wq");
+ if (!pools->aging_wq) {
+ kfree(pools);
+ return -ENOMEM;
+ }
device->frmr_pools = pools;
return 0;
@@ -146,6 +193,7 @@ void ib_frmr_pools_cleanup(struct ib_device *device)
node = next;
}
+ destroy_workqueue(pools->aging_wq);
kfree(pools);
device->frmr_pools = NULL;
}
@@ -233,7 +281,10 @@ static struct ib_frmr_pool *create_frmr_pool(struct ib_device *device,
memcpy(&pool->key, key, sizeof(*key));
INIT_LIST_HEAD(&pool->queue.pages_list);
+ INIT_LIST_HEAD(&pool->inactive_queue.pages_list);
spin_lock_init(&pool->lock);
+ INIT_DELAYED_WORK(&pool->aging_work, pool_aging_work);
+ pool->device = device;
write_lock(&pools->rb_lock);
existing = rb_find_add(&pool->node, &pools->rb_root, frmr_pool_cmp_add);
@@ -260,11 +311,17 @@ static int get_frmr_from_pool(struct ib_device *device,
spin_lock(&pool->lock);
if (pool->queue.ci == 0) {
- spin_unlock(&pool->lock);
- err = pools->pool_ops->create_frmrs(device, &pool->key, &handle,
- 1);
- if (err)
- return err;
+ if (pool->inactive_queue.ci > 0) {
+ handle = pop_handle_from_queue_locked(
+ &pool->inactive_queue);
+ spin_unlock(&pool->lock);
+ } else {
+ spin_unlock(&pool->lock);
+ err = pools->pool_ops->create_frmrs(device, &pool->key,
+ &handle, 1);
+ if (err)
+ return err;
+ }
} else {
handle = pop_handle_from_queue_locked(&pool->queue);
spin_unlock(&pool->lock);
@@ -312,12 +369,21 @@ EXPORT_SYMBOL(ib_frmr_pool_pop);
int ib_frmr_pool_push(struct ib_device *device, struct ib_mr *mr)
{
struct ib_frmr_pool *pool = mr->frmr.pool;
+ struct ib_frmr_pools *pools = device->frmr_pools;
+ bool schedule_aging = false;
int ret;
spin_lock(&pool->lock);
+ /* Schedule aging every time an empty pool becomes non-empty */
+ if (pool->queue.ci == 0)
+ schedule_aging = true;
ret = push_handle_to_queue_locked(&pool->queue, mr->frmr.handle);
spin_unlock(&pool->lock);
+ if (ret == 0 && schedule_aging)
+ queue_delayed_work(pools->aging_wq, &pool->aging_work,
+ secs_to_jiffies(FRMR_POOLS_DEFAULT_AGING_PERIOD_SECS));
+
return ret;
}
EXPORT_SYMBOL(ib_frmr_pool_push);
diff --git a/drivers/infiniband/core/frmr_pools.h b/drivers/infiniband/core/frmr_pools.h
index 5a4d03b3d86f..a20323e03e3f 100644
--- a/drivers/infiniband/core/frmr_pools.h
+++ b/drivers/infiniband/core/frmr_pools.h
@@ -11,6 +11,7 @@
#include <linux/spinlock_types.h>
#include <linux/types.h>
#include <asm/page.h>
+#include <linux/workqueue.h>
#define NUM_HANDLES_PER_PAGE \
((PAGE_SIZE - sizeof(struct list_head)) / sizeof(u32))
@@ -37,12 +38,18 @@ struct ib_frmr_pool {
/* Protect access to the queue */
spinlock_t lock;
struct frmr_queue queue;
+ struct frmr_queue inactive_queue;
+
+ struct delayed_work aging_work;
+ struct ib_device *device;
};
struct ib_frmr_pools {
struct rb_root rb_root;
rwlock_t rb_lock;
const struct ib_frmr_pool_ops *pool_ops;
+
+ struct workqueue_struct *aging_wq;
};
#endif /* RDMA_CORE_FRMR_POOLS_H */
--
2.47.1
next prev parent reply other threads:[~2026-02-02 16:01 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-02 15:59 [PATCH rdma-next v3 00/11] RDMA/core: Introduce FRMR pools infrastructure Edward Srouji
2026-02-02 15:59 ` [PATCH rdma-next v3 01/11] RDMA/mlx5: Move device async_ctx initialization Edward Srouji
2026-02-02 15:59 ` [PATCH rdma-next v3 02/11] IB/core: Introduce FRMR pools Edward Srouji
2026-02-02 15:59 ` Edward Srouji [this message]
2026-02-02 15:59 ` [PATCH rdma-next v3 04/11] RDMA/core: Add FRMR pools statistics Edward Srouji
2026-02-02 15:59 ` [PATCH rdma-next v3 05/11] RDMA/core: Add pinned handles to FRMR pools Edward Srouji
2026-02-02 15:59 ` [PATCH rdma-next v3 06/11] RDMA/mlx5: Switch from MR cache " Edward Srouji
2026-02-02 15:59 ` [PATCH rdma-next v3 07/11] net/mlx5: Drop MR cache related code Edward Srouji
2026-02-02 16:00 ` [PATCH rdma-next v3 08/11] RDMA/nldev: Add command to get FRMR pools Edward Srouji
2026-02-02 16:00 ` [PATCH rdma-next v3 09/11] RDMA/core: Add netlink command to modify FRMR aging Edward Srouji
2026-02-02 16:00 ` [PATCH rdma-next v3 10/11] RDMA/nldev: Add command to set pinned FRMR handles Edward Srouji
2026-02-02 16:00 ` [PATCH rdma-next v3 11/11] RDMA/nldev: Expose kernel-internal FRMR pools in netlink Edward Srouji
2026-02-25 11:47 ` [PATCH rdma-next v3 00/11] RDMA/core: Introduce FRMR pools infrastructure Leon Romanovsky
2026-02-26 13:32 ` Edward Srouji
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260202-frmr_pools-v3-3-b8405ed9deba@nvidia.com \
--to=edwards@nvidia.com \
--cc=andrew+netdev@lunn.ch \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=jgg@ziepe.ca \
--cc=kuba@kernel.org \
--cc=leon@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=mbloch@nvidia.com \
--cc=michaelgur@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=saeedm@nvidia.com \
--cc=tariqt@nvidia.com \
--cc=yishaih@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox