From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com, netdev@vger.kernel.org
Subject: [RFC 3/6] io_uring/zcrx: store area pointers in an array
Date: Tue, 12 May 2026 11:25:03 +0100 [thread overview]
Message-ID: <b741a6a8df4af49128285859b9fc8999b973a810.1778581283.git.asml.silence@gmail.com> (raw)
In-Reply-To: <cover.1778581283.git.asml.silence@gmail.com>
Currently, we have only one area per zcrx instance, and struct
io_zcrx_ifq stores a single pointer. To prepare for adding more areas,
replace it with an array of areas.
We'll be creating them at runtime, and io_zcrx_append_area() will take
care of synchronisation. The array is protected by 3 locks: ->pp_lock,
->alloc_lock and ->rq.lock. It takes all of them when switching arrays,
and readers should hold any one of them.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
io_uring/zcrx.c | 112 ++++++++++++++++++++++++++++++++++--------------
io_uring/zcrx.h | 5 ++-
2 files changed, 85 insertions(+), 32 deletions(-)
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 563bef1e724b..0ec491587a36 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -279,12 +279,12 @@ static int io_import_area(struct io_zcrx_ifq *ifq,
return io_import_umem(ifq, mem, area_reg);
}
-static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
- struct io_zcrx_area *area)
+static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
{
int i;
- guard(mutex)(&ifq->pp_lock);
+ lockdep_assert_held(&ifq->pp_lock);
+
if (!area->is_mapped)
return;
area->is_mapped = false;
@@ -302,6 +302,17 @@ static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
}
}
+static void io_zcrx_unmap_areas(struct io_zcrx_ifq *ifq)
+{
+ int area_idx;
+
+ /* ->pp_lock protects ->nr_areas and ->areas reads */
+ lockdep_assert_held(&ifq->pp_lock);
+
+ for (area_idx = 0; area_idx < ifq->nr_areas; area_idx++)
+ io_zcrx_unmap_area(ifq, ifq->areas[area_idx]);
+}
+
static void zcrx_sync_for_device(struct page_pool *pp, struct io_zcrx_ifq *zcrx,
netmem_ref *netmems, unsigned nr)
{
@@ -410,7 +421,8 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
static void io_zcrx_free_area(struct io_zcrx_ifq *ifq,
struct io_zcrx_area *area)
{
- io_zcrx_unmap_area(ifq, area);
+ scoped_guard(mutex, &ifq->pp_lock)
+ io_zcrx_unmap_area(ifq, area);
io_release_area_mem(&area->mem);
if (area->mem.account_pages)
@@ -427,13 +439,30 @@ static int io_zcrx_append_area(struct io_zcrx_ifq *ifq,
struct io_zcrx_area *area)
{
bool kern_readable = !area->mem.is_dmabuf;
+ struct io_zcrx_area **areas, **old_areas;
+ unsigned old_nr;
- if (WARN_ON_ONCE(ifq->area))
- return -EINVAL;
if (WARN_ON_ONCE(ifq->kern_readable != kern_readable))
return -EINVAL;
- ifq->area = area;
+ guard(mutex)(&ifq->pp_lock);
+ old_areas = ifq->areas;
+ old_nr = ifq->nr_areas;
+
+ areas = kmalloc_array(old_nr + 1, sizeof(areas[0]),
+ GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!areas)
+ return -ENOMEM;
+ if (old_areas)
+ memcpy(areas, old_areas, old_nr * sizeof(areas[0]));
+ areas[old_nr] = area;
+
+ scoped_guard(spinlock_bh, &ifq->rq.lock) {
+ guard(spinlock_bh)(&ifq->alloc_lock);
+ ifq->areas = areas;
+ ifq->nr_areas = old_nr + 1;
+ }
+ kfree(old_areas);
return 0;
}
@@ -540,8 +569,6 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
static void io_zcrx_drop_netdev(struct io_zcrx_ifq *ifq)
{
- guard(mutex)(&ifq->pp_lock);
-
if (!ifq->netdev)
return;
netdev_put(ifq->netdev, &ifq->netdev_tracker);
@@ -576,13 +603,15 @@ static void io_close_queue(struct io_zcrx_ifq *ifq)
static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
{
+ int i;
+
if (WARN_ON_ONCE(ifq->if_rxq != -1))
return;
if (WARN_ON_ONCE(ifq->netdev != NULL))
return;
- if (ifq->area)
- io_zcrx_free_area(ifq, ifq->area);
+ for (i = 0; i < ifq->nr_areas; i++)
+ io_zcrx_free_area(ifq, ifq->areas[i]);
if (ifq->mm_account)
mmdrop(ifq->mm_account);
if (ifq->dev)
@@ -591,6 +620,7 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
io_free_rbuf_ring(ifq);
free_uid(ifq->user);
mutex_destroy(&ifq->pp_lock);
+ kfree(ifq->areas);
kfree(ifq);
}
@@ -636,14 +666,10 @@ static void io_zcrx_return_niov(struct net_iov *niov)
page_pool_put_unrefed_netmem(niov->desc.pp, netmem, -1, false);
}
-static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
+static void io_zcrx_scrub_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
{
- struct io_zcrx_area *area = ifq->area;
int i;
- if (!area)
- return;
-
/* Reclaim back all buffers given to the user space. */
for (i = 0; i < area->nia.num_niovs; i++) {
struct net_iov *niov = &area->nia.niovs[i];
@@ -657,6 +683,15 @@ static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
}
}
+static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
+{
+ int i;
+
+ guard(mutex)(&ifq->pp_lock);
+ for (i = 0; i < ifq->nr_areas; i++)
+ io_zcrx_scrub_area(ifq, ifq->areas[i]);
+}
+
static void zcrx_unregister_user(struct io_zcrx_ifq *ifq)
{
if (refcount_dec_and_test(&ifq->user_refs)) {
@@ -1019,12 +1054,15 @@ static inline bool io_parse_rqe(struct io_uring_zcrx_rqe *rqe,
unsigned niov_idx, area_idx;
struct io_zcrx_area *area;
+ lockdep_assert_held(&ifq->rq.lock);
+
area_idx = off >> IORING_ZCRX_AREA_SHIFT;
niov_idx = (off & ~IORING_ZCRX_AREA_MASK) >> ifq->niov_shift;
- if (unlikely(rqe->__pad || area_idx))
+ if (unlikely(rqe->__pad || area_idx >= ifq->nr_areas))
return false;
- area = ifq->area;
+ area_idx = array_index_nospec(area_idx, ifq->nr_areas);
+ area = ifq->areas[area_idx];
if (unlikely(niov_idx >= area->nia.num_niovs))
return false;
@@ -1080,18 +1118,24 @@ static unsigned io_zcrx_ring_refill(struct page_pool *pp,
static unsigned io_zcrx_refill_slow(struct page_pool *pp, struct io_zcrx_ifq *ifq,
netmem_ref *netmems, unsigned to_alloc)
{
- struct io_zcrx_area *area = ifq->area;
- unsigned allocated = 0;
+ unsigned area_idx = 0;
+ unsigned allocated = 0;
guard(spinlock_bh)(&ifq->alloc_lock);
- for (allocated = 0; allocated < to_alloc; allocated++) {
- struct net_iov *niov = zcrx_get_free_niov(area);
+ while (allocated < to_alloc) {
+ struct net_iov *niov = zcrx_get_free_niov(ifq->areas[area_idx]);
+
+ if (!niov) {
+ area_idx++;
+ if (area_idx >= ifq->nr_areas)
+ break;
+ continue;
+ }
- if (!niov)
- break;
net_mp_niov_set_page_pool(pp, niov);
netmems[allocated] = net_iov_to_netmem(niov);
+ allocated++;
}
return allocated;
}
@@ -1178,9 +1222,9 @@ static void io_pp_uninstall(void *mp_priv, struct netdev_rx_queue *rxq)
struct pp_memory_provider_params *p = &rxq->mp_params;
struct io_zcrx_ifq *ifq = mp_priv;
+ guard(mutex)(&ifq->pp_lock);
io_zcrx_drop_netdev(ifq);
- if (ifq->area)
- io_zcrx_unmap_area(ifq, ifq->area);
+ io_zcrx_unmap_areas(ifq);
p->mp_ops = NULL;
p->mp_priv = NULL;
@@ -1319,16 +1363,22 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
static struct net_iov *io_alloc_fallback_niov(struct io_zcrx_ifq *ifq)
{
struct net_iov *niov = NULL;
+ unsigned area_idx;
if (!ifq->kern_readable)
return NULL;
- scoped_guard(spinlock_bh, &ifq->alloc_lock)
- niov = zcrx_get_free_niov(ifq->area);
+ guard(spinlock_bh)(&ifq->alloc_lock);
+
+ for (area_idx = 0; area_idx < ifq->nr_areas; area_idx++) {
+ niov = zcrx_get_free_niov(ifq->areas[area_idx]);
+ if (niov) {
+ page_pool_fragment_netmem(net_iov_to_netmem(niov), 1);
+ return niov;
+ }
+ }
- if (niov)
- page_pool_fragment_netmem(net_iov_to_netmem(niov), 1);
- return niov;
+ return NULL;
}
struct io_copy_cache {
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 687ca7c9f45b..85a15f4c04e3 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -49,7 +49,10 @@ struct zcrx_rq {
};
struct io_zcrx_ifq {
- struct io_zcrx_area *area;
+ /* read-protected by any of: ->pp_lock, ->alloc_lock, ->rq.lock */
+ struct io_zcrx_area **areas;
+ unsigned nr_areas;
+
unsigned niov_shift;
struct user_struct *user;
struct mm_struct *mm_account;
--
2.53.0
next prev parent reply other threads:[~2026-05-12 10:25 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-12 10:25 [RFC 0/6] dynamic area addition Pavel Begunkov
2026-05-12 10:25 ` [RFC 1/6] io_uring/zcrx: remove extra ifq close Pavel Begunkov
2026-05-12 10:25 ` [RFC 2/6] io_uring/zcrx: move freelist lock to struct zcrx Pavel Begunkov
2026-05-12 10:25 ` Pavel Begunkov [this message]
2026-05-12 10:25 ` [RFC 4/6] io_uring/zcrx: don't pass ifq_reg for area creation Pavel Begunkov
2026-05-12 10:25 ` [RFC 5/6] io_uring/zcrx: split append from " Pavel Begunkov
2026-05-12 10:25 ` [RFC 6/6] io_uring/zcrx: add dynamic " Pavel Begunkov
2026-05-12 10:28 ` [RFC 0/6] dynamic area addition Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b741a6a8df4af49128285859b9fc8999b973a810.1778581283.git.asml.silence@gmail.com \
--to=asml.silence@gmail.com \
--cc=io-uring@vger.kernel.org \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox