All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC 0/6] dynamic area addition
@ 2026-05-12 10:25 Pavel Begunkov
  2026-05-12 10:25 ` [RFC 1/6] io_uring/zcrx: remove extra ifq close Pavel Begunkov
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Pavel Begunkov @ 2026-05-12 10:25 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, netdev

Currently, the user needs to provide memory for the data upfront
when registering a zcrx instance, but it's not always easy for the
user to predict how much will be needed. This series adds a way to
add more memory / areas at runtime.

Pavel Begunkov (6):
  io_uring/zcrx: remove extra ifq close
  io_uring/zcrx: move freelist lock to struct zcrx
  io_uring/zcrx: store area pointers in an array
  io_uring/zcrx: don't pass ifq_reg for area creation
  io_uring/zcrx: split append from area creation
  io_uring/zcrx: add dynamic area creation

 include/uapi/linux/io_uring/zcrx.h |   7 +
 io_uring/zcrx.c                    | 203 ++++++++++++++++++++++-------
 io_uring/zcrx.h                    |   7 +-
 3 files changed, 168 insertions(+), 49 deletions(-)

-- 
2.53.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RFC 1/6] io_uring/zcrx: remove extra ifq close
  2026-05-12 10:25 [RFC 0/6] dynamic area addition Pavel Begunkov
@ 2026-05-12 10:25 ` Pavel Begunkov
  2026-05-12 10:25 ` [RFC 2/6] io_uring/zcrx: move freelist lock to struct zcrx Pavel Begunkov
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2026-05-12 10:25 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, netdev

By the time io_zcrx_ifq_free() is called the interface queue should
already be closed, so io_close_queue() will be a no-op. Remove the call
and add a couple of warnings.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 7b93c87b8371..3478040f2197 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -576,7 +576,10 @@ static void io_close_queue(struct io_zcrx_ifq *ifq)
 
 static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 {
-	io_close_queue(ifq);
+	if (WARN_ON_ONCE(ifq->if_rxq != -1))
+		return;
+	if (WARN_ON_ONCE(ifq->netdev != NULL))
+		return;
 
 	if (ifq->area)
 		io_zcrx_free_area(ifq, ifq->area);
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC 2/6] io_uring/zcrx: move freelist lock to struct zcrx
  2026-05-12 10:25 [RFC 0/6] dynamic area addition Pavel Begunkov
  2026-05-12 10:25 ` [RFC 1/6] io_uring/zcrx: remove extra ifq close Pavel Begunkov
@ 2026-05-12 10:25 ` Pavel Begunkov
  2026-05-12 10:25 ` [RFC 3/6] io_uring/zcrx: store area pointers in an array Pavel Begunkov
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2026-05-12 10:25 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, netdev

freelist_lock, which protects slow path allocations, is currently stored
in struct io_zcrx_area. Once we add support for multiple areas, we'll
need a lock in the zcrx ctx, so move it there.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 14 +++++++-------
 io_uring/zcrx.h |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 3478040f2197..563bef1e724b 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -511,7 +511,6 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 	/* we're only supporting one area per ifq for now */
 	area->area_id = 0;
 	area_reg->rq_area_token = (u64)area->area_id << IORING_ZCRX_AREA_SHIFT;
-	spin_lock_init(&area->freelist_lock);
 
 	ret = io_zcrx_append_area(ifq, area);
 	if (!ret)
@@ -532,6 +531,7 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
 
 	ifq->if_rxq = -1;
 	spin_lock_init(&ifq->rq.lock);
+	spin_lock_init(&ifq->alloc_lock);
 	mutex_init(&ifq->pp_lock);
 	refcount_set(&ifq->refs, 1);
 	refcount_set(&ifq->user_refs, 1);
@@ -603,8 +603,9 @@ static void io_put_zcrx_ifq(struct io_zcrx_ifq *ifq)
 static void io_zcrx_return_niov_freelist(struct net_iov *niov)
 {
 	struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
+	struct io_zcrx_ifq *ifq = area->ifq;
 
-	guard(spinlock_bh)(&area->freelist_lock);
+	guard(spinlock_bh)(&ifq->alloc_lock);
 	if (WARN_ON_ONCE(area->free_count >= area->nia.num_niovs))
 		return;
 	area->freelist[area->free_count++] = net_iov_idx(niov);
@@ -614,7 +615,7 @@ static struct net_iov *zcrx_get_free_niov(struct io_zcrx_area *area)
 {
 	unsigned niov_idx;
 
-	lockdep_assert_held(&area->freelist_lock);
+	lockdep_assert_held(&area->ifq->alloc_lock);
 
 	if (unlikely(!area->free_count))
 		return NULL;
@@ -1082,7 +1083,7 @@ static unsigned io_zcrx_refill_slow(struct page_pool *pp, struct io_zcrx_ifq *if
 	struct io_zcrx_area *area = ifq->area;
 	unsigned allocated = 0;
 
-	guard(spinlock_bh)(&area->freelist_lock);
+	guard(spinlock_bh)(&ifq->alloc_lock);
 
 	for (allocated = 0; allocated < to_alloc; allocated++) {
 		struct net_iov *niov = zcrx_get_free_niov(area);
@@ -1317,14 +1318,13 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
 
 static struct net_iov *io_alloc_fallback_niov(struct io_zcrx_ifq *ifq)
 {
-	struct io_zcrx_area *area = ifq->area;
 	struct net_iov *niov = NULL;
 
 	if (!ifq->kern_readable)
 		return NULL;
 
-	scoped_guard(spinlock_bh, &area->freelist_lock)
-		niov = zcrx_get_free_niov(area);
+	scoped_guard(spinlock_bh, &ifq->alloc_lock)
+		niov = zcrx_get_free_niov(ifq->area);
 
 	if (niov)
 		page_pool_fragment_netmem(net_iov_to_netmem(niov), 1);
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 75e0a4e6ef6e..687ca7c9f45b 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -34,7 +34,6 @@ struct io_zcrx_area {
 	u16			area_id;
 
 	/* freelist */
-	spinlock_t		freelist_lock ____cacheline_aligned_in_smp;
 	u32			free_count;
 	u32			*freelist;
 
@@ -57,6 +56,7 @@ struct io_zcrx_ifq {
 	bool				kern_readable;
 
 	struct zcrx_rq			rq ____cacheline_aligned_in_smp;
+	spinlock_t			alloc_lock ____cacheline_aligned_in_smp;
 
 	u32				if_rxq;
 	struct device			*dev;
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC 3/6] io_uring/zcrx: store area pointers in an array
  2026-05-12 10:25 [RFC 0/6] dynamic area addition Pavel Begunkov
  2026-05-12 10:25 ` [RFC 1/6] io_uring/zcrx: remove extra ifq close Pavel Begunkov
  2026-05-12 10:25 ` [RFC 2/6] io_uring/zcrx: move freelist lock to struct zcrx Pavel Begunkov
@ 2026-05-12 10:25 ` Pavel Begunkov
  2026-05-12 10:25 ` [RFC 4/6] io_uring/zcrx: don't pass ifq_reg for area creation Pavel Begunkov
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2026-05-12 10:25 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, netdev

Currently, we have only one area per zcrx instance, and struct
io_zcrx_ifq stores a single pointer. To prepare for adding more areas,
replace it with an array of areas.

We'll be creating them at runtime, and io_zcrx_append_area() will take
care of synchronisation. The array is protected by 3 locks: ->pp_lock,
->alloc_lock and ->rq.lock. The writer takes all of them when switching
arrays, and readers should hold at least one of them.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 112 ++++++++++++++++++++++++++++++++++--------------
 io_uring/zcrx.h |   5 ++-
 2 files changed, 85 insertions(+), 32 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 563bef1e724b..0ec491587a36 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -279,12 +279,12 @@ static int io_import_area(struct io_zcrx_ifq *ifq,
 	return io_import_umem(ifq, mem, area_reg);
 }
 
-static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
-				struct io_zcrx_area *area)
+static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 {
 	int i;
 
-	guard(mutex)(&ifq->pp_lock);
+	lockdep_assert_held(&ifq->pp_lock);
+
 	if (!area->is_mapped)
 		return;
 	area->is_mapped = false;
@@ -302,6 +302,17 @@ static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
 	}
 }
 
+static void io_zcrx_unmap_areas(struct io_zcrx_ifq *ifq)
+{
+	int area_idx;
+
+	/* ->pp_lock protects ->nr_areas and ->areas reads */
+	lockdep_assert_held(&ifq->pp_lock);
+
+	for (area_idx = 0; area_idx < ifq->nr_areas; area_idx++)
+		io_zcrx_unmap_area(ifq, ifq->areas[area_idx]);
+}
+
 static void zcrx_sync_for_device(struct page_pool *pp, struct io_zcrx_ifq *zcrx,
 				 netmem_ref *netmems, unsigned nr)
 {
@@ -410,7 +421,8 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
 static void io_zcrx_free_area(struct io_zcrx_ifq *ifq,
 			      struct io_zcrx_area *area)
 {
-	io_zcrx_unmap_area(ifq, area);
+	scoped_guard(mutex, &ifq->pp_lock)
+		io_zcrx_unmap_area(ifq, area);
 	io_release_area_mem(&area->mem);
 
 	if (area->mem.account_pages)
@@ -427,13 +439,30 @@ static int io_zcrx_append_area(struct io_zcrx_ifq *ifq,
 				struct io_zcrx_area *area)
 {
 	bool kern_readable = !area->mem.is_dmabuf;
+	struct io_zcrx_area **areas, **old_areas;
+	unsigned old_nr;
 
-	if (WARN_ON_ONCE(ifq->area))
-		return -EINVAL;
 	if (WARN_ON_ONCE(ifq->kern_readable != kern_readable))
 		return -EINVAL;
 
-	ifq->area = area;
+	guard(mutex)(&ifq->pp_lock);
+	old_areas = ifq->areas;
+	old_nr = ifq->nr_areas;
+
+	areas = kmalloc_array(old_nr + 1, sizeof(areas[0]),
+			      GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	if (!areas)
+		return -ENOMEM;
+	if (old_areas)
+		memcpy(areas, old_areas, old_nr * sizeof(areas[0]));
+	areas[old_nr] = area;
+
+	scoped_guard(spinlock_bh, &ifq->rq.lock) {
+		guard(spinlock_bh)(&ifq->alloc_lock);
+		ifq->areas = areas;
+		ifq->nr_areas = old_nr + 1;
+	}
+	kfree(old_areas);
 	return 0;
 }
 
@@ -540,8 +569,6 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
 
 static void io_zcrx_drop_netdev(struct io_zcrx_ifq *ifq)
 {
-	guard(mutex)(&ifq->pp_lock);
-
 	if (!ifq->netdev)
 		return;
 	netdev_put(ifq->netdev, &ifq->netdev_tracker);
@@ -576,13 +603,15 @@ static void io_close_queue(struct io_zcrx_ifq *ifq)
 
 static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 {
+	int i;
+
 	if (WARN_ON_ONCE(ifq->if_rxq != -1))
 		return;
 	if (WARN_ON_ONCE(ifq->netdev != NULL))
 		return;
 
-	if (ifq->area)
-		io_zcrx_free_area(ifq, ifq->area);
+	for (i = 0; i < ifq->nr_areas; i++)
+		io_zcrx_free_area(ifq, ifq->areas[i]);
 	if (ifq->mm_account)
 		mmdrop(ifq->mm_account);
 	if (ifq->dev)
@@ -591,6 +620,7 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 	io_free_rbuf_ring(ifq);
 	free_uid(ifq->user);
 	mutex_destroy(&ifq->pp_lock);
+	kfree(ifq->areas);
 	kfree(ifq);
 }
 
@@ -636,14 +666,10 @@ static void io_zcrx_return_niov(struct net_iov *niov)
 	page_pool_put_unrefed_netmem(niov->desc.pp, netmem, -1, false);
 }
 
-static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
+static void io_zcrx_scrub_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 {
-	struct io_zcrx_area *area = ifq->area;
 	int i;
 
-	if (!area)
-		return;
-
 	/* Reclaim back all buffers given to the user space. */
 	for (i = 0; i < area->nia.num_niovs; i++) {
 		struct net_iov *niov = &area->nia.niovs[i];
@@ -657,6 +683,15 @@ static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
 	}
 }
 
+static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
+{
+	int i;
+
+	guard(mutex)(&ifq->pp_lock);
+	for (i = 0; i < ifq->nr_areas; i++)
+		io_zcrx_scrub_area(ifq, ifq->areas[i]);
+}
+
 static void zcrx_unregister_user(struct io_zcrx_ifq *ifq)
 {
 	if (refcount_dec_and_test(&ifq->user_refs)) {
@@ -1019,12 +1054,15 @@ static inline bool io_parse_rqe(struct io_uring_zcrx_rqe *rqe,
 	unsigned niov_idx, area_idx;
 	struct io_zcrx_area *area;
 
+	lockdep_assert_held(&ifq->rq.lock);
+
 	area_idx = off >> IORING_ZCRX_AREA_SHIFT;
 	niov_idx = (off & ~IORING_ZCRX_AREA_MASK) >> ifq->niov_shift;
 
-	if (unlikely(rqe->__pad || area_idx))
+	if (unlikely(rqe->__pad || area_idx >= ifq->nr_areas))
 		return false;
-	area = ifq->area;
+	area_idx = array_index_nospec(area_idx, ifq->nr_areas);
+	area = ifq->areas[area_idx];
 
 	if (unlikely(niov_idx >= area->nia.num_niovs))
 		return false;
@@ -1080,18 +1118,24 @@ static unsigned io_zcrx_ring_refill(struct page_pool *pp,
 static unsigned io_zcrx_refill_slow(struct page_pool *pp, struct io_zcrx_ifq *ifq,
 				    netmem_ref *netmems, unsigned to_alloc)
 {
-	struct io_zcrx_area *area = ifq->area;
-	unsigned allocated = 0;
+	unsigned area_idx = 0;
+	unsigned allocated = 0;
 
 	guard(spinlock_bh)(&ifq->alloc_lock);
 
-	for (allocated = 0; allocated < to_alloc; allocated++) {
-		struct net_iov *niov = zcrx_get_free_niov(area);
+	while (allocated < to_alloc) {
+		struct net_iov *niov = zcrx_get_free_niov(ifq->areas[area_idx]);
+
+		if (!niov) {
+			area_idx++;
+			if (area_idx >= ifq->nr_areas)
+				break;
+			continue;
+		}
 
-		if (!niov)
-			break;
 		net_mp_niov_set_page_pool(pp, niov);
 		netmems[allocated] = net_iov_to_netmem(niov);
+		allocated++;
 	}
 	return allocated;
 }
@@ -1178,9 +1222,9 @@ static void io_pp_uninstall(void *mp_priv, struct netdev_rx_queue *rxq)
 	struct pp_memory_provider_params *p = &rxq->mp_params;
 	struct io_zcrx_ifq *ifq = mp_priv;
 
+	guard(mutex)(&ifq->pp_lock);
 	io_zcrx_drop_netdev(ifq);
-	if (ifq->area)
-		io_zcrx_unmap_area(ifq, ifq->area);
+	io_zcrx_unmap_areas(ifq);
 
 	p->mp_ops = NULL;
 	p->mp_priv = NULL;
@@ -1319,16 +1363,22 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
 static struct net_iov *io_alloc_fallback_niov(struct io_zcrx_ifq *ifq)
 {
 	struct net_iov *niov = NULL;
+	unsigned area_idx;
 
 	if (!ifq->kern_readable)
 		return NULL;
 
-	scoped_guard(spinlock_bh, &ifq->alloc_lock)
-		niov = zcrx_get_free_niov(ifq->area);
+	guard(spinlock_bh)(&ifq->alloc_lock);
+
+	for (area_idx = 0; area_idx < ifq->nr_areas; area_idx++) {
+		niov = zcrx_get_free_niov(ifq->areas[area_idx]);
+		if (niov) {
+			page_pool_fragment_netmem(net_iov_to_netmem(niov), 1);
+			return niov;
+		}
+	}
 
-	if (niov)
-		page_pool_fragment_netmem(net_iov_to_netmem(niov), 1);
-	return niov;
+	return NULL;
 }
 
 struct io_copy_cache {
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 687ca7c9f45b..85a15f4c04e3 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -49,7 +49,10 @@ struct zcrx_rq {
 };
 
 struct io_zcrx_ifq {
-	struct io_zcrx_area		*area;
+	/* read-protected by any of: ->pp_lock, ->alloc_lock, ->rq.lock */
+	struct io_zcrx_area		**areas;
+	unsigned			nr_areas;
+
 	unsigned			niov_shift;
 	struct user_struct		*user;
 	struct mm_struct		*mm_account;
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC 4/6] io_uring/zcrx: don't pass ifq_reg for area creation
  2026-05-12 10:25 [RFC 0/6] dynamic area addition Pavel Begunkov
                   ` (2 preceding siblings ...)
  2026-05-12 10:25 ` [RFC 3/6] io_uring/zcrx: store area pointers in an array Pavel Begunkov
@ 2026-05-12 10:25 ` Pavel Begunkov
  2026-05-12 10:25 ` [RFC 5/6] io_uring/zcrx: split append from " Pavel Begunkov
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2026-05-12 10:25 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, netdev

We might want to create an area without having an instance of struct
io_uring_zcrx_ifq_reg. Extract a helper that doesn't have the ifq
registration structure as an argument but takes the buf length
explicitly.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 0ec491587a36..0551b05d53ee 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -466,21 +466,22 @@ static int io_zcrx_append_area(struct io_zcrx_ifq *ifq,
 	return 0;
 }
 
-static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
+static int __zcrx_create_area(struct io_zcrx_ifq *ifq,
 			       struct io_uring_zcrx_area_reg *area_reg,
-			       struct io_uring_zcrx_ifq_reg *reg)
+			       u32 rx_buf_len)
 {
 	int buf_size_shift = PAGE_SHIFT;
 	struct io_zcrx_area *area;
 	unsigned nr_iovs;
 	int i, ret;
 
-	if (reg->rx_buf_len) {
-		if (!is_power_of_2(reg->rx_buf_len) ||
-		     reg->rx_buf_len < PAGE_SIZE)
+	if (rx_buf_len) {
+		if (!is_power_of_2(rx_buf_len) || rx_buf_len < PAGE_SIZE)
 			return -EINVAL;
-		buf_size_shift = ilog2(reg->rx_buf_len);
+		buf_size_shift = ilog2(rx_buf_len);
 	}
+	if (WARN_ON_ONCE(ifq->niov_shift))
+		return -EINVAL;
 	if (!ifq->dev && buf_size_shift != PAGE_SHIFT)
 		return -EOPNOTSUPP;
 
@@ -550,6 +551,13 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 	return ret;
 }
 
+static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
+			       struct io_uring_zcrx_area_reg *area_reg,
+			       struct io_uring_zcrx_ifq_reg *reg)
+{
+	return __zcrx_create_area(ifq, area_reg, reg->rx_buf_len);
+}
+
 static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
 {
 	struct io_zcrx_ifq *ifq;
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC 5/6] io_uring/zcrx: split append from area creation
  2026-05-12 10:25 [RFC 0/6] dynamic area addition Pavel Begunkov
                   ` (3 preceding siblings ...)
  2026-05-12 10:25 ` [RFC 4/6] io_uring/zcrx: don't pass ifq_reg for area creation Pavel Begunkov
@ 2026-05-12 10:25 ` Pavel Begunkov
  2026-05-12 10:25 ` [RFC 6/6] io_uring/zcrx: add dynamic " Pavel Begunkov
  2026-05-12 10:28 ` [RFC 0/6] dynamic area addition Pavel Begunkov
  6 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2026-05-12 10:25 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, netdev

A preparation patch: move appending an area from __zcrx_create_area()
to the caller.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 0551b05d53ee..5fb81bb6f819 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -468,6 +468,7 @@ static int io_zcrx_append_area(struct io_zcrx_ifq *ifq,
 
 static int __zcrx_create_area(struct io_zcrx_ifq *ifq,
 			       struct io_uring_zcrx_area_reg *area_reg,
+			       struct io_zcrx_area **res_area,
 			       u32 rx_buf_len)
 {
 	int buf_size_shift = PAGE_SHIFT;
@@ -541,10 +542,8 @@ static int __zcrx_create_area(struct io_zcrx_ifq *ifq,
 	/* we're only supporting one area per ifq for now */
 	area->area_id = 0;
 	area_reg->rq_area_token = (u64)area->area_id << IORING_ZCRX_AREA_SHIFT;
-
-	ret = io_zcrx_append_area(ifq, area);
-	if (!ret)
-		return 0;
+	*res_area = area;
+	return 0;
 err:
 	if (area)
 		io_zcrx_free_area(ifq, area);
@@ -555,7 +554,19 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 			       struct io_uring_zcrx_area_reg *area_reg,
 			       struct io_uring_zcrx_ifq_reg *reg)
 {
-	return __zcrx_create_area(ifq, area_reg, reg->rx_buf_len);
+	struct io_zcrx_area *area;
+	int ret;
+
+	ret = __zcrx_create_area(ifq, area_reg, &area, reg->rx_buf_len);
+	if (ret)
+		return ret;
+
+	ret = io_zcrx_append_area(ifq, area);
+	if (ret) {
+		io_zcrx_free_area(ifq, area);
+		return ret;
+	}
+	return 0;
 }
 
 static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC 6/6] io_uring/zcrx: add dynamic area creation
  2026-05-12 10:25 [RFC 0/6] dynamic area addition Pavel Begunkov
                   ` (4 preceding siblings ...)
  2026-05-12 10:25 ` [RFC 5/6] io_uring/zcrx: split append from " Pavel Begunkov
@ 2026-05-12 10:25 ` Pavel Begunkov
  2026-05-12 10:28 ` [RFC 0/6] dynamic area addition Pavel Begunkov
  6 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2026-05-12 10:25 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, netdev

It's not always possible for the user to predict during registration how
much memory zcrx will need to sustain the traffic. Allow adding more
areas dynamically with a new ctrl code, ZCRX_CTRL_ADD_AREA.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 include/uapi/linux/io_uring/zcrx.h |  7 ++++++
 io_uring/zcrx.c                    | 39 +++++++++++++++++++++++++++++-
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/io_uring/zcrx.h b/include/uapi/linux/io_uring/zcrx.h
index 5ce02c7a6096..de696eb10db4 100644
--- a/include/uapi/linux/io_uring/zcrx.h
+++ b/include/uapi/linux/io_uring/zcrx.h
@@ -88,6 +88,7 @@ struct io_uring_zcrx_ifq_reg {
 enum zcrx_ctrl_op {
 	ZCRX_CTRL_FLUSH_RQ,
 	ZCRX_CTRL_EXPORT,
+	ZCRX_CTRL_ADD_AREA,
 
 	__ZCRX_CTRL_LAST,
 };
@@ -101,6 +102,11 @@ struct zcrx_ctrl_export {
 	__u32 		__resv1[11];
 };
 
+struct zcrx_ctrl_add_area {
+	__u64		area_ptr; /* pointer to struct io_uring_zcrx_area_reg */
+	__u64		__resv[5];
+};
+
 struct zcrx_ctrl {
 	__u32	zcrx_id;
 	__u32	op; /* see enum zcrx_ctrl_op */
@@ -109,6 +115,7 @@ struct zcrx_ctrl {
 	union {
 		struct zcrx_ctrl_export		zc_export;
 		struct zcrx_ctrl_flush_rq	zc_flush;
+		struct zcrx_ctrl_add_area	zc_area;
 	};
 };
 
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 5fb81bb6f819..4bcf68b8d682 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -481,7 +481,7 @@ static int __zcrx_create_area(struct io_zcrx_ifq *ifq,
 			return -EINVAL;
 		buf_size_shift = ilog2(rx_buf_len);
 	}
-	if (WARN_ON_ONCE(ifq->niov_shift))
+	if (ifq->niov_shift && ifq->niov_shift != buf_size_shift)
 		return -EINVAL;
 	if (!ifq->dev && buf_size_shift != PAGE_SHIFT)
 		return -EOPNOTSUPP;
@@ -967,6 +967,8 @@ int io_register_zcrx(struct io_ring_ctx *ctx,
 			goto err;
 	}
 
+	WARN_ON_ONCE(!ifq->niov_shift);
+
 	reg.zcrx_id = id;
 
 	scoped_guard(mutex, &ctx->mmap_lock) {
@@ -1325,6 +1327,39 @@ static int zcrx_flush_rq(struct io_ring_ctx *ctx, struct io_zcrx_ifq *zcrx,
 	return 0;
 }
 
+static int zcrx_ctrl_add_area(struct io_ring_ctx *ctx, struct io_zcrx_ifq *ifq,
+			      struct zcrx_ctrl *ctrl)
+{
+	struct zcrx_ctrl_add_area *ctrl_add = &ctrl->zc_area;
+	struct io_uring_zcrx_area_reg __user *area_uptr;
+	struct io_uring_zcrx_area_reg area_reg;
+	struct io_zcrx_area *area;
+	int ret;
+
+	area_uptr = u64_to_user_ptr(ctrl_add->area_ptr);
+
+	if (!mem_is_zero(&ctrl_add->__resv, sizeof(ctrl_add->__resv)))
+		return -EINVAL;
+	if (copy_from_user(&area_reg, area_uptr, sizeof(area_reg)))
+		return -EFAULT;
+
+	ret = __zcrx_create_area(ifq, &area_reg, &area, 0);
+	if (ret)
+		return ret;
+
+	if (copy_to_user(area_uptr, &area_reg, sizeof(area_reg))) {
+		io_zcrx_free_area(ifq, area);
+		return -EFAULT;
+	}
+
+	ret = io_zcrx_append_area(ifq, area);
+	if (ret) {
+		io_zcrx_free_area(ifq, area);
+		return ret;
+	}
+	return 0;
+}
+
 int io_zcrx_ctrl(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
 {
 	struct zcrx_ctrl ctrl;
@@ -1348,6 +1383,8 @@ int io_zcrx_ctrl(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
 		return zcrx_flush_rq(ctx, zcrx, &ctrl);
 	case ZCRX_CTRL_EXPORT:
 		return zcrx_export(ctx, zcrx, &ctrl, arg);
+	case ZCRX_CTRL_ADD_AREA:
+		return zcrx_ctrl_add_area(ctx, zcrx, &ctrl);
 	}
 
 	return -EOPNOTSUPP;
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [RFC 0/6] dynamic area addition
  2026-05-12 10:25 [RFC 0/6] dynamic area addition Pavel Begunkov
                   ` (5 preceding siblings ...)
  2026-05-12 10:25 ` [RFC 6/6] io_uring/zcrx: add dynamic " Pavel Begunkov
@ 2026-05-12 10:28 ` Pavel Begunkov
  6 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2026-05-12 10:28 UTC (permalink / raw)
  To: io-uring; +Cc: netdev

On 5/12/26 11:25, Pavel Begunkov wrote:
> Currently, the user needs to give memory for the data upfront
> when registering a zcrx instance, but it's not always easy to
> predict for the user how much it will need. This series adds
> a way to add more memory / areas at runtime.

There are some tests in the branch:

https://github.com/isilence/liburing/tree/zcrx/area-create

-- 
Pavel Begunkov


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2026-05-12 10:28 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-12 10:25 [RFC 0/6] dynamic area addition Pavel Begunkov
2026-05-12 10:25 ` [RFC 1/6] io_uring/zcrx: remove extra ifq close Pavel Begunkov
2026-05-12 10:25 ` [RFC 2/6] io_uring/zcrx: move freelist lock to struct zcrx Pavel Begunkov
2026-05-12 10:25 ` [RFC 3/6] io_uring/zcrx: store area pointers in an array Pavel Begunkov
2026-05-12 10:25 ` [RFC 4/6] io_uring/zcrx: don't pass ifq_reg for area creation Pavel Begunkov
2026-05-12 10:25 ` [RFC 5/6] io_uring/zcrx: split append from " Pavel Begunkov
2026-05-12 10:25 ` [RFC 6/6] io_uring/zcrx: add dynamic " Pavel Begunkov
2026-05-12 10:28 ` [RFC 0/6] dynamic area addition Pavel Begunkov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.