* [zcrx-next 0/8] niov sizing and area mapping improvement
@ 2025-08-17 22:44 Pavel Begunkov
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

This series includes a bunch of cleanups deduplicating area type handling, and
Patch 7 introduces support for non-PAGE_SIZE niovs.

For a full branch with all relevant dependencies see
https://github.com/isilence/linux.git zcrx/for-next

Pavel Begunkov (8):
  io_uring/zcrx: don't pass slot to io_zcrx_create_area
  io_uring/zcrx: move area reg checks into io_import_area
  io_uring/zcrx: check all niovs filled with dma addresses
  io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback()
  io_uring/zcrx: deduplicate area mapping
  io_uring/zcrx: remove dmabuf_offset
  io_uring/zcrx: make niov size variable
  io_uring/zcrx: set sgt for umem area

 io_uring/zcrx.c | 123 +++++++++++++++++++++++++-----------------------
 io_uring/zcrx.h |   4 +-
 2 files changed, 65 insertions(+), 62 deletions(-)

-- 
2.49.0


* [zcrx-next 1/8] io_uring/zcrx: don't pass slot to io_zcrx_create_area
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

Instead of passing io_zcrx_create_area() a pointer to the slot where the new
area should be stored, let the function find the right place for the area
itself. It's more straightforward and will be needed to support multiple
areas.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 859bb5f54892..1c69c8c8e509 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -378,8 +378,16 @@ static void io_zcrx_free_area(struct io_zcrx_area *area)
 
 #define IO_ZCRX_AREA_SUPPORTED_FLAGS	(IORING_ZCRX_AREA_DMABUF)
 
+static int io_zcrx_append_area(struct io_zcrx_ifq *ifq,
+				struct io_zcrx_area *area)
+{
+	if (ifq->area)
+		return -EINVAL;
+	ifq->area = area;
+	return 0;
+}
+
 static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
-			       struct io_zcrx_area **res,
 			       struct io_uring_zcrx_area_reg *area_reg)
 {
 	struct io_zcrx_area *area;
@@ -436,8 +444,10 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 	area->area_id = 0;
 	area_reg->rq_area_token = (u64)area->area_id << IORING_ZCRX_AREA_SHIFT;
 	spin_lock_init(&area->freelist_lock);
-	*res = area;
-	return 0;
+
+	ret = io_zcrx_append_area(ifq, area);
+	if (!ret)
+		return 0;
 err:
 	if (area)
 		io_zcrx_free_area(area);
@@ -589,7 +599,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	}
 	get_device(ifq->dev);
 
-	ret = io_zcrx_create_area(ifq, &ifq->area, &area);
+	ret = io_zcrx_create_area(ifq, &area);
 	if (ret)
 		goto err;
 
-- 
2.49.0
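
For a sense of where the "multiple areas" direction could go, here is a
hypothetical sketch of io_zcrx_append_area() appending into an array instead
of a single slot. The areas[] array, nr_areas counter and IO_ZCRX_MAX_AREAS
limit are made up for illustration and are not part of this series:

static int io_zcrx_append_area(struct io_zcrx_ifq *ifq,
			       struct io_zcrx_area *area)
{
	/* hypothetical multi-area variant, not what this patch adds */
	if (ifq->nr_areas >= IO_ZCRX_MAX_AREAS)
		return -EINVAL;
	area->area_id = ifq->nr_areas;
	ifq->areas[ifq->nr_areas++] = area;
	return 0;
}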


* [zcrx-next 2/8] io_uring/zcrx: move area reg checks into io_import_area
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

io_import_area() is responsible for importing memory and parsing
io_uring_zcrx_area_reg, so move all area reg structure checks into the
function.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 1c69c8c8e509..ea62e13b9500 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -26,6 +26,8 @@
 #include "zcrx.h"
 #include "rsrc.h"
 
+#define IO_ZCRX_AREA_SUPPORTED_FLAGS	(IORING_ZCRX_AREA_DMABUF)
+
 #define IO_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
 
 static inline struct io_zcrx_ifq *io_pp_to_ifq(struct page_pool *pp)
@@ -231,6 +233,13 @@ static int io_import_area(struct io_zcrx_ifq *ifq,
 {
 	int ret;
 
+	if (area_reg->flags & ~IO_ZCRX_AREA_SUPPORTED_FLAGS)
+		return -EINVAL;
+	if (area_reg->rq_area_token)
+		return -EINVAL;
+	if (area_reg->__resv2[0] || area_reg->__resv2[1])
+		return -EINVAL;
+
 	ret = io_validate_user_buf_range(area_reg->addr, area_reg->len);
 	if (ret)
 		return ret;
@@ -376,8 +385,6 @@ static void io_zcrx_free_area(struct io_zcrx_area *area)
 	kfree(area);
 }
 
-#define IO_ZCRX_AREA_SUPPORTED_FLAGS	(IORING_ZCRX_AREA_DMABUF)
-
 static int io_zcrx_append_area(struct io_zcrx_ifq *ifq,
 				struct io_zcrx_area *area)
 {
@@ -394,13 +401,6 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 	unsigned nr_iovs;
 	int i, ret;
 
-	if (area_reg->flags & ~IO_ZCRX_AREA_SUPPORTED_FLAGS)
-		return -EINVAL;
-	if (area_reg->rq_area_token)
-		return -EINVAL;
-	if (area_reg->__resv2[0] || area_reg->__resv2[1])
-		return -EINVAL;
-
 	ret = -ENOMEM;
 	area = kzalloc(sizeof(*area), GFP_KERNEL);
 	if (!area)
-- 
2.49.0


* [zcrx-next 3/8] io_uring/zcrx: check all niovs filled with dma addresses
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

Add a warning if io_populate_area_dma() can't fill in all net_iovs; that
should never happen.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index ea62e13b9500..be6d59401dc7 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -77,6 +77,9 @@ static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
 			niov_idx++;
 		}
 	}
+
+	if (WARN_ON_ONCE(niov_idx != area->nia.num_niovs))
+		return -EFAULT;
 	return 0;
 }
 
-- 
2.49.0


* [zcrx-next 4/8] io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback()
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

io_zcrx_copy_chunk() doesn't and shouldn't care which area the buffer is
allocated from. Instead of resolving the area there, pass the ifq to
io_zcrx_alloc_fallback() and let it handle the lookup. Also rename the helper
for clarity.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index be6d59401dc7..cb8113e83311 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -945,10 +945,14 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
 	return true;
 }
 
-static struct net_iov *io_zcrx_alloc_fallback(struct io_zcrx_area *area)
+static struct net_iov *io_alloc_fallback_niov(struct io_zcrx_ifq *ifq)
 {
+	struct io_zcrx_area *area = ifq->area;
 	struct net_iov *niov = NULL;
 
+	if (area->mem.is_dmabuf)
+		return NULL;
+
 	spin_lock_bh(&area->freelist_lock);
 	if (area->free_count)
 		niov = __io_zcrx_get_free_niov(area);
@@ -1008,19 +1012,15 @@ static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
 				  struct page *src_page, unsigned int src_offset,
 				  size_t len)
 {
-	struct io_zcrx_area *area = ifq->area;
 	size_t copied = 0;
 	int ret = 0;
 
-	if (area->mem.is_dmabuf)
-		return -EFAULT;
-
 	while (len) {
 		struct io_copy_cache cc;
 		struct net_iov *niov;
 		size_t n;
 
-		niov = io_zcrx_alloc_fallback(area);
+		niov = io_alloc_fallback_niov(ifq);
 		if (!niov) {
 			ret = -ENOMEM;
 			break;
-- 
2.49.0


* [zcrx-next 5/8] io_uring/zcrx: deduplicate area mapping
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

With a common type for storing dma addresses and a shared
io_populate_area_dma() helper, the type-specific area mapping helpers become
trivial, so open code them and deduplicate the call to
io_populate_area_dma().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 37 ++++++++++++++-----------------------
 1 file changed, 14 insertions(+), 23 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index cb8113e83311..2bd6e0bcc645 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -157,14 +157,6 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
 	return ret;
 }
 
-static int io_zcrx_map_area_dmabuf(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
-{
-	if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
-		return -EINVAL;
-	return io_populate_area_dma(ifq, area, area->mem.sgt,
-				    area->mem.dmabuf_offset);
-}
-
 static unsigned long io_count_account_pages(struct page **pages, unsigned nr_pages)
 {
 	struct folio *last_folio = NULL;
@@ -275,30 +267,29 @@ static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
 	}
 }
 
-static unsigned io_zcrx_map_area_umem(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
-{
-	int ret;
-
-	ret = dma_map_sgtable(ifq->dev, &area->mem.page_sg_table,
-				DMA_FROM_DEVICE, IO_DMA_ATTR);
-	if (ret < 0)
-		return ret;
-	return io_populate_area_dma(ifq, area, &area->mem.page_sg_table, 0);
-}
-
 static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 {
+	unsigned long offset;
+	struct sg_table *sgt;
 	int ret;
 
 	guard(mutex)(&ifq->pp_lock);
 	if (area->is_mapped)
 		return 0;
 
-	if (area->mem.is_dmabuf)
-		ret = io_zcrx_map_area_dmabuf(ifq, area);
-	else
-		ret = io_zcrx_map_area_umem(ifq, area);
+	if (!area->mem.is_dmabuf) {
+		ret = dma_map_sgtable(ifq->dev, &area->mem.page_sg_table,
+				      DMA_FROM_DEVICE, IO_DMA_ATTR);
+		if (ret < 0)
+			return ret;
+		sgt = &area->mem.page_sg_table;
+		offset = 0;
+	} else {
+		sgt = area->mem.sgt;
+		offset = area->mem.dmabuf_offset;
+	}
 
+	ret = io_populate_area_dma(ifq, area, sgt, offset);
 	if (ret == 0)
 		area->is_mapped = true;
 	return ret;
-- 
2.49.0


* [zcrx-next 6/8] io_uring/zcrx: remove dmabuf_offset
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

The dmabuf offset was removed from the uapi, so it's now always 0 and can be
dropped together with the offset handling in io_populate_area_dma().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 13 ++-----------
 io_uring/zcrx.h |  1 -
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 2bd6e0bcc645..87ba34c5a22f 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -53,7 +53,7 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
 
 static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
 				struct io_zcrx_area *area,
-				struct sg_table *sgt, unsigned long off)
+				struct sg_table *sgt)
 {
 	struct scatterlist *sg;
 	unsigned i, niov_idx = 0;
@@ -61,11 +61,6 @@ static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
 	for_each_sgtable_dma_sg(sgt, sg, i) {
 		dma_addr_t dma = sg_dma_address(sg);
 		unsigned long sg_len = sg_dma_len(sg);
-		unsigned long sg_off = min(sg_len, off);
-
-		off -= sg_off;
-		sg_len -= sg_off;
-		dma += sg_off;
 
 		while (sg_len && niov_idx < area->nia.num_niovs) {
 			struct net_iov *niov = &area->nia.niovs[niov_idx];
@@ -149,7 +144,6 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
 		goto err;
 	}
 
-	mem->dmabuf_offset = off;
 	mem->size = len;
 	return 0;
 err:
@@ -269,7 +263,6 @@ static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
 
 static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 {
-	unsigned long offset;
 	struct sg_table *sgt;
 	int ret;
 
@@ -283,13 +276,11 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 		if (ret < 0)
 			return ret;
 		sgt = &area->mem.page_sg_table;
-		offset = 0;
 	} else {
 		sgt = area->mem.sgt;
-		offset = area->mem.dmabuf_offset;
 	}
 
-	ret = io_populate_area_dma(ifq, area, sgt, offset);
+	ret = io_populate_area_dma(ifq, area, sgt);
 	if (ret == 0)
 		area->is_mapped = true;
 	return ret;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index f6a9ecf3e08a..4e94cfa720e6 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -20,7 +20,6 @@ struct io_zcrx_mem {
 	struct dma_buf_attachment	*attach;
 	struct dma_buf			*dmabuf;
 	struct sg_table			*sgt;
-	unsigned long			dmabuf_offset;
 };
 
 struct io_zcrx_area {
-- 
2.49.0


* [zcrx-next 7/8] io_uring/zcrx: make niov size variable
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

Instead of hard coding PAGE_SIZE as the niov size, add a niov_shift field to
the ifq and patch up all the important places. The copy fallback still assumes
PAGE_SIZE, so it'll waste some memory for now.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 32 +++++++++++++++++++++-----------
 io_uring/zcrx.h |  1 +
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 87ba34c5a22f..952cd7669589 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -45,16 +45,19 @@ static inline struct io_zcrx_area *io_zcrx_iov_to_area(const struct net_iov *nio
 static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
 {
 	struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
+	unsigned niov_pages_shift;
 
 	lockdep_assert(!area->mem.is_dmabuf);
 
-	return area->mem.pages[net_iov_idx(niov)];
+	niov_pages_shift = area->ifq->niov_shift - PAGE_SHIFT;
+	return area->mem.pages[net_iov_idx(niov) << niov_pages_shift];
 }
 
 static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
 				struct io_zcrx_area *area,
 				struct sg_table *sgt)
 {
+	unsigned niov_size = 1U << ifq->niov_shift;
 	struct scatterlist *sg;
 	unsigned i, niov_idx = 0;
 
@@ -62,13 +65,16 @@ static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
 		dma_addr_t dma = sg_dma_address(sg);
 		unsigned long sg_len = sg_dma_len(sg);
 
+		if (WARN_ON_ONCE(sg_len % niov_size))
+			return -EINVAL;
+
 		while (sg_len && niov_idx < area->nia.num_niovs) {
 			struct net_iov *niov = &area->nia.niovs[niov_idx];
 
 			if (net_mp_niov_set_dma_addr(niov, dma))
 				return -EFAULT;
-			sg_len -= PAGE_SIZE;
-			dma += PAGE_SIZE;
+			sg_len -= niov_size;
+			dma += niov_size;
 			niov_idx++;
 		}
 	}
@@ -396,7 +402,8 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 	if (ret)
 		goto err;
 
-	nr_iovs = area->mem.size >> PAGE_SHIFT;
+	ifq->niov_shift = PAGE_SHIFT;
+	nr_iovs = area->mem.size >> ifq->niov_shift;
 	area->nia.num_niovs = nr_iovs;
 
 	ret = -ENOMEM;
@@ -742,7 +749,7 @@ static void io_zcrx_ring_refill(struct page_pool *pp,
 		netmem_ref netmem;
 
 		area_idx = rqe->off >> IORING_ZCRX_AREA_SHIFT;
-		niov_idx = (rqe->off & ~IORING_ZCRX_AREA_MASK) >> PAGE_SHIFT;
+		niov_idx = (rqe->off & ~IORING_ZCRX_AREA_MASK) >> ifq->niov_shift;
 
 		if (unlikely(rqe->__pad || area_idx))
 			continue;
@@ -785,20 +792,23 @@ static void io_zcrx_refill_slow(struct page_pool *pp, struct io_zcrx_ifq *ifq)
 	spin_unlock_bh(&area->freelist_lock);
 }
 
-static void io_sync_allocated_niovs(struct page_pool *pp)
+static void io_sync_allocated_niovs(struct io_zcrx_ifq *ifq,
+				    struct page_pool *pp)
 {
 #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
+	unsigned niov_size;
 	int i;
 
 	if (!dma_dev_need_sync(pp->p.dev))
 		return;
 
+	niov_size = 1U << ifq->niov_shift;
 	for (i = 0; i < pp->alloc.count; i++) {
 		netmem_ref netmem = pp->alloc.cache[i];
 		dma_addr_t dma_addr = page_pool_get_dma_addr_netmem(netmem);
 
 		__dma_sync_single_for_device(pp->p.dev, dma_addr + pp->p.offset,
-					     PAGE_SIZE, pp->p.dma_dir);
+					     niov_size, pp->p.dma_dir);
 	}
 #endif
 }
@@ -819,7 +829,7 @@ static netmem_ref io_pp_zc_alloc_netmems(struct page_pool *pp, gfp_t gfp)
 	if (!pp->alloc.count)
 		return 0;
 out_return:
-	io_sync_allocated_niovs(pp);
+	io_sync_allocated_niovs(ifq, pp);
 	return pp->alloc.cache[--pp->alloc.count];
 }
 
@@ -847,8 +857,8 @@ static int io_pp_zc_init(struct page_pool *pp)
 		return -EINVAL;
 	if (WARN_ON_ONCE(!pp->dma_map))
 		return -EOPNOTSUPP;
-	if (pp->p.order != 0)
-		return -EOPNOTSUPP;
+	if (pp->p.order + PAGE_SHIFT != ifq->niov_shift)
+		return -EINVAL;
 	if (pp->p.dma_dir != DMA_FROM_DEVICE)
 		return -EOPNOTSUPP;
 
@@ -920,7 +930,7 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
 	cqe->flags = IORING_CQE_F_MORE;
 
 	area = io_zcrx_iov_to_area(niov);
-	offset = off + (net_iov_idx(niov) << PAGE_SHIFT);
+	offset = off + (net_iov_idx(niov) << ifq->niov_shift);
 	rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1);
 	rcqe->off = offset + ((u64)area->area_id << IORING_ZCRX_AREA_SHIFT);
 	rcqe->__pad = 0;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 4e94cfa720e6..41e4ceab8dd6 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -41,6 +41,7 @@ struct io_zcrx_area {
 struct io_zcrx_ifq {
 	struct io_ring_ctx		*ctx;
 	struct io_zcrx_area		*area;
+	unsigned			niov_shift;
 
 	spinlock_t			rq_lock ____cacheline_aligned_in_smp;
 	struct io_uring			*rq_ring;
-- 
2.49.0
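
To make the offset math concrete: the off field of a refill queue entry and of
a zcrx cqe carries the area id above bit IORING_ZCRX_AREA_SHIFT and a byte
offset into the area below it, and with this patch the niov index comes from
shifting that byte offset by ifq->niov_shift rather than PAGE_SHIFT. Below is
a minimal userspace-style sketch mirroring the kernel-side math above;
decode_zcrx_off() and the example shift of 15 (32KiB niovs) are illustrative
only, and the two IORING_ZCRX_* constants are restated here only to keep the
example self-contained:

#include <stdint.h>
#include <stdio.h>

#define IORING_ZCRX_AREA_SHIFT	48
#define IORING_ZCRX_AREA_MASK	(~(((uint64_t)1 << IORING_ZCRX_AREA_SHIFT) - 1))

static void decode_zcrx_off(uint64_t off, unsigned int niov_shift)
{
	/* top bits: which area the buffer belongs to */
	unsigned int area_idx = off >> IORING_ZCRX_AREA_SHIFT;
	/* low bits: byte offset within that area */
	uint64_t area_off = off & ~IORING_ZCRX_AREA_MASK;
	/* niov granularity is 1 << niov_shift instead of PAGE_SIZE */
	uint64_t niov_idx = area_off >> niov_shift;
	uint64_t off_in_niov = area_off & ((1ULL << niov_shift) - 1);

	printf("area %u, niov %llu, offset in niov %llu\n", area_idx,
	       (unsigned long long)niov_idx,
	       (unsigned long long)off_in_niov);
}

int main(void)
{
	/* area 0, byte offset 0x12345 -> niov 2, offset 9029 with 32KiB niovs */
	decode_zcrx_off(0x12345, 15);
	return 0;
}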


* [zcrx-next 8/8] io_uring/zcrx: set sgt for umem area
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

Set struct io_zcrx_mem::sgt for umem areas as well to simplify looking
up the current sg table.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 14 ++++++--------
 io_uring/zcrx.h |  2 +-
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 952cd7669589..b3cfe0c04920 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -54,10 +54,10 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
 }
 
 static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
-				struct io_zcrx_area *area,
-				struct sg_table *sgt)
+				struct io_zcrx_area *area)
 {
 	unsigned niov_size = 1U << ifq->niov_shift;
+	struct sg_table *sgt = area->mem.sgt;
 	struct scatterlist *sg;
 	unsigned i, niov_idx = 0;
 
@@ -203,6 +203,7 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
 	if (ret < 0)
 		mem->account_pages = 0;
 
+	mem->sgt = &mem->page_sg_table;
 	mem->pages = pages;
 	mem->nr_folios = nr_pages;
 	mem->size = area_reg->len;
@@ -217,7 +218,8 @@ static void io_release_area_mem(struct io_zcrx_mem *mem)
 	}
 	if (mem->pages) {
 		unpin_user_pages(mem->pages, mem->nr_folios);
-		sg_free_table(&mem->page_sg_table);
+		sg_free_table(mem->sgt);
+		mem->sgt = NULL;
 		kvfree(mem->pages);
 	}
 }
@@ -269,7 +271,6 @@ static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
 
 static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 {
-	struct sg_table *sgt;
 	int ret;
 
 	guard(mutex)(&ifq->pp_lock);
@@ -281,12 +282,9 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 				      DMA_FROM_DEVICE, IO_DMA_ATTR);
 		if (ret < 0)
 			return ret;
-		sgt = &area->mem.page_sg_table;
-	} else {
-		sgt = area->mem.sgt;
 	}
 
-	ret = io_populate_area_dma(ifq, area, sgt);
+	ret = io_populate_area_dma(ifq, area);
 	if (ret == 0)
 		area->is_mapped = true;
 	return ret;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 41e4ceab8dd6..a48871b5adad 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -16,10 +16,10 @@ struct io_zcrx_mem {
 	unsigned long			nr_folios;
 	struct sg_table			page_sg_table;
 	unsigned long			account_pages;
+	struct sg_table			*sgt;
 
 	struct dma_buf_attachment	*attach;
 	struct dma_buf			*dmabuf;
-	struct sg_table			*sgt;
 };
 
 struct io_zcrx_area {
-- 
2.49.0


* Re: [zcrx-next 0/8] niov sizing and area mapping improvement
From: Jens Axboe @ 2025-08-20 18:20 UTC (permalink / raw)
  To: io-uring, Pavel Begunkov


On Sun, 17 Aug 2025 23:44:11 +0100, Pavel Begunkov wrote:
> This includes a bunch of cleanups deduplicating area type handling,
> and Patch 7 introduces handling for non-PAGE_SIZE niovs.
> 
> For a full branch with all relevant dependencies see
> https://github.com/isilence/linux.git zcrx/for-next
> 
> Pavel Begunkov (8):
>   io_uring/zcrx: don't pass slot to io_zcrx_create_area
>   io_uring/zcrx: move area reg checks into io_import_area
>   io_uring/zcrx: check all niovs filled with dma addresses
>   io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback()
>   io_uring/zcrx: deduplicate area mapping
>   io_uring/zcrx: remove dmabuf_offset
>   io_uring/zcrx: make niov size variable
>   io_uring/zcrx: set sgt for umem area
> 
> [...]

Applied, thanks!

[1/8] io_uring/zcrx: don't pass slot to io_zcrx_create_area
      commit: e205db1eb9596e6e7d9ed78882d4c47c8448c2e5
[2/8] io_uring/zcrx: move area reg checks into io_import_area
      commit: ff9d7473a29a241491fad2b9e0e2de6671556b4e
[3/8] io_uring/zcrx: check all niovs filled with dma addresses
      commit: c6c489577c004b65734f2d59a9c0da94c8bd3187
[4/8] io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback()
      commit: f37d7f9b092274a33fe40b58b05327a82702e936
[5/8] io_uring/zcrx: deduplicate area mapping
      commit: fc8b5f573a2fa1cc5e7347493687229557ebd013
[6/8] io_uring/zcrx: remove dmabuf_offset
      commit: 1228c5129dd5577e1b988f85915e63d24b99ad92
[7/8] io_uring/zcrx: make niov size variable
      commit: 16a4e2d99220fd844efcf6d34b4d954912ed8d35
[8/8] io_uring/zcrx: set sgt for umem area
      commit: 14fcac7a7cec83e4ed15538103ef6c51400c559c

Best regards,
-- 
Jens Axboe



