All of lore.kernel.org
 help / color / mirror / Atom feed
* [zcrx-next 0/8] niov sizing and area mapping improvement
@ 2025-08-17 22:44 Pavel Begunkov
  2025-08-17 22:44 ` [zcrx-next 1/8] io_uring/zcrx: don't pass slot to io_zcrx_create_area Pavel Begunkov
                   ` (8 more replies)
  0 siblings, 9 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

This includes a bunch of cleanups deduplicating area type handling,
and Patch 7 introduces handling for non-PAGE_SIZE niovs.

For a full branch with all relevant dependencies see
https://github.com/isilence/linux.git zcrx/for-next

Pavel Begunkov (8):
  io_uring/zcrx: don't pass slot to io_zcrx_create_area
  io_uring/zcrx: move area reg checks into io_import_area
  io_uring/zcrx: check all niovs filled with dma addresses
  io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback()
  io_uring/zcrx: deduplicate area mapping
  io_uring/zcrx: remove dmabuf_offset
  io_uring/zcrx: make niov size variable
  io_uring/zcrx: set sgt for umem area

 io_uring/zcrx.c | 123 +++++++++++++++++++++++++-----------------------
 io_uring/zcrx.h |   4 +-
 2 files changed, 65 insertions(+), 62 deletions(-)

-- 
2.49.0


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [zcrx-next 1/8] io_uring/zcrx: don't pass slot to io_zcrx_create_area
  2025-08-17 22:44 [zcrx-next 0/8] niov sizing and area mapping improvement Pavel Begunkov
@ 2025-08-17 22:44 ` Pavel Begunkov
  2025-08-17 22:44 ` [zcrx-next 2/8] io_uring/zcrx: move area reg checks into io_import_area Pavel Begunkov
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

Don't pass io_zcrx_create_area() a pointer to the slot where the new
area should be stored; let it handle finding the right place for a new
area itself. It's more straightforward and will be needed to support
multiple areas.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 859bb5f54892..1c69c8c8e509 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -378,8 +378,16 @@ static void io_zcrx_free_area(struct io_zcrx_area *area)
 
 #define IO_ZCRX_AREA_SUPPORTED_FLAGS	(IORING_ZCRX_AREA_DMABUF)
 
+static int io_zcrx_append_area(struct io_zcrx_ifq *ifq,
+				struct io_zcrx_area *area)
+{
+	if (ifq->area)
+		return -EINVAL;
+	ifq->area = area;
+	return 0;
+}
+
 static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
-			       struct io_zcrx_area **res,
 			       struct io_uring_zcrx_area_reg *area_reg)
 {
 	struct io_zcrx_area *area;
@@ -436,8 +444,10 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 	area->area_id = 0;
 	area_reg->rq_area_token = (u64)area->area_id << IORING_ZCRX_AREA_SHIFT;
 	spin_lock_init(&area->freelist_lock);
-	*res = area;
-	return 0;
+
+	ret = io_zcrx_append_area(ifq, area);
+	if (!ret)
+		return 0;
 err:
 	if (area)
 		io_zcrx_free_area(area);
@@ -589,7 +599,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	}
 	get_device(ifq->dev);
 
-	ret = io_zcrx_create_area(ifq, &ifq->area, &area);
+	ret = io_zcrx_create_area(ifq, &area);
 	if (ret)
 		goto err;
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [zcrx-next 2/8] io_uring/zcrx: move area reg checks into io_import_area
  2025-08-17 22:44 [zcrx-next 0/8] niov sizing and area mapping improvement Pavel Begunkov
  2025-08-17 22:44 ` [zcrx-next 1/8] io_uring/zcrx: don't pass slot to io_zcrx_create_area Pavel Begunkov
@ 2025-08-17 22:44 ` Pavel Begunkov
  2025-08-17 22:44 ` [zcrx-next 3/8] io_uring/zcrx: check all niovs filled with dma addresses Pavel Begunkov
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

io_import_area() is responsible for importing memory and parsing
io_uring_zcrx_area_reg, so move all area reg structure checks into the
function.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 1c69c8c8e509..ea62e13b9500 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -26,6 +26,8 @@
 #include "zcrx.h"
 #include "rsrc.h"
 
+#define IO_ZCRX_AREA_SUPPORTED_FLAGS	(IORING_ZCRX_AREA_DMABUF)
+
 #define IO_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
 
 static inline struct io_zcrx_ifq *io_pp_to_ifq(struct page_pool *pp)
@@ -231,6 +233,13 @@ static int io_import_area(struct io_zcrx_ifq *ifq,
 {
 	int ret;
 
+	if (area_reg->flags & ~IO_ZCRX_AREA_SUPPORTED_FLAGS)
+		return -EINVAL;
+	if (area_reg->rq_area_token)
+		return -EINVAL;
+	if (area_reg->__resv2[0] || area_reg->__resv2[1])
+		return -EINVAL;
+
 	ret = io_validate_user_buf_range(area_reg->addr, area_reg->len);
 	if (ret)
 		return ret;
@@ -376,8 +385,6 @@ static void io_zcrx_free_area(struct io_zcrx_area *area)
 	kfree(area);
 }
 
-#define IO_ZCRX_AREA_SUPPORTED_FLAGS	(IORING_ZCRX_AREA_DMABUF)
-
 static int io_zcrx_append_area(struct io_zcrx_ifq *ifq,
 				struct io_zcrx_area *area)
 {
@@ -394,13 +401,6 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 	unsigned nr_iovs;
 	int i, ret;
 
-	if (area_reg->flags & ~IO_ZCRX_AREA_SUPPORTED_FLAGS)
-		return -EINVAL;
-	if (area_reg->rq_area_token)
-		return -EINVAL;
-	if (area_reg->__resv2[0] || area_reg->__resv2[1])
-		return -EINVAL;
-
 	ret = -ENOMEM;
 	area = kzalloc(sizeof(*area), GFP_KERNEL);
 	if (!area)
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [zcrx-next 3/8] io_uring/zcrx: check all niovs filled with dma addresses
  2025-08-17 22:44 [zcrx-next 0/8] niov sizing and area mapping improvement Pavel Begunkov
  2025-08-17 22:44 ` [zcrx-next 1/8] io_uring/zcrx: don't pass slot to io_zcrx_create_area Pavel Begunkov
  2025-08-17 22:44 ` [zcrx-next 2/8] io_uring/zcrx: move area reg checks into io_import_area Pavel Begunkov
@ 2025-08-17 22:44 ` Pavel Begunkov
  2025-08-17 22:44 ` [zcrx-next 4/8] io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback() Pavel Begunkov
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

Add a warning if io_populate_area_dma() can't fill in all net_iovs;
this should never happen.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index ea62e13b9500..be6d59401dc7 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -77,6 +77,9 @@ static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
 			niov_idx++;
 		}
 	}
+
+	if (WARN_ON_ONCE(niov_idx != area->nia.num_niovs))
+		return -EFAULT;
 	return 0;
 }
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [zcrx-next 4/8] io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback()
  2025-08-17 22:44 [zcrx-next 0/8] niov sizing and area mapping improvement Pavel Begunkov
                   ` (2 preceding siblings ...)
  2025-08-17 22:44 ` [zcrx-next 3/8] io_uring/zcrx: check all niovs filled with dma addresses Pavel Begunkov
@ 2025-08-17 22:44 ` Pavel Begunkov
  2025-08-17 22:44 ` [zcrx-next 5/8] io_uring/zcrx: deduplicate area mapping Pavel Begunkov
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

io_zcrx_copy_chunk() doesn't and shouldn't care which area the buffer
is allocated from. Don't try to resolve the area in it; instead pass the
ifq to io_zcrx_alloc_fallback() and let it handle that. Also rename the
helper to io_alloc_fallback_niov() for more clarity.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index be6d59401dc7..cb8113e83311 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -945,10 +945,14 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
 	return true;
 }
 
-static struct net_iov *io_zcrx_alloc_fallback(struct io_zcrx_area *area)
+static struct net_iov *io_alloc_fallback_niov(struct io_zcrx_ifq *ifq)
 {
+	struct io_zcrx_area *area = ifq->area;
 	struct net_iov *niov = NULL;
 
+	if (area->mem.is_dmabuf)
+		return NULL;
+
 	spin_lock_bh(&area->freelist_lock);
 	if (area->free_count)
 		niov = __io_zcrx_get_free_niov(area);
@@ -1008,19 +1012,15 @@ static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
 				  struct page *src_page, unsigned int src_offset,
 				  size_t len)
 {
-	struct io_zcrx_area *area = ifq->area;
 	size_t copied = 0;
 	int ret = 0;
 
-	if (area->mem.is_dmabuf)
-		return -EFAULT;
-
 	while (len) {
 		struct io_copy_cache cc;
 		struct net_iov *niov;
 		size_t n;
 
-		niov = io_zcrx_alloc_fallback(area);
+		niov = io_alloc_fallback_niov(ifq);
 		if (!niov) {
 			ret = -ENOMEM;
 			break;
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [zcrx-next 5/8] io_uring/zcrx: deduplicate area mapping
  2025-08-17 22:44 [zcrx-next 0/8] niov sizing and area mapping improvement Pavel Begunkov
                   ` (3 preceding siblings ...)
  2025-08-17 22:44 ` [zcrx-next 4/8] io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback() Pavel Begunkov
@ 2025-08-17 22:44 ` Pavel Begunkov
  2025-08-17 22:44 ` [zcrx-next 6/8] io_uring/zcrx: remove dmabuf_offset Pavel Begunkov
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

With a common type for storing dma addresses and io_populate_area_dma(),
type-specific area mapping helpers are trivial, so open code them and
deduplicate the call to io_populate_area_dma().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 37 ++++++++++++++-----------------------
 1 file changed, 14 insertions(+), 23 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index cb8113e83311..2bd6e0bcc645 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -157,14 +157,6 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
 	return ret;
 }
 
-static int io_zcrx_map_area_dmabuf(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
-{
-	if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
-		return -EINVAL;
-	return io_populate_area_dma(ifq, area, area->mem.sgt,
-				    area->mem.dmabuf_offset);
-}
-
 static unsigned long io_count_account_pages(struct page **pages, unsigned nr_pages)
 {
 	struct folio *last_folio = NULL;
@@ -275,30 +267,29 @@ static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
 	}
 }
 
-static unsigned io_zcrx_map_area_umem(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
-{
-	int ret;
-
-	ret = dma_map_sgtable(ifq->dev, &area->mem.page_sg_table,
-				DMA_FROM_DEVICE, IO_DMA_ATTR);
-	if (ret < 0)
-		return ret;
-	return io_populate_area_dma(ifq, area, &area->mem.page_sg_table, 0);
-}
-
 static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 {
+	unsigned long offset;
+	struct sg_table *sgt;
 	int ret;
 
 	guard(mutex)(&ifq->pp_lock);
 	if (area->is_mapped)
 		return 0;
 
-	if (area->mem.is_dmabuf)
-		ret = io_zcrx_map_area_dmabuf(ifq, area);
-	else
-		ret = io_zcrx_map_area_umem(ifq, area);
+	if (!area->mem.is_dmabuf) {
+		ret = dma_map_sgtable(ifq->dev, &area->mem.page_sg_table,
+				      DMA_FROM_DEVICE, IO_DMA_ATTR);
+		if (ret < 0)
+			return ret;
+		sgt = &area->mem.page_sg_table;
+		offset = 0;
+	} else {
+		sgt = area->mem.sgt;
+		offset = area->mem.dmabuf_offset;
+	}
 
+	ret = io_populate_area_dma(ifq, area, sgt, offset);
 	if (ret == 0)
 		area->is_mapped = true;
 	return ret;
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [zcrx-next 6/8] io_uring/zcrx: remove dmabuf_offset
  2025-08-17 22:44 [zcrx-next 0/8] niov sizing and area mapping improvement Pavel Begunkov
                   ` (4 preceding siblings ...)
  2025-08-17 22:44 ` [zcrx-next 5/8] io_uring/zcrx: deduplicate area mapping Pavel Begunkov
@ 2025-08-17 22:44 ` Pavel Begunkov
  2025-08-17 22:44 ` [zcrx-next 7/8] io_uring/zcrx: make niov size variable Pavel Begunkov
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

The dmabuf offset was removed from uapi, so dmabuf_offset is now always
0 and can be removed together with the offset handling in
io_populate_area_dma().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 13 ++-----------
 io_uring/zcrx.h |  1 -
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 2bd6e0bcc645..87ba34c5a22f 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -53,7 +53,7 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
 
 static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
 				struct io_zcrx_area *area,
-				struct sg_table *sgt, unsigned long off)
+				struct sg_table *sgt)
 {
 	struct scatterlist *sg;
 	unsigned i, niov_idx = 0;
@@ -61,11 +61,6 @@ static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
 	for_each_sgtable_dma_sg(sgt, sg, i) {
 		dma_addr_t dma = sg_dma_address(sg);
 		unsigned long sg_len = sg_dma_len(sg);
-		unsigned long sg_off = min(sg_len, off);
-
-		off -= sg_off;
-		sg_len -= sg_off;
-		dma += sg_off;
 
 		while (sg_len && niov_idx < area->nia.num_niovs) {
 			struct net_iov *niov = &area->nia.niovs[niov_idx];
@@ -149,7 +144,6 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
 		goto err;
 	}
 
-	mem->dmabuf_offset = off;
 	mem->size = len;
 	return 0;
 err:
@@ -269,7 +263,6 @@ static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
 
 static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 {
-	unsigned long offset;
 	struct sg_table *sgt;
 	int ret;
 
@@ -283,13 +276,11 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 		if (ret < 0)
 			return ret;
 		sgt = &area->mem.page_sg_table;
-		offset = 0;
 	} else {
 		sgt = area->mem.sgt;
-		offset = area->mem.dmabuf_offset;
 	}
 
-	ret = io_populate_area_dma(ifq, area, sgt, offset);
+	ret = io_populate_area_dma(ifq, area, sgt);
 	if (ret == 0)
 		area->is_mapped = true;
 	return ret;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index f6a9ecf3e08a..4e94cfa720e6 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -20,7 +20,6 @@ struct io_zcrx_mem {
 	struct dma_buf_attachment	*attach;
 	struct dma_buf			*dmabuf;
 	struct sg_table			*sgt;
-	unsigned long			dmabuf_offset;
 };
 
 struct io_zcrx_area {
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [zcrx-next 7/8] io_uring/zcrx: make niov size variable
  2025-08-17 22:44 [zcrx-next 0/8] niov sizing and area mapping improvement Pavel Begunkov
                   ` (5 preceding siblings ...)
  2025-08-17 22:44 ` [zcrx-next 6/8] io_uring/zcrx: remove dmabuf_offset Pavel Begunkov
@ 2025-08-17 22:44 ` Pavel Begunkov
  2025-08-17 22:44 ` [zcrx-next 8/8] io_uring/zcrx: set sgt for umem area Pavel Begunkov
  2025-08-20 18:20 ` [zcrx-next 0/8] niov sizing and area mapping improvement Jens Axboe
  8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

Instead of using PAGE_SIZE for the niov size, add a niov_shift field to
ifq and patch up all important places. The copy fallback still assumes
PAGE_SIZE, so it'll be wasting some memory for now.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 32 +++++++++++++++++++++-----------
 io_uring/zcrx.h |  1 +
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 87ba34c5a22f..952cd7669589 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -45,16 +45,19 @@ static inline struct io_zcrx_area *io_zcrx_iov_to_area(const struct net_iov *nio
 static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
 {
 	struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
+	unsigned niov_pages_shift;
 
 	lockdep_assert(!area->mem.is_dmabuf);
 
-	return area->mem.pages[net_iov_idx(niov)];
+	niov_pages_shift = area->ifq->niov_shift - PAGE_SHIFT;
+	return area->mem.pages[net_iov_idx(niov) << niov_pages_shift];
 }
 
 static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
 				struct io_zcrx_area *area,
 				struct sg_table *sgt)
 {
+	unsigned niov_size = 1U << ifq->niov_shift;
 	struct scatterlist *sg;
 	unsigned i, niov_idx = 0;
 
@@ -62,13 +65,16 @@ static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
 		dma_addr_t dma = sg_dma_address(sg);
 		unsigned long sg_len = sg_dma_len(sg);
 
+		if (WARN_ON_ONCE(sg_len % niov_size))
+			return -EINVAL;
+
 		while (sg_len && niov_idx < area->nia.num_niovs) {
 			struct net_iov *niov = &area->nia.niovs[niov_idx];
 
 			if (net_mp_niov_set_dma_addr(niov, dma))
 				return -EFAULT;
-			sg_len -= PAGE_SIZE;
-			dma += PAGE_SIZE;
+			sg_len -= niov_size;
+			dma += niov_size;
 			niov_idx++;
 		}
 	}
@@ -396,7 +402,8 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 	if (ret)
 		goto err;
 
-	nr_iovs = area->mem.size >> PAGE_SHIFT;
+	ifq->niov_shift = PAGE_SHIFT;
+	nr_iovs = area->mem.size >> ifq->niov_shift;
 	area->nia.num_niovs = nr_iovs;
 
 	ret = -ENOMEM;
@@ -742,7 +749,7 @@ static void io_zcrx_ring_refill(struct page_pool *pp,
 		netmem_ref netmem;
 
 		area_idx = rqe->off >> IORING_ZCRX_AREA_SHIFT;
-		niov_idx = (rqe->off & ~IORING_ZCRX_AREA_MASK) >> PAGE_SHIFT;
+		niov_idx = (rqe->off & ~IORING_ZCRX_AREA_MASK) >> ifq->niov_shift;
 
 		if (unlikely(rqe->__pad || area_idx))
 			continue;
@@ -785,20 +792,23 @@ static void io_zcrx_refill_slow(struct page_pool *pp, struct io_zcrx_ifq *ifq)
 	spin_unlock_bh(&area->freelist_lock);
 }
 
-static void io_sync_allocated_niovs(struct page_pool *pp)
+static void io_sync_allocated_niovs(struct io_zcrx_ifq *ifq,
+				    struct page_pool *pp)
 {
 #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
+	unsigned niov_size;
 	int i;
 
 	if (!dma_dev_need_sync(pp->p.dev))
 		return;
 
+	niov_size = 1U << ifq->niov_shift;
 	for (i = 0; i < pp->alloc.count; i++) {
 		netmem_ref netmem = pp->alloc.cache[i];
 		dma_addr_t dma_addr = page_pool_get_dma_addr_netmem(netmem);
 
 		__dma_sync_single_for_device(pp->p.dev, dma_addr + pp->p.offset,
-					     PAGE_SIZE, pp->p.dma_dir);
+					     niov_size, pp->p.dma_dir);
 	}
 #endif
 }
@@ -819,7 +829,7 @@ static netmem_ref io_pp_zc_alloc_netmems(struct page_pool *pp, gfp_t gfp)
 	if (!pp->alloc.count)
 		return 0;
 out_return:
-	io_sync_allocated_niovs(pp);
+	io_sync_allocated_niovs(ifq, pp);
 	return pp->alloc.cache[--pp->alloc.count];
 }
 
@@ -847,8 +857,8 @@ static int io_pp_zc_init(struct page_pool *pp)
 		return -EINVAL;
 	if (WARN_ON_ONCE(!pp->dma_map))
 		return -EOPNOTSUPP;
-	if (pp->p.order != 0)
-		return -EOPNOTSUPP;
+	if (pp->p.order + PAGE_SHIFT != ifq->niov_shift)
+		return -EINVAL;
 	if (pp->p.dma_dir != DMA_FROM_DEVICE)
 		return -EOPNOTSUPP;
 
@@ -920,7 +930,7 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
 	cqe->flags = IORING_CQE_F_MORE;
 
 	area = io_zcrx_iov_to_area(niov);
-	offset = off + (net_iov_idx(niov) << PAGE_SHIFT);
+	offset = off + (net_iov_idx(niov) << ifq->niov_shift);
 	rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1);
 	rcqe->off = offset + ((u64)area->area_id << IORING_ZCRX_AREA_SHIFT);
 	rcqe->__pad = 0;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 4e94cfa720e6..41e4ceab8dd6 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -41,6 +41,7 @@ struct io_zcrx_area {
 struct io_zcrx_ifq {
 	struct io_ring_ctx		*ctx;
 	struct io_zcrx_area		*area;
+	unsigned			niov_shift;
 
 	spinlock_t			rq_lock ____cacheline_aligned_in_smp;
 	struct io_uring			*rq_ring;
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [zcrx-next 8/8] io_uring/zcrx: set sgt for umem area
  2025-08-17 22:44 [zcrx-next 0/8] niov sizing and area mapping improvement Pavel Begunkov
                   ` (6 preceding siblings ...)
  2025-08-17 22:44 ` [zcrx-next 7/8] io_uring/zcrx: make niov size variable Pavel Begunkov
@ 2025-08-17 22:44 ` Pavel Begunkov
  2025-08-20 18:20 ` [zcrx-next 0/8] niov sizing and area mapping improvement Jens Axboe
  8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-08-17 22:44 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence

Set struct io_zcrx_mem::sgt for umem areas as well to simplify looking
up the current sg table.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 14 ++++++--------
 io_uring/zcrx.h |  2 +-
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 952cd7669589..b3cfe0c04920 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -54,10 +54,10 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
 }
 
 static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
-				struct io_zcrx_area *area,
-				struct sg_table *sgt)
+				struct io_zcrx_area *area)
 {
 	unsigned niov_size = 1U << ifq->niov_shift;
+	struct sg_table *sgt = area->mem.sgt;
 	struct scatterlist *sg;
 	unsigned i, niov_idx = 0;
 
@@ -203,6 +203,7 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
 	if (ret < 0)
 		mem->account_pages = 0;
 
+	mem->sgt = &mem->page_sg_table;
 	mem->pages = pages;
 	mem->nr_folios = nr_pages;
 	mem->size = area_reg->len;
@@ -217,7 +218,8 @@ static void io_release_area_mem(struct io_zcrx_mem *mem)
 	}
 	if (mem->pages) {
 		unpin_user_pages(mem->pages, mem->nr_folios);
-		sg_free_table(&mem->page_sg_table);
+		sg_free_table(mem->sgt);
+		mem->sgt = NULL;
 		kvfree(mem->pages);
 	}
 }
@@ -269,7 +271,6 @@ static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
 
 static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 {
-	struct sg_table *sgt;
 	int ret;
 
 	guard(mutex)(&ifq->pp_lock);
@@ -281,12 +282,9 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 				      DMA_FROM_DEVICE, IO_DMA_ATTR);
 		if (ret < 0)
 			return ret;
-		sgt = &area->mem.page_sg_table;
-	} else {
-		sgt = area->mem.sgt;
 	}
 
-	ret = io_populate_area_dma(ifq, area, sgt);
+	ret = io_populate_area_dma(ifq, area);
 	if (ret == 0)
 		area->is_mapped = true;
 	return ret;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 41e4ceab8dd6..a48871b5adad 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -16,10 +16,10 @@ struct io_zcrx_mem {
 	unsigned long			nr_folios;
 	struct sg_table			page_sg_table;
 	unsigned long			account_pages;
+	struct sg_table			*sgt;
 
 	struct dma_buf_attachment	*attach;
 	struct dma_buf			*dmabuf;
-	struct sg_table			*sgt;
 };
 
 struct io_zcrx_area {
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [zcrx-next 0/8] niov sizing and area mapping improvement
  2025-08-17 22:44 [zcrx-next 0/8] niov sizing and area mapping improvement Pavel Begunkov
                   ` (7 preceding siblings ...)
  2025-08-17 22:44 ` [zcrx-next 8/8] io_uring/zcrx: set sgt for umem area Pavel Begunkov
@ 2025-08-20 18:20 ` Jens Axboe
  8 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2025-08-20 18:20 UTC (permalink / raw)
  To: io-uring, Pavel Begunkov


On Sun, 17 Aug 2025 23:44:11 +0100, Pavel Begunkov wrote:
> This includes a bunch of cleanups deduplicating area type handling,
> and Patch 7 introduces handling for non-PAGE_SIZE niovs.
> 
> For a full branch with all relevant dependencies see
> https://github.com/isilence/linux.git zcrx/for-next
> 
> Pavel Begunkov (8):
>   io_uring/zcrx: don't pass slot to io_zcrx_create_area
>   io_uring/zcrx: move area reg checks into io_import_area
>   io_uring/zcrx: check all niovs filled with dma addresses
>   io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback()
>   io_uring/zcrx: deduplicate area mapping
>   io_uring/zcrx: remove dmabuf_offset
>   io_uring/zcrx: make niov size variable
>   io_uring/zcrx: set sgt for umem area
> 
> [...]

Applied, thanks!

[1/8] io_uring/zcrx: don't pass slot to io_zcrx_create_area
      commit: e205db1eb9596e6e7d9ed78882d4c47c8448c2e5
[2/8] io_uring/zcrx: move area reg checks into io_import_area
      commit: ff9d7473a29a241491fad2b9e0e2de6671556b4e
[3/8] io_uring/zcrx: check all niovs filled with dma addresses
      commit: c6c489577c004b65734f2d59a9c0da94c8bd3187
[4/8] io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback()
      commit: f37d7f9b092274a33fe40b58b05327a82702e936
[5/8] io_uring/zcrx: deduplicate area mapping
      commit: fc8b5f573a2fa1cc5e7347493687229557ebd013
[6/8] io_uring/zcrx: remove dmabuf_offset
      commit: 1228c5129dd5577e1b988f85915e63d24b99ad92
[7/8] io_uring/zcrx: make niov size variable
      commit: 16a4e2d99220fd844efcf6d34b4d954912ed8d35
[8/8] io_uring/zcrx: set sgt for umem area
      commit: 14fcac7a7cec83e4ed15538103ef6c51400c559c

Best regards,
-- 
Jens Axboe




^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2025-08-20 18:20 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2025-08-17 22:44 [zcrx-next 0/8] niov sizing and area mapping improvement Pavel Begunkov
2025-08-17 22:44 ` [zcrx-next 1/8] io_uring/zcrx: don't pass slot to io_zcrx_create_area Pavel Begunkov
2025-08-17 22:44 ` [zcrx-next 2/8] io_uring/zcrx: move area reg checks into io_import_area Pavel Begunkov
2025-08-17 22:44 ` [zcrx-next 3/8] io_uring/zcrx: check all niovs filled with dma addresses Pavel Begunkov
2025-08-17 22:44 ` [zcrx-next 4/8] io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback() Pavel Begunkov
2025-08-17 22:44 ` [zcrx-next 5/8] io_uring/zcrx: deduplicate area mapping Pavel Begunkov
2025-08-17 22:44 ` [zcrx-next 6/8] io_uring/zcrx: remove dmabuf_offset Pavel Begunkov
2025-08-17 22:44 ` [zcrx-next 7/8] io_uring/zcrx: make niov size variable Pavel Begunkov
2025-08-17 22:44 ` [zcrx-next 8/8] io_uring/zcrx: set sgt for umem area Pavel Begunkov
2025-08-20 18:20 ` [zcrx-next 0/8] niov sizing and area mapping improvement Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.