bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@kernel.dk>, io-uring@vger.kernel.org
Cc: Caleb Sander Mateos <csander@purestorage.com>,
	Akilesh Kailash <akailash@google.com>,
	bpf@vger.kernel.org, Alexei Starovoitov <ast@kernel.org>,
	Ming Lei <ming.lei@redhat.com>
Subject: [PATCH 5/5] io_uring: bpf: add io_uring_bpf_req_memcpy() kfunc
Date: Wed,  5 Nov 2025 00:21:20 +0800	[thread overview]
Message-ID: <20251104162123.1086035-6-ming.lei@redhat.com> (raw)
In-Reply-To: <20251104162123.1086035-1-ming.lei@redhat.com>

Add io_uring_bpf_req_memcpy() kfunc to enable BPF programs to copy
data between buffers associated with IORING_OP_BPF requests.

The kfunc supports copying between:
- Plain user buffers (using import_ubuf())
- Fixed/registered buffers (using io_import_reg_buf())
- Mixed combinations (plain-to-fixed, fixed-to-plain)

This enables BPF programs to implement data transformation and
processing operations directly within io_uring's request context,
avoiding additional userspace copies.

Implementation details:

1. Add issue_flags tracking in struct uring_bpf_data:
   - Replace __pad field with issue_flags (bytes 36-39)
   - Initialized to 0 before ops->prep_fn()
   - Saved from issue_flags parameter before ops->issue_fn()
   - Required by io_import_reg_buf() for proper async handling

2. Add buffer preparation infrastructure:
   - io_bpf_prep_buffers() extracts buffer metadata from SQE
   - Buffer 1: plain (addr/len) or fixed (buf_index/addr/len)
   - Buffer 2: plain only (addr3/optlen)
   - Buffer types encoded in sqe->bpf_op_flags bits 23-18

3. io_uring_bpf_req_memcpy() implementation:
   - Validates buffer IDs (1 or 2) and prevents same-buffer copies
   - Extracts buffer metadata based on buffer ID
   - Sets up iov_iters using import_ubuf() or io_import_reg_buf()
   - Performs page-sized chunked copying via temporary buffer
   - Returns bytes copied or negative error code

Buffer encoding in sqe->bpf_op_flags (32 bits):
  Bits 31-24: BPF operation ID (8 bits)
  Bits 23-21: Buffer 1 type (0=none, 1=plain, 2=fixed)
  Bits 20-18: Buffer 2 type (0=none, 1=plain)
  Bits 17-0:  Custom BPF flags (18 bits)

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 io_uring/bpf.c       | 187 +++++++++++++++++++++++++++++++++++++++++++
 io_uring/uring_bpf.h |  11 ++-
 2 files changed, 197 insertions(+), 1 deletion(-)

diff --git a/io_uring/bpf.c b/io_uring/bpf.c
index e837c3d57b96..ee4c617e3904 100644
--- a/io_uring/bpf.c
+++ b/io_uring/bpf.c
@@ -109,6 +109,8 @@ int io_uring_bpf_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (ret)
 		return ret;
 
+	/* ctx->uring_lock is held */
+	data->issue_flags = 0;
 	if (ops->prep_fn)
 		return ops->prep_fn(data, sqe);
 	return -EOPNOTSUPP;
@@ -126,6 +128,9 @@ static int __io_uring_bpf_issue(struct io_kiocb *req)
 
 int io_uring_bpf_issue(struct io_kiocb *req, unsigned int issue_flags)
 {
+	struct uring_bpf_data *data = io_kiocb_to_cmd(req, struct uring_bpf_data);
+
+	data->issue_flags = issue_flags;
 	if (issue_flags & IO_URING_F_UNLOCKED) {
 		int idx, ret;
 
@@ -143,6 +148,8 @@ void io_uring_bpf_fail(struct io_kiocb *req)
 	struct uring_bpf_data *data = io_kiocb_to_cmd(req, struct uring_bpf_data);
 	struct uring_bpf_ops *ops = uring_bpf_get_ops(data);
 
+	/* ctx->uring_lock is held */
+	data->issue_flags = 0;
 	if (ops->fail_fn)
 		ops->fail_fn(data);
 }
@@ -152,6 +159,8 @@ void io_uring_bpf_cleanup(struct io_kiocb *req)
 	struct uring_bpf_data *data = io_kiocb_to_cmd(req, struct uring_bpf_data);
 	struct uring_bpf_ops *ops = uring_bpf_get_ops(data);
 
+	/* ctx->uring_lock is held */
+	data->issue_flags = 0;
 	if (ops->cleanup_fn)
 		ops->cleanup_fn(data);
 }
@@ -324,6 +333,104 @@ static struct bpf_struct_ops bpf_uring_bpf_ops = {
 	.owner = THIS_MODULE,
 };
 
+/*
+ * Helper to copy data between two iov_iters using page extraction.
+ * Extracts pages from source iterator and copies them to destination.
+ * Returns number of bytes copied or negative error code.
+ */
+static ssize_t io_bpf_copy_iters(struct iov_iter *src, struct iov_iter *dst,
+				 size_t len)
+{
+#define MAX_PAGES_PER_LOOP 32
+	struct page *pages[MAX_PAGES_PER_LOOP];
+	size_t total_copied = 0;
+	bool need_unpin;
+
+	/* Determine if we'll need to unpin pages later */
+	need_unpin = user_backed_iter(src);
+
+	/* Process pages in chunks */
+	while (len > 0) {
+		struct page **page_array = pages;
+		size_t offset, copied = 0;
+		ssize_t extracted;
+		unsigned int nr_pages;
+		size_t chunk_len;
+		int i;
+
+		/* Extract up to MAX_PAGES_PER_LOOP pages */
+		chunk_len = min_t(size_t, len, MAX_PAGES_PER_LOOP * PAGE_SIZE);
+		extracted = iov_iter_extract_pages(src, &page_array, chunk_len,
+						   MAX_PAGES_PER_LOOP, 0, &offset);
+		if (extracted <= 0) {
+			if (total_copied > 0)
+				break;
+			return extracted < 0 ? extracted : -EFAULT;
+		}
+
+		nr_pages = DIV_ROUND_UP(offset + extracted, PAGE_SIZE);
+
+		/* Copy pages to destination iterator */
+		for (i = 0; i < nr_pages && copied < extracted; i++) {
+			size_t page_offset = (i == 0) ? offset : 0;
+			size_t page_len = min_t(size_t, extracted - copied,
+						PAGE_SIZE - page_offset);
+			size_t n;
+
+			n = copy_page_to_iter(pages[i], page_offset, page_len, dst);
+			copied += n;
+			if (n < page_len)
+				break;
+		}
+
+		/* Clean up extracted pages */
+		if (need_unpin)
+			unpin_user_pages(pages, nr_pages);
+
+		total_copied += copied;
+		len -= copied;
+
+		/* Stop if we didn't copy all extracted data */
+		if (copied < extracted)
+			break;
+	}
+
+	return total_copied;
+#undef MAX_PAGES_PER_LOOP
+}
+
+/*
+ * Helper to import a buffer into an iov_iter for BPF memcpy operations.
+ * Handles both plain user buffers and fixed/registered buffers.
+ *
+ * @req: io_kiocb request
+ * @iter: output iterator
+ * @buf_type: buffer type (plain or fixed)
+ * @addr: buffer address
+ * @offset: offset into buffer
+ * @len: length from offset
+ * @direction: ITER_SOURCE for source buffer, ITER_DEST for destination
+ * @issue_flags: io_uring issue flags
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int io_bpf_import_buffer(struct io_kiocb *req, struct iov_iter *iter,
+				u8 buf_type, u64 addr, unsigned int offset,
+				u32 len, int direction, unsigned int issue_flags)
+{
+	if (buf_type == IORING_BPF_BUF_TYPE_PLAIN) {
+		/* Plain user buffer */
+		return import_ubuf(direction, (void __user *)(addr + offset),
+				   len - offset, iter);
+	} else if (buf_type == IORING_BPF_BUF_TYPE_FIXED) {
+		/* Fixed buffer */
+		return io_import_reg_buf(req, iter, addr + offset,
+					 len - offset, direction, issue_flags);
+	}
+
+	return -EINVAL;
+}
+
 __bpf_kfunc_start_defs();
 __bpf_kfunc void uring_bpf_set_result(struct uring_bpf_data *data, int res)
 {
@@ -339,11 +446,91 @@ __bpf_kfunc struct io_kiocb *uring_bpf_data_to_req(struct uring_bpf_data *data)
 {
 	return cmd_to_io_kiocb(data);
 }
+
+/**
+ * io_uring_bpf_req_memcpy - Copy data between io_uring BPF request buffers
+ * @data: BPF request data containing buffer metadata
+ * @dest: Destination buffer descriptor (with buf_id and offset)
+ * @src: Source buffer descriptor (with buf_id and offset)
+ * @len: Number of bytes to copy
+ *
+ * Copies data between two different io_uring BPF request buffers (buf_id 1 and 2).
+ * Supports: plain-to-plain, fixed-to-plain, and plain-to-fixed.
+ * Does not support copying within the same buffer (src and dest must be different).
+ *
+ * Returns: Number of bytes copied on success, negative error code on failure
+ */
+__bpf_kfunc int io_uring_bpf_req_memcpy(struct uring_bpf_data *data,
+					struct bpf_req_mem_desc *dest,
+					struct bpf_req_mem_desc *src,
+					unsigned int len)
+{
+	struct io_kiocb *req = cmd_to_io_kiocb(data);
+	struct iov_iter dst_iter, src_iter;
+	u8 dst_type, src_type;
+	u64 dst_addr, src_addr;
+	u32 dst_len, src_len;
+	int ret;
+
+	/* Validate buffer IDs */
+	if (dest->buf_id < 1 || dest->buf_id > 2 ||
+	    src->buf_id < 1 || src->buf_id > 2)
+		return -EINVAL;
+
+	/* Don't allow copying within the same buffer */
+	if (src->buf_id == dest->buf_id)
+		return -EINVAL;
+
+	/* Extract source buffer metadata */
+	if (src->buf_id == 1) {
+		src_type = IORING_BPF_BUF1_TYPE(data->opf);
+		src_addr = data->buf1_addr;
+		src_len = data->buf1_len;
+	} else {
+		src_type = IORING_BPF_BUF2_TYPE(data->opf);
+		src_addr = data->buf2_addr;
+		src_len = data->buf2_len;
+	}
+
+	/* Extract destination buffer metadata */
+	if (dest->buf_id == 1) {
+		dst_type = IORING_BPF_BUF1_TYPE(data->opf);
+		dst_addr = data->buf1_addr;
+		dst_len = data->buf1_len;
+	} else {
+		dst_type = IORING_BPF_BUF2_TYPE(data->opf);
+		dst_addr = data->buf2_addr;
+		dst_len = data->buf2_len;
+	}
+
+	/* Validate offsets and lengths */
+	if (src->offset + len > src_len || dest->offset + len > dst_len)
+		return -EINVAL;
+
+	/* Initialize source iterator */
+	ret = io_bpf_import_buffer(req, &src_iter, src_type,
+				   src_addr, src->offset, src_len,
+				   ITER_SOURCE, data->issue_flags);
+	if (ret)
+		return ret;
+
+	/* Initialize destination iterator */
+	ret = io_bpf_import_buffer(req, &dst_iter, dst_type,
+				   dst_addr, dest->offset, dst_len,
+				   ITER_DEST, data->issue_flags);
+	if (ret)
+		return ret;
+
+	/* Extract pages from source iterator and copy to destination */
+	return io_bpf_copy_iters(&src_iter, &dst_iter, len);
+}
+
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(uring_bpf_kfuncs)
 BTF_ID_FLAGS(func, uring_bpf_set_result)
 BTF_ID_FLAGS(func, uring_bpf_data_to_req)
+BTF_ID_FLAGS(func, io_uring_bpf_req_memcpy)
 BTF_KFUNCS_END(uring_bpf_kfuncs)
 
 static const struct btf_kfunc_id_set uring_kfunc_set = {
diff --git a/io_uring/uring_bpf.h b/io_uring/uring_bpf.h
index c919931cb4b0..d6e0d6dff82e 100644
--- a/io_uring/uring_bpf.h
+++ b/io_uring/uring_bpf.h
@@ -14,13 +14,22 @@ struct uring_bpf_data {
 	/* Buffer 2 metadata - readable for bpf prog (plain only) */
 	u64		buf2_addr;		/* buffer 2 address, bytes 24-31 */
 	u32		buf2_len;		/* buffer 2 length, bytes 32-35 */
-	u32		__pad;			/* padding, bytes 36-39 */
+	u32		issue_flags;		/* issue_flags from io_uring, bytes 36-39 */
 
 	/* writeable for bpf prog */
 	u8              pdu[64 - sizeof(struct file *) - 4 * sizeof(u32) -
 		2 * sizeof(u64)];
 };
 
+/*
+ * Descriptor for io_uring BPF request buffer.
+ * Used by io_uring_bpf_req_memcpy() to identify which buffer to copy from/to.
+ */
+struct bpf_req_mem_desc {
+	u8		buf_id;		/* Buffer ID: 1 or 2 */
+	unsigned int	offset;		/* Offset into buffer */
+};
+
 typedef int (*uring_io_prep_t)(struct uring_bpf_data *data,
 			       const struct io_uring_sqe *sqe);
 typedef int (*uring_io_issue_t)(struct uring_bpf_data *data);
-- 
2.47.0


  parent reply	other threads:[~2025-11-04 16:22 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-04 16:21 [PATCH 0/5] io_uring: add IORING_OP_BPF for extending io_uring Ming Lei
2025-11-04 16:21 ` [PATCH 1/5] io_uring: prepare for extending io_uring with bpf Ming Lei
2025-11-04 16:21 ` [PATCH 2/5] io_uring: bpf: add io_uring_ctx setup for BPF into one list Ming Lei
2025-11-04 16:21 ` [PATCH 3/5] io_uring: bpf: extend io_uring with bpf struct_ops Ming Lei
2025-11-07 19:02   ` kernel test robot
2025-11-08  6:53   ` kernel test robot
2025-11-13 10:32   ` Stefan Metzmacher
2025-11-13 10:59     ` Ming Lei
2025-11-13 11:19       ` Stefan Metzmacher
2025-11-14  3:00         ` Ming Lei
2025-12-08 22:45           ` Caleb Sander Mateos
2025-12-09  3:08             ` Ming Lei
2025-12-10 16:11               ` Caleb Sander Mateos
2025-11-19 14:39   ` Jonathan Corbet
2025-11-20  1:46     ` Ming Lei
2025-11-20  1:51       ` Ming Lei
2025-11-04 16:21 ` [PATCH 4/5] io_uring: bpf: add buffer support for IORING_OP_BPF Ming Lei
2025-11-13 10:42   ` Stefan Metzmacher
2025-11-13 11:04     ` Ming Lei
2025-11-13 11:25       ` Stefan Metzmacher
2025-11-04 16:21 ` Ming Lei [this message]
2025-11-07 18:51   ` [PATCH 5/5] io_uring: bpf: add io_uring_bpf_req_memcpy() kfunc kernel test robot
2025-11-05 12:47 ` [PATCH 0/5] io_uring: add IORING_OP_BPF for extending io_uring Pavel Begunkov
2025-11-05 15:57   ` Ming Lei
2025-11-06 16:03     ` Pavel Begunkov
2025-11-07 15:54       ` Ming Lei
2025-11-11 14:07         ` Pavel Begunkov
2025-11-13  4:18           ` Ming Lei
2025-11-19 19:00             ` Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251104162123.1086035-6-ming.lei@redhat.com \
    --to=ming.lei@redhat.com \
    --cc=akailash@google.com \
    --cc=ast@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=bpf@vger.kernel.org \
    --cc=csander@purestorage.com \
    --cc=io-uring@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).