All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kanchan Joshi <joshi.k@samsung.com>
To: axboe@kernel.dk, hch@lst.de, kbusch@kernel.org,
	martin.petersen@oracle.com, asml.silence@gmail.com,
	brauner@kernel.org, viro@zeniv.linux.org.uk, jack@suse.cz
Cc: linux-nvme@lists.infradead.org, linux-fsdevel@vger.kernel.org,
	io-uring@vger.kernel.org, linux-block@vger.kernel.org,
	linux-scsi@vger.kernel.org, gost.dev@samsung.com,
	vishak.g@samsung.com, anuj1072538@gmail.com,
	Anuj Gupta <anuj20.g@samsung.com>,
	Kanchan Joshi <joshi.k@samsung.com>
Subject: [PATCH v6 06/10] io_uring/rw: add support to send metadata along with read/write
Date: Wed, 30 Oct 2024 23:31:08 +0530	[thread overview]
Message-ID: <20241030180112.4635-7-joshi.k@samsung.com> (raw)
In-Reply-To: <20241030180112.4635-1-joshi.k@samsung.com>

From: Anuj Gupta <anuj20.g@samsung.com>

This patch adds the capability of passing integrity metadata along with
read/write.

Introduce a new 'struct io_uring_meta_pi' that contains the following:
- pi_flags: integrity check flags, namely
  IO_INTEGRITY_CHK_{GUARD/APPTAG/REFTAG}
- len: length of the pi/metadata buffer
- addr: address of the metadata buffer
- seed: seed value for reftag remapping
- app_tag: application-defined 16-bit value

The application sets up an SQE128 ring and prepares io_uring_meta_pi
within the second SQE.
The patch processes this information to prepare a uio_meta descriptor
and passes it down using kiocb->private.

Meta exchange is supported only for direct IO.
Also, vectored read/write operations with meta are not currently
supported.

Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
---
 include/uapi/linux/io_uring.h | 16 ++++++++
 io_uring/io_uring.c           |  4 ++
 io_uring/rw.c                 | 71 ++++++++++++++++++++++++++++++++++-
 io_uring/rw.h                 | 14 ++++++-
 4 files changed, 102 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 024745283783..48dcca125db3 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -105,6 +105,22 @@ struct io_uring_sqe {
 		 */
 		__u8	cmd[0];
 	};
+	/*
+	 * If the ring is initialized with IORING_SETUP_SQE128, then
+	 * this field is starting offset for 64 bytes of data. For meta io
+	 * this contains 'struct io_uring_meta_pi'
+	 */
+	__u8	big_sqe[0];
+};
+
+/* this is placed in SQE128 */
+struct io_uring_meta_pi {
+	__u16		pi_flags;
+	__u16		app_tag;
+	__u32		len;
+	__u64		addr;
+	__u64		seed;
+	__u64		rsvd[2];
 };
 
 /*
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 44a772013c09..c5fd74e42c04 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3879,6 +3879,7 @@ static int __init io_uring_init(void)
 	BUILD_BUG_SQE_ELEM(48, __u64,  addr3);
 	BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd);
 	BUILD_BUG_SQE_ELEM(56, __u64,  __pad2);
+	BUILD_BUG_SQE_ELEM_SIZE(64, 0, big_sqe);
 
 	BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
 		     sizeof(struct io_uring_rsrc_update));
@@ -3902,6 +3903,9 @@ static int __init io_uring_init(void)
 	/* top 8bits are for internal use */
 	BUILD_BUG_ON((IORING_URING_CMD_MASK & 0xff000000) != 0);
 
+	BUILD_BUG_ON(sizeof(struct io_uring_meta_pi) >
+		     sizeof(struct io_uring_sqe));
+
 	io_uring_optable_init();
 
 	/*
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 30448f343c7f..cbb74fcfd0d1 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -257,6 +257,46 @@ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
 	return 0;
 }
 
+static inline void io_meta_save_state(struct io_async_rw *io)
+{
+	io->meta_state.seed = io->meta.seed;
+	iov_iter_save_state(&io->meta.iter, &io->meta_state.iter_meta);
+}
+
+static inline void io_meta_restore(struct io_async_rw *io)
+{
+	io->meta.seed = io->meta_state.seed;
+	iov_iter_restore(&io->meta.iter, &io->meta_state.iter_meta);
+}
+
+static int io_prep_rw_meta(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+			   struct io_rw *rw, int ddir)
+{
+	const struct io_uring_meta_pi *md = (struct io_uring_meta_pi *)sqe->big_sqe;
+	const struct io_issue_def *def;
+	struct io_async_rw *io;
+	int ret;
+
+	if (READ_ONCE(md->rsvd[0]) || READ_ONCE(md->rsvd[1]))
+		return -EINVAL;
+
+	def = &io_issue_defs[req->opcode];
+	if (def->vectored)
+		return -EOPNOTSUPP;
+
+	io = req->async_data;
+	io->meta.flags = READ_ONCE(md->pi_flags);
+	io->meta.app_tag = READ_ONCE(md->app_tag);
+	io->meta.seed = READ_ONCE(md->seed);
+	ret = import_ubuf(ddir, u64_to_user_ptr(READ_ONCE(md->addr)),
+			  READ_ONCE(md->len), &io->meta.iter);
+	if (unlikely(ret < 0))
+		return ret;
+	rw->kiocb.ki_flags |= IOCB_HAS_METADATA;
+	io_meta_save_state(io);
+	return ret;
+}
+
 static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		      int ddir, bool do_import)
 {
@@ -279,11 +319,19 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		rw->kiocb.ki_ioprio = get_current_ioprio();
 	}
 	rw->kiocb.dio_complete = NULL;
+	rw->kiocb.ki_flags = 0;
 
 	rw->addr = READ_ONCE(sqe->addr);
 	rw->len = READ_ONCE(sqe->len);
 	rw->flags = READ_ONCE(sqe->rw_flags);
-	return io_prep_rw_setup(req, ddir, do_import);
+	ret = io_prep_rw_setup(req, ddir, do_import);
+
+	if (unlikely(ret))
+		return ret;
+
+	if (req->ctx->flags & IORING_SETUP_SQE128)
+		ret = io_prep_rw_meta(req, sqe, rw, ddir);
+	return ret;
 }
 
 int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -409,7 +457,10 @@ static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
 static void io_resubmit_prep(struct io_kiocb *req)
 {
 	struct io_async_rw *io = req->async_data;
+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
 
+	if (rw->kiocb.ki_flags & IOCB_HAS_METADATA)
+		io_meta_restore(io);
 	iov_iter_restore(&io->iter, &io->iter_state);
 }
 
@@ -794,7 +845,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 	if (!(req->flags & REQ_F_FIXED_FILE))
 		req->flags |= io_file_get_flags(file);
 
-	kiocb->ki_flags = file->f_iocb_flags;
+	kiocb->ki_flags |= file->f_iocb_flags;
 	ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type);
 	if (unlikely(ret))
 		return ret;
@@ -823,6 +874,18 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 		kiocb->ki_complete = io_complete_rw;
 	}
 
+	if (kiocb->ki_flags & IOCB_HAS_METADATA) {
+		struct io_async_rw *io = req->async_data;
+
+		/*
+		 * We have a union of meta fields with wpq used for buffered-io
+		 * in io_async_rw, so fail it here.
+		 */
+		if (!(req->file->f_flags & O_DIRECT))
+			return -EOPNOTSUPP;
+		kiocb->private = &io->meta;
+	}
+
 	return 0;
 }
 
@@ -897,6 +960,8 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
 	 * manually if we need to.
 	 */
 	iov_iter_restore(&io->iter, &io->iter_state);
+	if (kiocb->ki_flags & IOCB_HAS_METADATA)
+		io_meta_restore(io);
 
 	do {
 		/*
@@ -1101,6 +1166,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	} else {
 ret_eagain:
 		iov_iter_restore(&io->iter, &io->iter_state);
+		if (kiocb->ki_flags & IOCB_HAS_METADATA)
+			io_meta_restore(io);
 		if (kiocb->ki_flags & IOCB_WRITE)
 			io_req_end_write(req);
 		return -EAGAIN;
diff --git a/io_uring/rw.h b/io_uring/rw.h
index 3f432dc75441..2d7656bd268d 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -2,6 +2,11 @@
 
 #include <linux/pagemap.h>
 
+struct io_meta_state {
+	u32			seed;
+	struct iov_iter_state	iter_meta;
+};
+
 struct io_async_rw {
 	size_t				bytes_done;
 	struct iov_iter			iter;
@@ -9,7 +14,14 @@ struct io_async_rw {
 	struct iovec			fast_iov;
 	struct iovec			*free_iovec;
 	int				free_iov_nr;
-	struct wait_page_queue		wpq;
+	/* wpq is for buffered io, while meta fields are used with direct io */
+	union {
+		struct wait_page_queue		wpq;
+		struct {
+			struct uio_meta			meta;
+			struct io_meta_state		meta_state;
+		};
+	};
 };
 
 int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
-- 
2.25.1


  parent reply	other threads:[~2024-10-30 18:10 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20241030180957epcas5p3312b0a582e8562f8c2169e64d41592b2@epcas5p3.samsung.com>
2024-10-30 18:01 ` [PATCH v6 00/10] Read/Write with metadata/integrity Kanchan Joshi
2024-10-30 18:01   ` [PATCH v6 01/10] block: define set of integrity flags to be inherited by cloned bip Kanchan Joshi
2024-10-30 18:01   ` [PATCH v6 02/10] block: copy back bounce buffer to user-space correctly in case of split Kanchan Joshi
2024-10-30 18:01   ` [PATCH v6 03/10] block: modify bio_integrity_map_user to accept iov_iter as argument Kanchan Joshi
2024-10-31  4:33     ` kernel test robot
2024-10-30 18:01   ` [PATCH v6 04/10] fs, iov_iter: define meta io descriptor Kanchan Joshi
2024-10-31  6:55     ` Christoph Hellwig
2024-10-30 18:01   ` [PATCH v6 05/10] fs: introduce IOCB_HAS_METADATA for metadata Kanchan Joshi
2024-10-30 18:01   ` Kanchan Joshi [this message]
2024-10-30 21:09     ` [PATCH v6 06/10] io_uring/rw: add support to send metadata along with read/write Keith Busch
2024-10-31 14:39       ` Pavel Begunkov
2024-11-01 17:54         ` Kanchan Joshi
2024-11-07 17:23           ` Pavel Begunkov
2024-11-10 17:41             ` Kanchan Joshi
2024-11-12  0:54               ` Pavel Begunkov
2024-11-10 18:36             ` Kanchan Joshi
2024-11-12  1:32               ` Pavel Begunkov
2024-10-31  6:55     ` Christoph Hellwig
2024-10-30 18:01   ` [PATCH v6 07/10] block: introduce BIP_CHECK_GUARD/REFTAG/APPTAG bip_flags Kanchan Joshi
2024-10-30 18:01   ` [PATCH v6 08/10] nvme: add support for passing on the application tag Kanchan Joshi
2024-10-30 18:01   ` [PATCH v6 09/10] scsi: add support for user-meta interface Kanchan Joshi
2024-10-31  5:09     ` kernel test robot
2024-10-31  5:10     ` kernel test robot
2024-10-30 18:01   ` [PATCH v6 10/10] block: add support to pass user meta buffer Kanchan Joshi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241030180112.4635-7-joshi.k@samsung.com \
    --to=joshi.k@samsung.com \
    --cc=anuj1072538@gmail.com \
    --cc=anuj20.g@samsung.com \
    --cc=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=brauner@kernel.org \
    --cc=gost.dev@samsung.com \
    --cc=hch@lst.de \
    --cc=io-uring@vger.kernel.org \
    --cc=jack@suse.cz \
    --cc=kbusch@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=vishak.g@samsung.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.