linux-nvme.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH blktests] block tests: nvme metadata passthrough
@ 2025-06-06  0:30 Keith Busch
  2025-06-06  1:41 ` Anuj gupta
  2025-06-07 12:54 ` Shinichiro Kawasaki
  0 siblings, 2 replies; 7+ messages in thread
From: Keith Busch @ 2025-06-06  0:30 UTC (permalink / raw)
  To: linux-block, linux-nvme, shinichiro.kawasaki; +Cc: axboe, Keith Busch

From: Keith Busch <kbusch@kernel.org>

Get more coverage on nvme metadata passthrough. Specifically in this
test, read-only metadata is targeted as this had been a gap in previous
test coveraged.

Link: https://lore.kernel.org/linux-block/20250603184752.1185676-1-csander@purestorage.com/
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 src/Makefile                |   1 +
 src/nvme-passthrough-meta.c | 219 ++++++++++++++++++++++++++++++++++++
 tests/nvme/064              |  22 ++++
 tests/nvme/064.out          |   2 +
 4 files changed, 244 insertions(+)
 create mode 100644 src/nvme-passthrough-meta.c
 create mode 100755 tests/nvme/064
 create mode 100644 tests/nvme/064.out

diff --git a/src/Makefile b/src/Makefile
index a94e5f2..f91ac62 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -13,6 +13,7 @@ C_TARGETS := \
 	loop_change_fd \
 	loop_get_status_null \
 	mount_clear_sock \
+	nvme-passthrough-meta \
 	nbdsetsize \
 	openclose \
 	sg/dxfer-from-dev \
diff --git a/src/nvme-passthrough-meta.c b/src/nvme-passthrough-meta.c
new file mode 100644
index 0000000..a8a5b1b
--- /dev/null
+++ b/src/nvme-passthrough-meta.c
@@ -0,0 +1,219 @@
+#define _GNU_SOURCE
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <inttypes.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/types.h>
+
+#ifndef _LINUX_NVME_IOCTL_H
+#define _LINUX_NVME_IOCTL_H
+struct nvme_passthru_cmd {
+	__u8    opcode;
+	__u8    flags;
+	__u16   rsvd1;
+	__u32   nsid;
+	__u32   cdw2;
+	__u32   cdw3;
+	__u64   metadata;
+	__u64   addr;
+	__u32   metadata_len;
+	__u32   data_len;
+	__u32   cdw10;
+	__u32   cdw11;
+	__u32   cdw12;
+	__u32   cdw13;
+	__u32   cdw14;
+	__u32   cdw15;
+	__u32   timeout_ms;
+	__u32   result;
+};
+
+#define NVME_IOCTL_ID		_IO('N', 0x40)
+#define NVME_IOCTL_ADMIN_CMD    _IOWR('N', 0x41, struct nvme_passthru_cmd)
+#define NVME_IOCTL_IO_CMD       _IOWR('N', 0x43, struct nvme_passthru_cmd)
+#endif /* _UAPI_LINUX_NVME_IOCTL_H */
+
+struct nvme_lbaf {
+	__le16	ms;
+	__u8	ds;
+	__u8	rp;
+};
+
+struct nvme_id_ns {
+	__le64	nsze;
+	__le64	ncap;
+	__le64	nuse;
+	__u8	nsfeat;
+	__u8	nlbaf;
+	__u8	flbas;
+	__u8	mc;
+	__u8	dpc;
+	__u8	dps;
+	__u8	nmic;
+	__u8	rescap;
+	__u8	fpi;
+	__u8	dlfeat;
+	__le16	nawun;
+	__le16	nawupf;
+	__le16	nacwu;
+	__le16	nabsn;
+	__le16	nabo;
+	__le16	nabspf;
+	__le16	noiob;
+	__u8	nvmcap[16];
+	__le16	npwg;
+	__le16	npwa;
+	__le16	npdg;
+	__le16	npda;
+	__le16	nows;
+	__u8	rsvd74[18];
+	__le32	anagrpid;
+	__u8	rsvd96[3];
+	__u8	nsattr;
+	__le16	nvmsetid;
+	__le16	endgid;
+	__u8	nguid[16];
+	__u8	eui64[8];
+	struct nvme_lbaf lbaf[64];
+	__u8	vs[3712];
+};
+
+#define BUFFER_SIZE (32768)
+
+int main(int argc, char **argv)
+{
+	int ret, fd, nsid, blocks, meta_buffer_size;
+	void *buffer, *mptr = NULL, *meta = NULL;
+	struct nvme_passthru_cmd cmd;
+	struct nvme_lbaf lbaf;
+	struct nvme_id_ns ns;
+
+	__u64 block_size;
+	__u16 meta_size;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s /dev/nvmeXnY", argv[0]);
+		return EINVAL;
+	}
+
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	nsid = ioctl(fd, NVME_IOCTL_ID);
+	if (nsid < 0) {
+		perror("namespace id");
+		return errno;
+	}
+
+	cmd = (struct nvme_passthru_cmd) {
+		.opcode	= 0x6,
+		.nsid	= nsid,
+		.addr	= (__u64)(uintptr_t)&ns,
+		.data_len       = 4096,
+	};
+
+	ret = ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
+	if (ret < 0) {
+		perror("id-ns");
+		return errno;
+	}
+
+	lbaf = ns.lbaf[ns.flbas & 0xf];
+	block_size = 1 << lbaf.ds;
+	meta_size = lbaf.ms;
+
+	/* format not appropriate for this test */
+	if (meta_size == 0)
+		return 0;
+
+	blocks = BUFFER_SIZE / block_size;
+	meta_buffer_size = blocks * meta_size;
+
+	buffer = malloc(BUFFER_SIZE);
+	mptr = mmap(NULL, 8192, PROT_READ | PROT_WRITE,
+		MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if (mptr == MAP_FAILED) {
+		perror("mmap");
+		return errno;
+	}
+
+	/* this should directly use the user space buffer */
+	meta = mptr;
+	cmd = (struct nvme_passthru_cmd) {
+		.opcode		= 1,
+		.nsid		= 1,
+		.addr		= (uintptr_t)buffer,
+		.metadata       = (uintptr_t)meta,
+		.data_len       = BUFFER_SIZE,
+		.metadata_len   = meta_buffer_size,
+		.cdw12		= blocks - 1,
+	};
+	ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+	if (ret < 0) {
+		perror("nvme-write");
+		return ret;
+	}
+
+	cmd.opcode = 2;
+	ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+	if (ret < 0) {
+		perror("nvme-read");
+		return ret;
+	}
+
+	/*
+	 * this offset should either force a kernel copy if we don't have
+	 * contiguous pages, or test the device's metadata sgls
+	 */
+	meta = mptr + 4096 - 16;
+	cmd.opcode = 1;
+	cmd.metadata = (uintptr_t)meta;
+
+	ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+	if (ret < 0) {
+		perror("nvme-write");
+		return errno;
+	}
+
+	cmd.opcode = 2;
+	ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+	if (ret < 0) {
+		perror("nvme-read");
+		return errno;
+	}
+
+	/* This should not be mappable for write commands */
+	mptr = mmap(NULL, 8192, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if (mptr == MAP_FAILED) {
+		perror("mmap");
+		return errno;
+	}
+
+	meta = mptr;
+
+	cmd.opcode = 1;
+	cmd.metadata = (uintptr_t)meta;
+	ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+	if (ret == 0) {
+		perror("nvme-write (expect Failure)");
+		return EFAULT;
+	}
+
+	cmd.opcode = 2;
+	ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+	if (ret < 0) {
+		perror("nvme-read");
+		return ret;
+	}
+
+	return 0;
+}
diff --git a/tests/nvme/064 b/tests/nvme/064
new file mode 100755
index 0000000..ed9c565
--- /dev/null
+++ b/tests/nvme/064
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2025 Keith Busch <kbusch@kernel.org>
+#
+# Test out metadata through the passthrough interfaces
+
+. tests/nvme/rc
+
+requires() {
+	_nvme_requires
+}
+
+DESCRIPTION="exercise the nvme metadata usage with passthrough commands"
+QUICK=1
+
+test() {
+	echo "Running ${TEST_NAME}"
+
+	src/nvme-passthrough-meta ${TEST_DEV}
+
+	echo "Test complete"
+}
diff --git a/tests/nvme/064.out b/tests/nvme/064.out
new file mode 100644
index 0000000..5b34d4e
--- /dev/null
+++ b/tests/nvme/064.out
@@ -0,0 +1,2 @@
+Running nvme/064
+Test complete
-- 
2.47.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH blktests] block tests: nvme metadata passthrough
  2025-06-06  0:30 [PATCH blktests] block tests: nvme metadata passthrough Keith Busch
@ 2025-06-06  1:41 ` Anuj gupta
  2025-06-09 15:24   ` Keith Busch
  2025-06-07 12:54 ` Shinichiro Kawasaki
  1 sibling, 1 reply; 7+ messages in thread
From: Anuj gupta @ 2025-06-06  1:41 UTC (permalink / raw)
  To: Keith Busch
  Cc: linux-block, linux-nvme, shinichiro.kawasaki, axboe, Keith Busch

> +       /* this should directly use the user space buffer */
> +       meta = mptr;
> +       cmd = (struct nvme_passthru_cmd) {
> +               .opcode         = 1,
> +               .nsid           = 1,

Minor nit: should use `.nsid = nsid` instead of hardcoded `1`

> +       /* This should not be mappable for write commands */

Maybe reword this to:
/* This buffer is read-only, so using it for write passthrough should fail */
-- makes the intent clearer.

--
Anuj Gupta


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH blktests] block tests: nvme metadata passthrough
  2025-06-06  0:30 [PATCH blktests] block tests: nvme metadata passthrough Keith Busch
  2025-06-06  1:41 ` Anuj gupta
@ 2025-06-07 12:54 ` Shinichiro Kawasaki
  2025-06-08 19:55   ` Anuj gupta
  2025-06-09 15:04   ` Keith Busch
  1 sibling, 2 replies; 7+ messages in thread
From: Shinichiro Kawasaki @ 2025-06-07 12:54 UTC (permalink / raw)
  To: Keith Busch
  Cc: linux-block@vger.kernel.org, linux-nvme@lists.infradead.org,
	axboe@kernel.dk, Keith Busch

On Jun 05, 2025 / 17:30, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
> 
> Get more coverage on nvme metadata passthrough. Specifically in this
> test, read-only metadata is targeted as this had been a gap in previous
> test coveraged.

Thanks for the patch. I ran the test case on the kernel v6.15, and it passed.
Is this pass result expected? I guess this test case intends to extend test
coverage, and does not intend to recreate the failure reported in the Link.
So, I'm guessing the test case should pass with v6.15 kernel.

Also, please find a few comments in line.

...

> diff --git a/tests/nvme/064 b/tests/nvme/064
> new file mode 100755
> index 0000000..ed9c565
> --- /dev/null
> +++ b/tests/nvme/064
> @@ -0,0 +1,22 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-3.0+
> +# Copyright (C) 2025 Keith Busch <kbusch@kernel.org>
> +#
> +# Test out metadata through the passthrough interfaces
> +
> +. tests/nvme/rc
> +
> +requires() {
> +	_nvme_requires
> +}
> +
> +DESCRIPTION="exercise the nvme metadata usage with passthrough commands"
> +QUICK=1
> +
> +test() {

This function name should be test_device() instead of test() to indicate that
it uses TEST_DEV. For test(), blktests provides empty TEST_DEV. So, actually
the src/nvme-passthrough-meta is not doing the test as expected.

> +	echo "Running ${TEST_NAME}"
> +
> +	src/nvme-passthrough-meta ${TEST_DEV}

I suggest to check status code of the command above, like:

	if ! src/nvme-passthrough-meta ${TEST_DEV}; then
		echo "src/nvme-passthrough-meta failed"
	fi

This will catch the failure with empty TEST_DEV for test(). By renaming
test() to test_device(), src/nvme-passthrough-meta returns zero status.

Also, ${TEST_DEV} requires double quotations like "${TEST_DEV}", to suprress the
shellcheck WARN below:

$ make check
shellcheck -x -e SC2119 -f gcc check common/* \
        tests/*/rc tests/*/[0-9]*[0-9] src/*.sh
tests/nvme/064:19:28: note: Double quote to prevent globbing and word splitting. [SC2086]
make: *** [Makefile:21: check] Error 1



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH blktests] block tests: nvme metadata passthrough
  2025-06-07 12:54 ` Shinichiro Kawasaki
@ 2025-06-08 19:55   ` Anuj gupta
  2025-06-09  7:40     ` Shinichiro Kawasaki
  2025-06-09 15:04   ` Keith Busch
  1 sibling, 1 reply; 7+ messages in thread
From: Anuj gupta @ 2025-06-08 19:55 UTC (permalink / raw)
  To: Shinichiro Kawasaki
  Cc: Keith Busch, linux-block@vger.kernel.org,
	linux-nvme@lists.infradead.org, axboe@kernel.dk, Keith Busch

On Sat, Jun 7, 2025 at 6:25 PM Shinichiro Kawasaki
<shinichiro.kawasaki@wdc.com> wrote:
>
> On Jun 05, 2025 / 17:30, Keith Busch wrote:
> > From: Keith Busch <kbusch@kernel.org>
> >
> > Get more coverage on nvme metadata passthrough. Specifically in this
> > test, read-only metadata is targeted as this had been a gap in previous
> > test coveraged.
>
> Thanks for the patch. I ran the test case on the kernel v6.15, and it passed.
> Is this pass result expected? I guess this test case intends to extend test
> coverage, and does not intend to recreate the failure reported in the Link.
> So, I'm guessing the test case should pass with v6.15 kernel.

As Keith mentioned in that thread [1], this test can catch the issue
only in the more unusual or corner-case configurations.

Additionally, if your NVMe device is not formatted with protection
information, the test will exit early and report success, as the
integrity path won't be exercised in that case.

[1] https://lore.kernel.org/linux-block/aD-J9mzq_bJe26rD@kbusch-mbp/


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH blktests] block tests: nvme metadata passthrough
  2025-06-08 19:55   ` Anuj gupta
@ 2025-06-09  7:40     ` Shinichiro Kawasaki
  0 siblings, 0 replies; 7+ messages in thread
From: Shinichiro Kawasaki @ 2025-06-09  7:40 UTC (permalink / raw)
  To: Anuj gupta
  Cc: Keith Busch, linux-block@vger.kernel.org,
	linux-nvme@lists.infradead.org, axboe@kernel.dk, Keith Busch

On Jun 09, 2025 / 01:25, Anuj gupta wrote:
> On Sat, Jun 7, 2025 at 6:25 PM Shinichiro Kawasaki
> <shinichiro.kawasaki@wdc.com> wrote:
> >
> > On Jun 05, 2025 / 17:30, Keith Busch wrote:
> > > From: Keith Busch <kbusch@kernel.org>
> > >
> > > Get more coverage on nvme metadata passthrough. Specifically in this
> > > test, read-only metadata is targeted as this had been a gap in previous
> > > test coveraged.
> >
> > Thanks for the patch. I ran the test case on the kernel v6.15, and it passed.
> > Is this pass result expected? I guess this test case intends to extend test
> > coverage, and does not intend to recreate the failure reported in the Link.
> > So, I'm guessing the test case should pass with v6.15 kernel.
> 
> As Keith mentioned in that thread [1], this test can catch the issue
> only in the more unusual or corner-case configurations.
> 
> Additionally, if your NVMe device is not formatted with protection
> information, the test will exit early and report success, as the
> integrity path won't be exercised in that case.
> 
> [1] https://lore.kernel.org/linux-block/aD-J9mzq_bJe26rD@kbusch-mbp/

I see, thanks for the explanation. Then I wish the expected device conditions to
be described in the test case comment. Even if blktests can not prepare devices
with the condition automatically, I think it's worth documenting.

In daily blktests run, I use a normal QEMU nvme device, but running the test
case with this device does not extend the test coverage much, probably.
Can we prepare the best device for the new test case using QEMU? I hope that
some QEMU boot options and/or nvme format commands can prepare it.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH blktests] block tests: nvme metadata passthrough
  2025-06-07 12:54 ` Shinichiro Kawasaki
  2025-06-08 19:55   ` Anuj gupta
@ 2025-06-09 15:04   ` Keith Busch
  1 sibling, 0 replies; 7+ messages in thread
From: Keith Busch @ 2025-06-09 15:04 UTC (permalink / raw)
  To: Shinichiro Kawasaki
  Cc: Keith Busch, linux-block@vger.kernel.org,
	linux-nvme@lists.infradead.org, axboe@kernel.dk

On Sat, Jun 07, 2025 at 12:54:50PM +0000, Shinichiro Kawasaki wrote:
> On Jun 05, 2025 / 17:30, Keith Busch wrote:
> > From: Keith Busch <kbusch@kernel.org>
> > 
> > Get more coverage on nvme metadata passthrough. Specifically in this
> > test, read-only metadata is targeted as this had been a gap in previous
> > test coveraged.
> 
> Thanks for the patch. I ran the test case on the kernel v6.15, and it passed.
> Is this pass result expected? I guess this test case intends to extend test
> coverage, and does not intend to recreate the failure reported in the Link.
> So, I'm guessing the test case should pass with v6.15 kernel.

Oh, I thought it would fail in 6.15, or at least part of this test would
have. I changed a lot at the last second before posting, so maybe I
messed something up. I'll do another version after making sense of
what's going on.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH blktests] block tests: nvme metadata passthrough
  2025-06-06  1:41 ` Anuj gupta
@ 2025-06-09 15:24   ` Keith Busch
  0 siblings, 0 replies; 7+ messages in thread
From: Keith Busch @ 2025-06-09 15:24 UTC (permalink / raw)
  To: Anuj gupta
  Cc: Keith Busch, linux-block, linux-nvme, shinichiro.kawasaki, axboe

On Fri, Jun 06, 2025 at 07:11:20AM +0530, Anuj gupta wrote:
> Minor nit: should use `.nsid = nsid` instead of hardcoded `1`
> 
> > +       /* This should not be mappable for write commands */
> 
> Maybe reword this to:
> /* This buffer is read-only, so using it for write passthrough should fail */
> -- makes the intent clearer.

Err, I actually got this backwards. PROT_READ means we can write to the
disk from that memory, but we can't read from the disk into it.


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-06-09 15:58 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-06  0:30 [PATCH blktests] block tests: nvme metadata passthrough Keith Busch
2025-06-06  1:41 ` Anuj gupta
2025-06-09 15:24   ` Keith Busch
2025-06-07 12:54 ` Shinichiro Kawasaki
2025-06-08 19:55   ` Anuj gupta
2025-06-09  7:40     ` Shinichiro Kawasaki
2025-06-09 15:04   ` Keith Busch

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).