* [PATCH blktests] block tests: nvme metadata passthrough
@ 2025-06-06 0:30 Keith Busch
2025-06-06 1:41 ` Anuj gupta
2025-06-07 12:54 ` Shinichiro Kawasaki
0 siblings, 2 replies; 7+ messages in thread
From: Keith Busch @ 2025-06-06 0:30 UTC (permalink / raw)
To: linux-block, linux-nvme, shinichiro.kawasaki; +Cc: axboe, Keith Busch
From: Keith Busch <kbusch@kernel.org>
Get more coverage on nvme metadata passthrough. Specifically in this
test, read-only metadata is targeted as this had been a gap in previous
test coveraged.
Link: https://lore.kernel.org/linux-block/20250603184752.1185676-1-csander@purestorage.com/
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
src/Makefile | 1 +
src/nvme-passthrough-meta.c | 219 ++++++++++++++++++++++++++++++++++++
tests/nvme/064 | 22 ++++
tests/nvme/064.out | 2 +
4 files changed, 244 insertions(+)
create mode 100644 src/nvme-passthrough-meta.c
create mode 100755 tests/nvme/064
create mode 100644 tests/nvme/064.out
diff --git a/src/Makefile b/src/Makefile
index a94e5f2..f91ac62 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -13,6 +13,7 @@ C_TARGETS := \
loop_change_fd \
loop_get_status_null \
mount_clear_sock \
+ nvme-passthrough-meta \
nbdsetsize \
openclose \
sg/dxfer-from-dev \
diff --git a/src/nvme-passthrough-meta.c b/src/nvme-passthrough-meta.c
new file mode 100644
index 0000000..a8a5b1b
--- /dev/null
+++ b/src/nvme-passthrough-meta.c
@@ -0,0 +1,219 @@
+#define _GNU_SOURCE
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <inttypes.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/types.h>
+
+#ifndef _LINUX_NVME_IOCTL_H
+#define _LINUX_NVME_IOCTL_H
+struct nvme_passthru_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 rsvd1;
+ __u32 nsid;
+ __u32 cdw2;
+ __u32 cdw3;
+ __u64 metadata;
+ __u64 addr;
+ __u32 metadata_len;
+ __u32 data_len;
+ __u32 cdw10;
+ __u32 cdw11;
+ __u32 cdw12;
+ __u32 cdw13;
+ __u32 cdw14;
+ __u32 cdw15;
+ __u32 timeout_ms;
+ __u32 result;
+};
+
+#define NVME_IOCTL_ID _IO('N', 0x40)
+#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_passthru_cmd)
+#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd)
+#endif /* _UAPI_LINUX_NVME_IOCTL_H */
+
+struct nvme_lbaf {
+ __le16 ms;
+ __u8 ds;
+ __u8 rp;
+};
+
+struct nvme_id_ns {
+ __le64 nsze;
+ __le64 ncap;
+ __le64 nuse;
+ __u8 nsfeat;
+ __u8 nlbaf;
+ __u8 flbas;
+ __u8 mc;
+ __u8 dpc;
+ __u8 dps;
+ __u8 nmic;
+ __u8 rescap;
+ __u8 fpi;
+ __u8 dlfeat;
+ __le16 nawun;
+ __le16 nawupf;
+ __le16 nacwu;
+ __le16 nabsn;
+ __le16 nabo;
+ __le16 nabspf;
+ __le16 noiob;
+ __u8 nvmcap[16];
+ __le16 npwg;
+ __le16 npwa;
+ __le16 npdg;
+ __le16 npda;
+ __le16 nows;
+ __u8 rsvd74[18];
+ __le32 anagrpid;
+ __u8 rsvd96[3];
+ __u8 nsattr;
+ __le16 nvmsetid;
+ __le16 endgid;
+ __u8 nguid[16];
+ __u8 eui64[8];
+ struct nvme_lbaf lbaf[64];
+ __u8 vs[3712];
+};
+
+#define BUFFER_SIZE (32768)
+
+int main(int argc, char **argv)
+{
+ int ret, fd, nsid, blocks, meta_buffer_size;
+ void *buffer, *mptr = NULL, *meta = NULL;
+ struct nvme_passthru_cmd cmd;
+ struct nvme_lbaf lbaf;
+ struct nvme_id_ns ns;
+
+ __u64 block_size;
+ __u16 meta_size;
+
+ if (argc < 2) {
+ fprintf(stderr, "usage: %s /dev/nvmeXnY", argv[0]);
+ return EINVAL;
+ }
+
+ fd = open(argv[1], O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ nsid = ioctl(fd, NVME_IOCTL_ID);
+ if (nsid < 0) {
+ perror("namespace id");
+ return errno;
+ }
+
+ cmd = (struct nvme_passthru_cmd) {
+ .opcode = 0x6,
+ .nsid = nsid,
+ .addr = (__u64)(uintptr_t)&ns,
+ .data_len = 4096,
+ };
+
+ ret = ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
+ if (ret < 0) {
+ perror("id-ns");
+ return errno;
+ }
+
+ lbaf = ns.lbaf[ns.flbas & 0xf];
+ block_size = 1 << lbaf.ds;
+ meta_size = lbaf.ms;
+
+ /* format not appropriate for this test */
+ if (meta_size == 0)
+ return 0;
+
+ blocks = BUFFER_SIZE / block_size;
+ meta_buffer_size = blocks * meta_size;
+
+ buffer = malloc(BUFFER_SIZE);
+ mptr = mmap(NULL, 8192, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (mptr == MAP_FAILED) {
+ perror("mmap");
+ return errno;
+ }
+
+ /* this should directly use the user space buffer */
+ meta = mptr;
+ cmd = (struct nvme_passthru_cmd) {
+ .opcode = 1,
+ .nsid = 1,
+ .addr = (uintptr_t)buffer,
+ .metadata = (uintptr_t)meta,
+ .data_len = BUFFER_SIZE,
+ .metadata_len = meta_buffer_size,
+ .cdw12 = blocks - 1,
+ };
+ ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+ if (ret < 0) {
+ perror("nvme-write");
+ return ret;
+ }
+
+ cmd.opcode = 2;
+ ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+ if (ret < 0) {
+ perror("nvme-read");
+ return ret;
+ }
+
+ /*
+ * this offset should either force a kernel copy if we don't have
+ * contiguous pages, or test the device's metadata sgls
+ */
+ meta = mptr + 4096 - 16;
+ cmd.opcode = 1;
+ cmd.metadata = (uintptr_t)meta;
+
+ ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+ if (ret < 0) {
+ perror("nvme-write");
+ return errno;
+ }
+
+ cmd.opcode = 2;
+ ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+ if (ret < 0) {
+ perror("nvme-read");
+ return errno;
+ }
+
+ /* This should not be mappable for write commands */
+ mptr = mmap(NULL, 8192, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (mptr == MAP_FAILED) {
+ perror("mmap");
+ return errno;
+ }
+
+ meta = mptr;
+
+ cmd.opcode = 1;
+ cmd.metadata = (uintptr_t)meta;
+ ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+ if (ret == 0) {
+ perror("nvme-write (expect Failure)");
+ return EFAULT;
+ }
+
+ cmd.opcode = 2;
+ ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+ if (ret < 0) {
+ perror("nvme-read");
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/tests/nvme/064 b/tests/nvme/064
new file mode 100755
index 0000000..ed9c565
--- /dev/null
+++ b/tests/nvme/064
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2025 Keith Busch <kbusch@kernel.org>
+#
+# Test out metadata through the passthrough interfaces
+
+. tests/nvme/rc
+
+requires() {
+ _nvme_requires
+}
+
+DESCRIPTION="exercise the nvme metadata usage with passthrough commands"
+QUICK=1
+
+test() {
+ echo "Running ${TEST_NAME}"
+
+ src/nvme-passthrough-meta ${TEST_DEV}
+
+ echo "Test complete"
+}
diff --git a/tests/nvme/064.out b/tests/nvme/064.out
new file mode 100644
index 0000000..5b34d4e
--- /dev/null
+++ b/tests/nvme/064.out
@@ -0,0 +1,2 @@
+Running nvme/064
+Test complete
--
2.47.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH blktests] block tests: nvme metadata passthrough
2025-06-06 0:30 [PATCH blktests] block tests: nvme metadata passthrough Keith Busch
@ 2025-06-06 1:41 ` Anuj gupta
2025-06-09 15:24 ` Keith Busch
2025-06-07 12:54 ` Shinichiro Kawasaki
1 sibling, 1 reply; 7+ messages in thread
From: Anuj gupta @ 2025-06-06 1:41 UTC (permalink / raw)
To: Keith Busch
Cc: linux-block, linux-nvme, shinichiro.kawasaki, axboe, Keith Busch
> + /* this should directly use the user space buffer */
> + meta = mptr;
> + cmd = (struct nvme_passthru_cmd) {
> + .opcode = 1,
> + .nsid = 1,
Minor nit: should use `.nsid = nsid` instead of hardcoded `1`
> + /* This should not be mappable for write commands */
Maybe reword this to:
/* This buffer is read-only, so using it for write passthrough should fail */
-- makes the intent clearer.
--
Anuj Gupta
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH blktests] block tests: nvme metadata passthrough
2025-06-06 0:30 [PATCH blktests] block tests: nvme metadata passthrough Keith Busch
2025-06-06 1:41 ` Anuj gupta
@ 2025-06-07 12:54 ` Shinichiro Kawasaki
2025-06-08 19:55 ` Anuj gupta
2025-06-09 15:04 ` Keith Busch
1 sibling, 2 replies; 7+ messages in thread
From: Shinichiro Kawasaki @ 2025-06-07 12:54 UTC (permalink / raw)
To: Keith Busch
Cc: linux-block@vger.kernel.org, linux-nvme@lists.infradead.org,
axboe@kernel.dk, Keith Busch
On Jun 05, 2025 / 17:30, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
>
> Get more coverage on nvme metadata passthrough. Specifically in this
> test, read-only metadata is targeted as this had been a gap in previous
> test coveraged.
Thanks for the patch. I ran the test case on the kernel v6.15, and it passed.
Is this pass result expected? I guess this test case intends to extend test
coverage, and does not intend to recreate the failure reported in the Link.
So, I'm guessing the test case should pass with v6.15 kernel.
Also, please find a few comments in line.
...
> diff --git a/tests/nvme/064 b/tests/nvme/064
> new file mode 100755
> index 0000000..ed9c565
> --- /dev/null
> +++ b/tests/nvme/064
> @@ -0,0 +1,22 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-3.0+
> +# Copyright (C) 2025 Keith Busch <kbusch@kernel.org>
> +#
> +# Test out metadata through the passthrough interfaces
> +
> +. tests/nvme/rc
> +
> +requires() {
> + _nvme_requires
> +}
> +
> +DESCRIPTION="exercise the nvme metadata usage with passthrough commands"
> +QUICK=1
> +
> +test() {
This function name should be test_device() instead of test() to indicate that
it uses TEST_DEV. For test(), blktests provides empty TEST_DEV. So, actually
the src/nvme-passthrough-meta is not doing the test as expected.
> + echo "Running ${TEST_NAME}"
> +
> + src/nvme-passthrough-meta ${TEST_DEV}
I suggest to check status code of the command above, like:
if ! src/nvme-passthrough-meta ${TEST_DEV}; then
echo "src/nvme-passthrough-meta failed"
fi
This will catch the failure with empty TEST_DEV for test(). By renaming
test() to test_device(), src/nvme-passthrough-meta returns zero status.
Also, ${TEST_DEV} requires double quotations like "${TEST_DEV}", to suprress the
shellcheck WARN below:
$ make check
shellcheck -x -e SC2119 -f gcc check common/* \
tests/*/rc tests/*/[0-9]*[0-9] src/*.sh
tests/nvme/064:19:28: note: Double quote to prevent globbing and word splitting. [SC2086]
make: *** [Makefile:21: check] Error 1
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH blktests] block tests: nvme metadata passthrough
2025-06-07 12:54 ` Shinichiro Kawasaki
@ 2025-06-08 19:55 ` Anuj gupta
2025-06-09 7:40 ` Shinichiro Kawasaki
2025-06-09 15:04 ` Keith Busch
1 sibling, 1 reply; 7+ messages in thread
From: Anuj gupta @ 2025-06-08 19:55 UTC (permalink / raw)
To: Shinichiro Kawasaki
Cc: Keith Busch, linux-block@vger.kernel.org,
linux-nvme@lists.infradead.org, axboe@kernel.dk, Keith Busch
On Sat, Jun 7, 2025 at 6:25 PM Shinichiro Kawasaki
<shinichiro.kawasaki@wdc.com> wrote:
>
> On Jun 05, 2025 / 17:30, Keith Busch wrote:
> > From: Keith Busch <kbusch@kernel.org>
> >
> > Get more coverage on nvme metadata passthrough. Specifically in this
> > test, read-only metadata is targeted as this had been a gap in previous
> > test coveraged.
>
> Thanks for the patch. I ran the test case on the kernel v6.15, and it passed.
> Is this pass result expected? I guess this test case intends to extend test
> coverage, and does not intend to recreate the failure reported in the Link.
> So, I'm guessing the test case should pass with v6.15 kernel.
As Keith mentioned in that thread [1], this test can catch the issue
only in the more unusual or corner-case configurations.
Additionally, if your NVMe device is not formatted with protection
information, the test will exit early and report success, as the
integrity path won't be exercised in that case.
[1] https://lore.kernel.org/linux-block/aD-J9mzq_bJe26rD@kbusch-mbp/
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH blktests] block tests: nvme metadata passthrough
2025-06-08 19:55 ` Anuj gupta
@ 2025-06-09 7:40 ` Shinichiro Kawasaki
0 siblings, 0 replies; 7+ messages in thread
From: Shinichiro Kawasaki @ 2025-06-09 7:40 UTC (permalink / raw)
To: Anuj gupta
Cc: Keith Busch, linux-block@vger.kernel.org,
linux-nvme@lists.infradead.org, axboe@kernel.dk, Keith Busch
On Jun 09, 2025 / 01:25, Anuj gupta wrote:
> On Sat, Jun 7, 2025 at 6:25 PM Shinichiro Kawasaki
> <shinichiro.kawasaki@wdc.com> wrote:
> >
> > On Jun 05, 2025 / 17:30, Keith Busch wrote:
> > > From: Keith Busch <kbusch@kernel.org>
> > >
> > > Get more coverage on nvme metadata passthrough. Specifically in this
> > > test, read-only metadata is targeted as this had been a gap in previous
> > > test coveraged.
> >
> > Thanks for the patch. I ran the test case on the kernel v6.15, and it passed.
> > Is this pass result expected? I guess this test case intends to extend test
> > coverage, and does not intend to recreate the failure reported in the Link.
> > So, I'm guessing the test case should pass with v6.15 kernel.
>
> As Keith mentioned in that thread [1], this test can catch the issue
> only in the more unusual or corner-case configurations.
>
> Additionally, if your NVMe device is not formatted with protection
> information, the test will exit early and report success, as the
> integrity path won't be exercised in that case.
>
> [1] https://lore.kernel.org/linux-block/aD-J9mzq_bJe26rD@kbusch-mbp/
I see, thanks for the explanation. Then I wish the expected device conditions to
be described in the test case comment. Even if blktests can not prepare devices
with the condition automatically, I think it's worth documenting.
In daily blktests run, I use a normal QEMU nvme device, but running the test
case with this device does not extend the test coverage much, probably.
Can we prepare the best device for the new test case using QEMU? I hope that
some QEMU boot options and/or nvme format commands can prepare it.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH blktests] block tests: nvme metadata passthrough
2025-06-07 12:54 ` Shinichiro Kawasaki
2025-06-08 19:55 ` Anuj gupta
@ 2025-06-09 15:04 ` Keith Busch
1 sibling, 0 replies; 7+ messages in thread
From: Keith Busch @ 2025-06-09 15:04 UTC (permalink / raw)
To: Shinichiro Kawasaki
Cc: Keith Busch, linux-block@vger.kernel.org,
linux-nvme@lists.infradead.org, axboe@kernel.dk
On Sat, Jun 07, 2025 at 12:54:50PM +0000, Shinichiro Kawasaki wrote:
> On Jun 05, 2025 / 17:30, Keith Busch wrote:
> > From: Keith Busch <kbusch@kernel.org>
> >
> > Get more coverage on nvme metadata passthrough. Specifically in this
> > test, read-only metadata is targeted as this had been a gap in previous
> > test coveraged.
>
> Thanks for the patch. I ran the test case on the kernel v6.15, and it passed.
> Is this pass result expected? I guess this test case intends to extend test
> coverage, and does not intend to recreate the failure reported in the Link.
> So, I'm guessing the test case should pass with v6.15 kernel.
Oh, I thought it would fail in 6.15, or at least part of this test would
have. I changed a lot at the last second before posting, so maybe I
messed something up. I'll do another version after making sense of
what's going on.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH blktests] block tests: nvme metadata passthrough
2025-06-06 1:41 ` Anuj gupta
@ 2025-06-09 15:24 ` Keith Busch
0 siblings, 0 replies; 7+ messages in thread
From: Keith Busch @ 2025-06-09 15:24 UTC (permalink / raw)
To: Anuj gupta
Cc: Keith Busch, linux-block, linux-nvme, shinichiro.kawasaki, axboe
On Fri, Jun 06, 2025 at 07:11:20AM +0530, Anuj gupta wrote:
> Minor nit: should use `.nsid = nsid` instead of hardcoded `1`
>
> > + /* This should not be mappable for write commands */
>
> Maybe reword this to:
> /* This buffer is read-only, so using it for write passthrough should fail */
> -- makes the intent clearer.
Err, I actually got this backwards. PROT_READ means we can write to the
disk from that memory, but we can't read from the disk into it.
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2025-06-09 15:58 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-06 0:30 [PATCH blktests] block tests: nvme metadata passthrough Keith Busch
2025-06-06 1:41 ` Anuj gupta
2025-06-09 15:24 ` Keith Busch
2025-06-07 12:54 ` Shinichiro Kawasaki
2025-06-08 19:55 ` Anuj gupta
2025-06-09 7:40 ` Shinichiro Kawasaki
2025-06-09 15:04 ` Keith Busch
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).