* [PATCHv2] block tests: nvme metadata passthrough
@ 2025-06-09 15:41 Keith Busch
2025-06-10 7:31 ` Shinichiro Kawasaki
0 siblings, 1 reply; 4+ messages in thread
From: Keith Busch @ 2025-06-09 15:41 UTC (permalink / raw)
To: linux-block, linux-nvme, shinichiro.kawasaki; +Cc: axboe, Keith Busch
From: Keith Busch <kbusch@kernel.org>
Get more coverage on nvme metadata passthrough. Specifically in this
test, read-only metadata is targeted as this had been a gap in previous
test coveraged.
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
This one should fail on 6.15 and pass on 6.16.
v1->v2:
Correctly used the "test_device()" function name instead of "test()".
Use the NSID value we got from the device instead of assuming 1.
Fixed the logic for which command is expected to fail when given
read-only memory.
src/Makefile | 1 +
src/nvme-passthrough-meta.c | 230 ++++++++++++++++++++++++++++++++++++
tests/nvme/064 | 24 ++++
tests/nvme/064.out | 2 +
4 files changed, 257 insertions(+)
create mode 100644 src/nvme-passthrough-meta.c
create mode 100755 tests/nvme/064
create mode 100644 tests/nvme/064.out
diff --git a/src/Makefile b/src/Makefile
index a94e5f2..f91ac62 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -13,6 +13,7 @@ C_TARGETS := \
loop_change_fd \
loop_get_status_null \
mount_clear_sock \
+ nvme-passthrough-meta \
nbdsetsize \
openclose \
sg/dxfer-from-dev \
diff --git a/src/nvme-passthrough-meta.c b/src/nvme-passthrough-meta.c
new file mode 100644
index 0000000..d19ee25
--- /dev/null
+++ b/src/nvme-passthrough-meta.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-3.0+
+// Copyright (C) 2025 Keith Busch
+
+/*
+ * Simple test exercising the user metadata interfaces used by nvme passthrough
+ * commands.
+ */
+#define _GNU_SOURCE
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <inttypes.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/types.h>
+
+#ifndef _LINUX_NVME_IOCTL_H
+#define _LINUX_NVME_IOCTL_H
+struct nvme_passthru_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 rsvd1;
+ __u32 nsid;
+ __u32 cdw2;
+ __u32 cdw3;
+ __u64 metadata;
+ __u64 addr;
+ __u32 metadata_len;
+ __u32 data_len;
+ __u32 cdw10;
+ __u32 cdw11;
+ __u32 cdw12;
+ __u32 cdw13;
+ __u32 cdw14;
+ __u32 cdw15;
+ __u32 timeout_ms;
+ __u32 result;
+};
+
+#define NVME_IOCTL_ID _IO('N', 0x40)
+#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_passthru_cmd)
+#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd)
+#endif /* _UAPI_LINUX_NVME_IOCTL_H */
+
+struct nvme_lbaf {
+ __le16 ms;
+ __u8 ds;
+ __u8 rp;
+};
+
+struct nvme_id_ns {
+ __le64 nsze;
+ __le64 ncap;
+ __le64 nuse;
+ __u8 nsfeat;
+ __u8 nlbaf;
+ __u8 flbas;
+ __u8 mc;
+ __u8 dpc;
+ __u8 dps;
+ __u8 nmic;
+ __u8 rescap;
+ __u8 fpi;
+ __u8 dlfeat;
+ __le16 nawun;
+ __le16 nawupf;
+ __le16 nacwu;
+ __le16 nabsn;
+ __le16 nabo;
+ __le16 nabspf;
+ __le16 noiob;
+ __u8 nvmcap[16];
+ __le16 npwg;
+ __le16 npwa;
+ __le16 npdg;
+ __le16 npda;
+ __le16 nows;
+ __u8 rsvd74[18];
+ __le32 anagrpid;
+ __u8 rsvd96[3];
+ __u8 nsattr;
+ __le16 nvmsetid;
+ __le16 endgid;
+ __u8 nguid[16];
+ __u8 eui64[8];
+ struct nvme_lbaf lbaf[64];
+ __u8 vs[3712];
+};
+
+#define BUFFER_SIZE (32768)
+
+int main(int argc, char **argv)
+{
+ int ret, fd, nsid, blocks, meta_buffer_size;
+ void *buffer, *mptr = NULL, *meta = NULL;
+ struct nvme_passthru_cmd cmd;
+ struct nvme_lbaf lbaf;
+ struct nvme_id_ns ns;
+
+ __u64 block_size;
+ __u16 meta_size;
+
+ if (argc < 2) {
+ fprintf(stderr, "usage: %s /dev/nvmeXnY", argv[0]);
+ return EINVAL;
+ }
+
+ fd = open(argv[1], O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ nsid = ioctl(fd, NVME_IOCTL_ID);
+ if (nsid < 0) {
+ perror("namespace id");
+ return errno;
+ }
+
+ cmd = (struct nvme_passthru_cmd) {
+ .opcode = 0x6,
+ .nsid = nsid,
+ .addr = (__u64)(uintptr_t)&ns,
+ .data_len = sizeof(ns),
+ };
+
+ ret = ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
+ if (ret < 0) {
+ perror("id-ns");
+ return errno;
+ }
+
+ lbaf = ns.lbaf[ns.flbas & 0xf];
+ block_size = 1 << lbaf.ds;
+ meta_size = lbaf.ms;
+
+ /* format not appropriate for this test */
+ if (meta_size == 0)
+ return 0;
+
+ blocks = BUFFER_SIZE / block_size;
+ meta_buffer_size = blocks * meta_size;
+
+ buffer = malloc(BUFFER_SIZE);
+ mptr = mmap(NULL, 8192, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (mptr == MAP_FAILED) {
+ perror("mmap");
+ return errno;
+ }
+
+ /* this should directly use the user space buffer */
+ meta = mptr;
+ cmd = (struct nvme_passthru_cmd) {
+ .opcode = 1,
+ .nsid = nsid,
+ .addr = (uintptr_t)buffer,
+ .metadata = (uintptr_t)meta,
+ .data_len = BUFFER_SIZE,
+ .metadata_len = meta_buffer_size,
+ .cdw12 = blocks - 1,
+ };
+
+ ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+ if (ret < 0) {
+ perror("nvme-write");
+ return ret;
+ }
+
+ cmd.opcode = 2;
+ ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+ if (ret < 0) {
+ perror("nvme-read");
+ return ret;
+ }
+
+ /*
+ * this offset should either force a kernel copy if we don't have
+ * contiguous pages, or test the device's metadata sgls
+ */
+ meta = mptr + 4096 - 16;
+ cmd.opcode = 1;
+ cmd.metadata = (uintptr_t)meta;
+
+ ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+ if (ret < 0) {
+ perror("nvme-write (offset)");
+ return errno;
+ }
+
+ cmd.opcode = 2;
+ ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+ if (ret < 0) {
+ perror("nvme-read (offset)");
+ return errno;
+ }
+
+ /*
+ * This buffer is read-only, so should not be successful with commands
+ * where it is the destination (reads)
+ */
+ mptr = mmap(NULL, 8192, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (mptr == MAP_FAILED) {
+ perror("mmap");
+ return errno;
+ }
+
+ meta = mptr;
+
+ cmd.opcode = 1;
+ cmd.metadata = (uintptr_t)meta;
+ ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+ if (ret < 0) {
+ perror("nvme-write (prot_read)");
+ return ret;
+ }
+
+ cmd.opcode = 2;
+ ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+ if (ret == 0) {
+ perror("nvme-read (expect Failure)");
+ return EFAULT;
+ }
+
+ return 0;
+}
diff --git a/tests/nvme/064 b/tests/nvme/064
new file mode 100755
index 0000000..fd72d4a
--- /dev/null
+++ b/tests/nvme/064
@@ -0,0 +1,24 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2025 Keith Busch <kbusch@kernel.org>
+#
+# Test out metadata through the passthrough interfaces
+
+. tests/nvme/rc
+
+requires() {
+ _nvme_requires
+}
+
+DESCRIPTION="exercise the nvme metadata usage with passthrough commands"
+QUICK=1
+
+test_device() {
+ echo "Running ${TEST_NAME}"
+
+ if src/nvme-passthrough-meta "${TEST_DEV}"; then
+ echo "src/nvme-passthrough-meta failed"
+ fi
+
+ echo "Test complete"
+}
diff --git a/tests/nvme/064.out b/tests/nvme/064.out
new file mode 100644
index 0000000..5b34d4e
--- /dev/null
+++ b/tests/nvme/064.out
@@ -0,0 +1,2 @@
+Running nvme/064
+Test complete
--
2.47.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCHv2] block tests: nvme metadata passthrough
2025-06-09 15:41 [PATCHv2] block tests: nvme metadata passthrough Keith Busch
@ 2025-06-10 7:31 ` Shinichiro Kawasaki
2025-06-10 15:51 ` Keith Busch
0 siblings, 1 reply; 4+ messages in thread
From: Shinichiro Kawasaki @ 2025-06-10 7:31 UTC (permalink / raw)
To: Keith Busch
Cc: linux-block@vger.kernel.org, linux-nvme@lists.infradead.org,
axboe@kernel.dk, Keith Busch
On Jun 09, 2025 / 08:41, Keith Busch wrote:
...
> diff --git a/tests/nvme/064 b/tests/nvme/064
> new file mode 100755
> index 0000000..fd72d4a
> --- /dev/null
> +++ b/tests/nvme/064
...
> +test_device() {
> + echo "Running ${TEST_NAME}"
> +
> + if src/nvme-passthrough-meta "${TEST_DEV}"; then
I think the check logic above should be inverted:
if ! src/nvme-passthrough-meta "${TEST_DEV}"; then
Thanks for this v2. With the fix above, I was able to confirme that the test
case passes with v6.16-rc1 kernel. When I reverted the kernel commit below,
it failed. It looks working good as the fix confirmation.
43a67dd812c5 ("block: flip iter directions in blk_rq_integrity_map_user()")
To run the test case, I tried QEMU nvme emulation devices with some different
options. I found that the namespace should have format with metadata, and
extended LBA should be disabled. IOW, QEMU -drive option should have value
"pi=1,pil=1,ms=8" for the namespace.
I suggest to describe the device requirements in the test case comment. Also, I
suggest to check the requirements for the test case, and skip if the
requirements are not fulfilled. FYI, I prototyped such change as the patch
below. Please let me know what your think. If you are okay with it, I will
repost your patch together with my patch for common/rc and tests/nvme/rc as the
v3 series.
diff --git a/common/rc b/common/rc
index ff3f0a3..7155233 100644
--- a/common/rc
+++ b/common/rc
@@ -422,6 +422,14 @@ _test_dev_is_partition() {
[[ -n ${TEST_DEV_PART_SYSFS} ]]
}
+_test_dev_has_metadata() {
+ if (( ! $(<"${TEST_DEV_SYSFS}/metadata_bytes") )); then
+ SKIP_REASONS+=("$TEST_DEV does not have metadata")
+ return 1
+ fi
+ return 0
+}
+
_min_io() {
local path_or_dev=$1
local min_io
diff --git a/src/nvme-passthrough-meta.c b/src/nvme-passthrough-meta.c
index d19ee25..29980a2 100644
--- a/src/nvme-passthrough-meta.c
+++ b/src/nvme-passthrough-meta.c
@@ -139,8 +139,10 @@ int main(int argc, char **argv)
meta_size = lbaf.ms;
/* format not appropriate for this test */
- if (meta_size == 0)
- return 0;
+ if (meta_size == 0) {
+ fprintf(stderr, "Device format does not have metadata\n");
+ return -EINVAL;
+ }
blocks = BUFFER_SIZE / block_size;
meta_buffer_size = blocks * meta_size;
diff --git a/tests/nvme/064 b/tests/nvme/064
index fd72d4a..eb9ca50 100755
--- a/tests/nvme/064
+++ b/tests/nvme/064
@@ -2,7 +2,12 @@
# SPDX-License-Identifier: GPL-3.0+
# Copyright (C) 2025 Keith Busch <kbusch@kernel.org>
#
-# Test out metadata through the passthrough interfaces
+# Test out metadata through the passthrough interfaces. This test confirms the
+# fix by the kernel commit 43a67dd812c5 ("block: flip iter directions in
+# blk_rq_integrity_map_user()"). This test requires TEST_DEV as a namespace
+# formatted with metadata, and extended LBA disabled. Such namespace can be
+# prepared with QEMU NVME emulation specifying -device option with
+# "pi=1,pil=1,ms=8" values.
. tests/nvme/rc
@@ -10,13 +15,18 @@ requires() {
_nvme_requires
}
+device_requires() {
+ _test_dev_has_metadata
+ _require_test_dev_extended_lba_off
+}
+
DESCRIPTION="exercise the nvme metadata usage with passthrough commands"
QUICK=1
test_device() {
echo "Running ${TEST_NAME}"
- if src/nvme-passthrough-meta "${TEST_DEV}"; then
+ if ! src/nvme-passthrough-meta "${TEST_DEV}"; then
echo "src/nvme-passthrough-meta failed"
fi
diff --git a/tests/nvme/rc b/tests/nvme/rc
index 9dbd1ce..f69ceb9 100644
--- a/tests/nvme/rc
+++ b/tests/nvme/rc
@@ -153,6 +153,21 @@ _require_test_dev_is_not_nvme_multipath() {
return 0
}
+_require_test_dev_extended_lba_off() {
+ local flbas
+
+ if ! flbas=$(nvme id-ns "$TEST_DEV" | grep flbas | \
+ sed --quiet 's/.*: \(.*\)/\1/p'); then
+ SKIP_REASONS+=("$TEST_DEV does not have namespace flbas field")
+ return 1
+ fi
+ if (( ${flbas} & 0x10 )); then
+ SKIP_REASONS+=("$TEST_DEV has NVME_NS_FLBAS_META_EXT on")
+ return 1
+ fi
+ return 0
+}
+
_require_nvme_test_img_size() {
local require_sz_mb
local nvme_img_size_mb
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCHv2] block tests: nvme metadata passthrough
2025-06-10 7:31 ` Shinichiro Kawasaki
@ 2025-06-10 15:51 ` Keith Busch
2025-06-11 11:05 ` Shinichiro Kawasaki
0 siblings, 1 reply; 4+ messages in thread
From: Keith Busch @ 2025-06-10 15:51 UTC (permalink / raw)
To: Shinichiro Kawasaki
Cc: Keith Busch, linux-block@vger.kernel.org,
linux-nvme@lists.infradead.org, axboe@kernel.dk
On Tue, Jun 10, 2025 at 07:31:10AM +0000, Shinichiro Kawasaki wrote:
> Thanks for this v2. With the fix above, I was able to confirme that the test
> case passes with v6.16-rc1 kernel. When I reverted the kernel commit below,
> it failed. It looks working good as the fix confirmation.
>
> 43a67dd812c5 ("block: flip iter directions in blk_rq_integrity_map_user()")
We should probably put a "Link:" tag in the commit message for this:
https://lore.kernel.org/linux-block/20250603184752.1185676-1-csander@purestorage.com/
> To run the test case, I tried QEMU nvme emulation devices with some different
> options. I found that the namespace should have format with metadata, and
> extended LBA should be disabled. IOW, QEMU -drive option should have value
> "pi=1,pil=1,ms=8" for the namespace.
That's fine, though you don't need to set protection information
capabilities for this. The test will still run if you enable it, but
it's probably better if you just let it be opaque metadata. You can also
test with ms=16 or ms=64 as both are supported by qemu's nvme device.
> I suggest to describe the device requirements in the test case comment. Also, I
> suggest to check the requirements for the test case, and skip if the
> requirements are not fulfilled. FYI, I prototyped such change as the patch
> below. Please let me know what your think. If you are okay with it, I will
> repost your patch together with my patch for common/rc and tests/nvme/rc as the
> v3 series.
Your changes look good. Thank you for the suggestions!
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCHv2] block tests: nvme metadata passthrough
2025-06-10 15:51 ` Keith Busch
@ 2025-06-11 11:05 ` Shinichiro Kawasaki
0 siblings, 0 replies; 4+ messages in thread
From: Shinichiro Kawasaki @ 2025-06-11 11:05 UTC (permalink / raw)
To: Keith Busch
Cc: Keith Busch, linux-block@vger.kernel.org,
linux-nvme@lists.infradead.org, axboe@kernel.dk
On Jun 10, 2025 / 09:51, Keith Busch wrote:
> On Tue, Jun 10, 2025 at 07:31:10AM +0000, Shinichiro Kawasaki wrote:
> > Thanks for this v2. With the fix above, I was able to confirme that the test
> > case passes with v6.16-rc1 kernel. When I reverted the kernel commit below,
> > it failed. It looks working good as the fix confirmation.
> >
> > 43a67dd812c5 ("block: flip iter directions in blk_rq_integrity_map_user()")
>
> We should probably put a "Link:" tag in the commit message for this:
>
> https://lore.kernel.org/linux-block/20250603184752.1185676-1-csander@purestorage.com/
>
> > To run the test case, I tried QEMU nvme emulation devices with some different
> > options. I found that the namespace should have format with metadata, and
> > extended LBA should be disabled. IOW, QEMU -drive option should have value
> > "pi=1,pil=1,ms=8" for the namespace.
>
> That's fine, though you don't need to set protection information
> capabilities for this. The test will still run if you enable it, but
> it's probably better if you just let it be opaque metadata. You can also
> test with ms=16 or ms=64 as both are supported by qemu's nvme device.
Ah, I see. I confirmed that the QEMU -drive option values "pi=1,pil=1" are not
required. Also, I confirmed that "ms=16" and "ms=64" work. Will reflect it to
the patch series to post.
>
> > I suggest to describe the device requirements in the test case comment. Also, I
> > suggest to check the requirements for the test case, and skip if the
> > requirements are not fulfilled. FYI, I prototyped such change as the patch
> > below. Please let me know what your think. If you are okay with it, I will
> > repost your patch together with my patch for common/rc and tests/nvme/rc as the
> > v3 series.
>
> Your changes look good. Thank you for the suggestions!
Sounds, good. Will post the series within a few days.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-06-11 11:05 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-09 15:41 [PATCHv2] block tests: nvme metadata passthrough Keith Busch
2025-06-10 7:31 ` Shinichiro Kawasaki
2025-06-10 15:51 ` Keith Busch
2025-06-11 11:05 ` Shinichiro Kawasaki
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox