* [PATCH blktests] io_uring user metadata offset test
@ 2025-11-07 23:18 Keith Busch
2025-11-08 2:09 ` Chaitanya Kulkarni
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Keith Busch @ 2025-11-07 23:18 UTC (permalink / raw)
To: linux-block, hch, shinichiro.kawasaki; +Cc: Keith Busch
From: Keith Busch <kbusch@kernel.org>
For devices with metadata, tests various userspace offsets with
io_uring capabilities. If the metadata is formatted with ref tag
protection information, test various seed offsets as well.
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
src/.gitignore | 1 +
src/Makefile | 7 +-
src/metadata.c | 481 ++++++++++++++++++++++++++++++++++++++++++++
tests/block/043 | 27 +++
tests/block/043.out | 2 +
5 files changed, 515 insertions(+), 3 deletions(-)
create mode 100644 src/metadata.c
create mode 100755 tests/block/043
create mode 100644 tests/block/043.out
diff --git a/src/.gitignore b/src/.gitignore
index 865675c..e6c6c38 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -3,6 +3,7 @@
/loblksize
/loop_change_fd
/loop_get_status_null
+/metadata
/mount_clear_sock
/nbdsetsize
/openclose
diff --git a/src/Makefile b/src/Makefile
index 179a673..7146db0 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -22,7 +22,8 @@ C_TARGETS := \
sg/syzkaller1 \
zbdioctl
-C_MINIUBLK := miniublk
+C_MINIUBLK := miniublk \
+ metadata
HAVE_LIBURING := $(call HAVE_C_MACRO,liburing.h,IORING_OP_URING_CMD)
HAVE_UBLK_HEADER := $(call HAVE_C_HEADER,linux/ublk_cmd.h,1)
@@ -61,8 +62,8 @@ $(C_TARGETS): %: %.c
$(CXX_TARGETS): %: %.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ $^
-$(C_MINIUBLK): %: miniublk.c
- $(CC) $(CFLAGS) $(LDFLAGS) $(MINIUBLK_FLAGS) -o $@ miniublk.c \
+$(C_MINIUBLK): %: %.c
+ $(CC) $(CFLAGS) $(LDFLAGS) $(MINIUBLK_FLAGS) -o $@ $^ \
$(MINIUBLK_LIBS)
.PHONY: all clean install
diff --git a/src/metadata.c b/src/metadata.c
new file mode 100644
index 0000000..4628299
--- /dev/null
+++ b/src/metadata.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Description: test userspace metadata
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <liburing.h>
+
+#ifndef IORING_RW_ATTR_FLAG_PI
+#define PI_URING_COMPAT
+#define IORING_RW_ATTR_FLAG_PI (1U << 0)
+/* PI attribute information */
+struct io_uring_attr_pi {
+ __u16 flags;
+ __u16 app_tag;
+ __u32 len;
+ __u64 addr;
+ __u64 seed;
+ __u64 rsvd;
+};
+#endif
+
+#ifndef FS_IOC_GETLBMD_CAP
+/* Protection info capability flags */
+#define LBMD_PI_CAP_INTEGRITY (1 << 0)
+#define LBMD_PI_CAP_REFTAG (1 << 1)
+
+/* Checksum types for Protection Information */
+#define LBMD_PI_CSUM_NONE 0
+#define LBMD_PI_CSUM_IP 1
+#define LBMD_PI_CSUM_CRC16_T10DIF 2
+#define LBMD_PI_CSUM_CRC64_NVME 4
+
+/*
+ * Logical block metadata capability descriptor
+ * If the device does not support metadata, all the fields will be zero.
+ * Applications must check lbmd_flags to determine whether metadata is
+ * supported or not.
+ */
+struct logical_block_metadata_cap {
+ /* Bitmask of logical block metadata capability flags */
+ __u32 lbmd_flags;
+ /*
+ * The amount of data described by each unit of logical block
+ * metadata
+ */
+ __u16 lbmd_interval;
+ /*
+ * Size in bytes of the logical block metadata associated with each
+ * interval
+ */
+ __u8 lbmd_size;
+ /*
+ * Size in bytes of the opaque block tag associated with each
+ * interval
+ */
+ __u8 lbmd_opaque_size;
+ /*
+ * Offset in bytes of the opaque block tag within the logical block
+ * metadata
+ */
+ __u8 lbmd_opaque_offset;
+ /* Size in bytes of the T10 PI tuple associated with each interval */
+ __u8 lbmd_pi_size;
+ /* Offset in bytes of T10 PI tuple within the logical block metadata */
+ __u8 lbmd_pi_offset;
+ /* T10 PI guard tag type */
+ __u8 lbmd_guard_tag_type;
+ /* Size in bytes of the T10 PI application tag */
+ __u8 lbmd_app_tag_size;
+ /* Size in bytes of the T10 PI reference tag */
+ __u8 lbmd_ref_tag_size;
+ /* Size in bytes of the T10 PI storage tag */
+ __u8 lbmd_storage_tag_size;
+ __u8 pad;
+};
+
+#define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap)
+#endif /* FS_IOC_GETLBMD_CAP */
+
+#ifndef IO_INTEGRITY_CHK_GUARD
+/* flags for integrity meta */
+#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */
+#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */
+#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */
+#endif /* IO_INTEGRITY_CHK_GUARD */
+
+/* This size should guarantee at least one split */
+#define DATA_SIZE (8 * 1024 * 1024)
+
+static unsigned short lba_size;
+static unsigned char metadata_size;
+static unsigned char pi_size;
+static unsigned char pi_offset;
+static bool reftag_enabled;
+
+static long pagesize;
+
+struct t10_pi_tuple {
+ __be16 guard_tag; /* Checksum */
+ __be16 app_tag; /* Opaque storage */
+ __be32 ref_tag; /* Target LBA or indirect LBA */
+};
+
+struct crc64_pi_tuple {
+ __be64 guard_tag;
+ __be16 app_tag;
+ __u8 ref_tag[6];
+};
+
+static int init_capabilities(int fd)
+{
+ struct logical_block_metadata_cap md_cap;
+ int ret;
+
+ ret = ioctl(fd, FS_IOC_GETLBMD_CAP, &md_cap);
+ if (ret < 0)
+ return ret;
+
+ lba_size = md_cap.lbmd_interval;
+ metadata_size = md_cap.lbmd_size;
+ pi_size = md_cap.lbmd_pi_size;
+ pi_offset = md_cap.lbmd_pi_offset;
+ reftag_enabled = md_cap.lbmd_flags & LBMD_PI_CAP_REFTAG;
+
+ pagesize = sysconf(_SC_PAGE_SIZE);
+ return 0;
+}
+
+static unsigned int swap(unsigned int value)
+{
+ return ((value >> 24) & 0x000000ff) |
+ ((value >> 8) & 0x0000ff00) |
+ ((value << 8) & 0x00ff0000) |
+ ((value << 24) & 0xff000000);
+}
+
+static inline void __put_unaligned_be48(const __u64 val, __u8 *p)
+{
+ *p++ = (val >> 40) & 0xff;
+ *p++ = (val >> 32) & 0xff;
+ *p++ = (val >> 24) & 0xff;
+ *p++ = (val >> 16) & 0xff;
+ *p++ = (val >> 8) & 0xff;
+ *p++ = val & 0xff;
+}
+
+static inline void put_unaligned_be48(const __u64 val, void *p)
+{
+ __put_unaligned_be48(val, p);
+}
+
+static inline __u64 __get_unaligned_be48(const __u8 *p)
+{
+ return (__u64)p[0] << 40 | (__u64)p[1] << 32 | (__u64)p[2] << 24 |
+ p[3] << 16 | p[4] << 8 | p[5];
+}
+
+static inline __u64 get_unaligned_be48(const void *p)
+{
+ return __get_unaligned_be48(p);
+}
+
+static void init_metadata(void *p, int intervals, int ref)
+{
+ int i, j;
+
+ for (i = 0; i < intervals; i++, ref++) {
+ int remaining = metadata_size - pi_offset;
+ unsigned char *m = p;
+
+ for (j = 0; j < pi_offset; j++)
+ m[j] = (unsigned char)(ref + j + i);
+
+ p += pi_offset;
+ if (reftag_enabled) {
+ if (pi_size == 8) {
+ struct t10_pi_tuple *tuple = p;
+
+ tuple->ref_tag = swap(ref);
+ remaining -= sizeof(*tuple);
+ p += sizeof(*tuple);
+ } else if (pi_size == 16) {
+ struct crc64_pi_tuple *tuple = p;
+
+ __put_unaligned_be48(ref, tuple->ref_tag);
+ remaining -= sizeof(*tuple);
+ p += sizeof(*tuple);
+ }
+ }
+
+ m = p;
+ for (j = 0; j < remaining; j++)
+ m[j] = (unsigned char)~(ref + j + i);
+
+ p += remaining;
+ }
+}
+
+static int check_metadata(void *p, int intervals, int ref)
+{
+ int i, j;
+
+ for (i = 0; i < intervals; i++, ref++) {
+ int remaining = metadata_size - pi_offset;
+ unsigned char *m = p;
+
+ for (j = 0; j < pi_offset; j++) {
+ if (m[j] != (unsigned char)(ref + j + i)) {
+ fprintf(stderr, "(pre)interval:%d byte:%d expected:%x got:%x\n",
+ i, j, (unsigned char)(ref + j + i), m[j]);
+ return -1;
+ }
+ }
+
+ p += pi_offset;
+ if (reftag_enabled) {
+ if (pi_size == 8) {
+ struct t10_pi_tuple *tuple = p;
+
+ if (swap(tuple->ref_tag) != ref) {
+ fprintf(stderr, "reftag interval:%d expected:%x got:%x\n",
+ i, ref, swap(tuple->ref_tag));
+ return -1;
+ }
+
+ remaining -= sizeof(*tuple);
+ p += sizeof(*tuple);
+ } else if (pi_size == 16) {
+ struct crc64_pi_tuple *tuple = p;
+ __u64 v = get_unaligned_be48(tuple->ref_tag);
+
+ if (v != ref) {
+ fprintf(stderr, "reftag interval:%d expected:%x got:%llx\n",
+ i, ref, v);
+ return -1;
+ }
+ remaining -= sizeof(*tuple);
+ p += sizeof(*tuple);
+ }
+ }
+
+ m = p;
+ for (j = 0; j < remaining; j++) {
+ if (m[j] != (unsigned char)~(ref + j + i)) {
+ fprintf(stderr, "(post)interval:%d byte:%d expected:%x got:%x\n",
+ i, j, (unsigned char)~(ref + j + i), m[j]);
+ return -1;
+ }
+ }
+
+ p += remaining;
+ }
+
+ return 0;
+}
+
+static int init_data(void *data, int offset)
+{
+ unsigned char *d = data;
+ int i;
+
+ for (i = 0; i < DATA_SIZE; i++)
+ d[i] = (unsigned char)(0xaa + offset + i);
+
+ return 0;
+}
+
+static int check_data(void *data, int offset)
+{
+ unsigned char *d = data;
+ int i;
+
+ for (i = 0; i < DATA_SIZE; i++)
+ if (d[i] != (unsigned char)(0xaa + offset + i))
+ return -1;
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int fd, ret, offset, intervals, metabuffer_size, metabuffer_tx_size;
+ void *orig_data_buf, *orig_pi_buf, *data_buf;
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ struct io_uring ring;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <dev>\n", argv[0]);
+ return 1;
+ }
+
+ fd = open(argv[1], O_RDWR | O_DIRECT);
+ if (fd < 0) {
+ perror("Failed to open device with O_DIRECT");
+ return 1;
+ }
+
+ ret = init_capabilities(fd);
+ if (ret < 0)
+ return 1;
+ if (lba_size == 0 || metadata_size == 0)
+ return 1;
+
+ intervals = DATA_SIZE / lba_size;
+ metabuffer_tx_size = intervals * metadata_size;
+ metabuffer_size = metabuffer_tx_size * 2;
+
+ if (posix_memalign(&orig_data_buf, pagesize, DATA_SIZE)) {
+ perror("posix_memalign failed for data buffer");
+ ret = 1;
+ goto close;
+ }
+
+ if (posix_memalign(&orig_pi_buf, pagesize, metabuffer_size)) {
+ perror("posix_memalign failed for metadata buffer");
+ ret = 1;
+ goto free;
+ }
+
+ ret = io_uring_queue_init(8, &ring, 0);
+ if (ret < 0) {
+ perror("io_uring_queue_init failed");
+ goto cleanup;
+ }
+
+ data_buf = orig_data_buf;
+ for (offset = 0; offset < 512; offset++) {
+ void *pi_buf = (char *)orig_pi_buf + offset * 4;
+ struct io_uring_attr_pi pi_attr = {
+ .addr = (__u64)pi_buf,
+ .seed = offset,
+ .len = metabuffer_tx_size,
+ };
+
+ if (reftag_enabled)
+ pi_attr.flags = IO_INTEGRITY_CHK_REFTAG;
+
+ init_data(data_buf, offset);
+ init_metadata(pi_buf, intervals, offset);
+
+ sqe = io_uring_get_sqe(&ring);
+ if (!sqe) {
+ fprintf(stderr, "Failed to get SQE\n");
+ ret = 1;
+ goto ring_exit;
+ }
+
+ io_uring_prep_write(sqe, fd, data_buf, DATA_SIZE, offset * lba_size * 8);
+ io_uring_sqe_set_data(sqe, (void *)1L);
+
+#ifdef PI_URING_COMPAT
+ /* old liburing, use fields that overlap in the union */
+ sqe->__pad2[0] = IORING_RW_ATTR_FLAG_PI;
+ sqe->addr3 = (__u64)&pi_attr;
+#else
+ sqe->attr_type_mask = IORING_RW_ATTR_FLAG_PI;
+ sqe->attr_ptr = (__u64)&pi_attr;
+#endif
+
+ ret = io_uring_submit(&ring);
+ if (ret < 1) {
+ perror("io_uring_submit failed (WRITE)");
+ ret = 1;
+ goto ring_exit;
+ }
+
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret < 0) {
+ perror("io_uring_wait_cqe failed (WRITE)");
+ ret = 1;
+ goto ring_exit;
+ }
+
+ if (cqe->res < 0) {
+ fprintf(stderr, "write failed at offset %d: %s\n",
+ offset, strerror(-cqe->res));
+ ret = 1;
+ goto ring_exit;
+ }
+
+ io_uring_cqe_seen(&ring, cqe);
+
+ memset(data_buf, 0, DATA_SIZE);
+ memset(pi_buf, 0, metabuffer_tx_size);
+
+ sqe = io_uring_get_sqe(&ring);
+ if (!sqe) {
+ fprintf(stderr, "failed to get SQE\n");
+ ret = 1;
+ goto ring_exit;
+ }
+
+ io_uring_prep_read(sqe, fd, data_buf, DATA_SIZE, offset * lba_size * 8);
+ io_uring_sqe_set_data(sqe, (void *)2L);
+
+#ifdef PI_URING_COMPAT
+ sqe->__pad2[0] = IORING_RW_ATTR_FLAG_PI;
+ sqe->addr3 = (__u64)&pi_attr;
+#else
+ sqe->attr_type_mask = IORING_RW_ATTR_FLAG_PI;
+ sqe->attr_ptr = (__u64)&pi_attr;
+#endif
+
+ ret = io_uring_submit(&ring);
+ if (ret < 1) {
+ perror("io_uring_submit failed (read)");
+ ret = 1;
+ goto ring_exit;
+ }
+
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret < 0) {
+ fprintf(stderr, "io_uring_wait_cqe failed (read): %s\n", strerror(-ret));
+ ret = 1;
+ goto ring_exit;
+ }
+
+ if (cqe->res < 0) {
+ fprintf(stderr, "read failed at offset %d: %s\n",
+ offset, strerror(-cqe->res));
+ ret = 1;
+ goto ring_exit;
+ }
+
+ ret = check_data(data_buf, offset);
+ if (ret) {
+ fprintf(stderr, "data corruption at offset %d\n",
+ offset);
+ ret = 1;
+ goto ring_exit;
+ }
+
+ ret = check_metadata(pi_buf, intervals, offset);
+ if (ret) {
+ fprintf(stderr, "metadata corruption at offset %d\n",
+ offset);
+ ret = 1;
+ goto ring_exit;
+ }
+
+ io_uring_cqe_seen(&ring, cqe);
+ }
+
+ memset(data_buf, 0, DATA_SIZE);
+
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_write(sqe, fd, data_buf, DATA_SIZE, 0);
+ io_uring_sqe_set_data(sqe, (void *)1L);
+
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_write(sqe, fd, data_buf, DATA_SIZE, DATA_SIZE);
+ io_uring_sqe_set_data(sqe, (void *)2L);
+
+ io_uring_submit(&ring);
+
+ io_uring_wait_cqe(&ring, &cqe);
+ io_uring_cqe_seen(&ring, cqe);
+ io_uring_wait_cqe(&ring, &cqe);
+ io_uring_cqe_seen(&ring, cqe);
+ring_exit:
+ io_uring_queue_exit(&ring);
+cleanup:
+ free(orig_pi_buf);
+free:
+ free(orig_data_buf);
+close:
+ close(fd);
+ return ret;
+}
diff --git a/tests/block/043 b/tests/block/043
new file mode 100755
index 0000000..0e6a6cb
--- /dev/null
+++ b/tests/block/043
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. tests/block/rc
+
+DESCRIPTION="Test userspace metadataoffsets"
+QUICK=1
+
+device_requires() {
+ _test_dev_has_metadata
+ _test_dev_disables_extended_lba
+}
+
+requires() {
+ _have_kernel_option IO_URING
+ _have_kernel_option BLK_DEV_INTEGRITY
+}
+
+test_device() {
+ echo "Running ${TEST_NAME}"
+
+ if ! src/metadata ${TEST_DEV}; then
+ echo "src/dio-offsets failed"
+ fi
+
+ echo "Test complete"
+}
+
diff --git a/tests/block/043.out b/tests/block/043.out
new file mode 100644
index 0000000..fda7fca
--- /dev/null
+++ b/tests/block/043.out
@@ -0,0 +1,2 @@
+Running block/043
+Test complete
--
2.47.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH blktests] io_uring user metadata offset test
2025-11-07 23:18 [PATCH blktests] io_uring user metadata offset test Keith Busch
@ 2025-11-08 2:09 ` Chaitanya Kulkarni
2025-11-19 16:50 ` Keith Busch
2025-11-08 3:26 ` Chaitanya Kulkarni
2025-11-11 7:58 ` Shinichiro Kawasaki
2 siblings, 1 reply; 6+ messages in thread
From: Chaitanya Kulkarni @ 2025-11-08 2:09 UTC (permalink / raw)
To: Keith Busch
Cc: Keith Busch, linux-block@vger.kernel.org, hch@lst.de,
shinichiro.kawasaki@wdc.com
On 11/7/25 15:18, Keith Busch wrote:
> diff --git a/tests/block/043 b/tests/block/043
> new file mode 100755
> index 0000000..0e6a6cb
> --- /dev/null
> +++ b/tests/block/043
> @@ -0,0 +1,27 @@
> +#!/bin/bash
> +
> +. tests/block/rc
> +
> +DESCRIPTION="Test userspace metadataoffsets"
nit:- DESCRIPTION="Test userspace metadata offsets" ? OR
"verify io_uring PI metadata integrity across buffer offsets" ?
> +QUICK=1
> +
> +device_requires() {
> + _test_dev_has_metadata
> + _test_dev_disables_extended_lba
> +}
> +
> +requires() {
> + _have_kernel_option IO_URING
Does this require a specific kernel version or higher for
io_uring to have this metadata support, or would any kernel
version work?
> + _have_kernel_option BLK_DEV_INTEGRITY
looking at blktests, we need something like :-
diff --git a/common/rc b/common/rc
index 86bb991..545da61 100644
--- a/common/rc
+++ b/common/rc
@@ -226,6 +226,12 @@ _have_kernel_option() {
return 0
}
+_have_kernel_options() {
+ for opt in "$@"; do
+ _have_kernel_option "$opt" || return 1
+ done
+}
+
# Compare the version string in $1 in "a.b.c" format with "$2.$3.$4".
# If "a.b.c" is smaller than "$2.$3.$4", return true. Otherwise, return
# false.
I'm not asking you to change anything here; I'll send a separate patch
that adds this helper and removes the duplicated calls.
> +}
> +
> +test_device() {
> + echo "Running ${TEST_NAME}"
> +
> + if ! src/metadata ${TEST_DEV}; then
> + echo "src/dio-offsets failed"
nit:- echo "src/metadata failed" ?
> + fi
> +
> + echo "Test complete"
> +}
> +
> diff --git a/tests/block/043.out b/tests/block/043.out
> new file mode 100644
> index 0000000..fda7fca
> --- /dev/null
> +++ b/tests/block/043.out
> @@ -0,0 +1,2 @@
> +Running block/043
-ck
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH blktests] io_uring user metadata offset test
2025-11-07 23:18 [PATCH blktests] io_uring user metadata offset test Keith Busch
2025-11-08 2:09 ` Chaitanya Kulkarni
@ 2025-11-08 3:26 ` Chaitanya Kulkarni
2025-11-11 7:58 ` Shinichiro Kawasaki
2 siblings, 0 replies; 6+ messages in thread
From: Chaitanya Kulkarni @ 2025-11-08 3:26 UTC (permalink / raw)
To: Keith Busch, linux-block@vger.kernel.org, hch@lst.de,
shinichiro.kawasaki@wdc.com
Cc: Keith Busch
On 11/7/25 15:18, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
>
> For devices with metadata, tests various userspace offsets with
> io_uring capabilities. If the metadata is formatted with ref tag
> protection information, test various seed offsets as well.
>
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
> src/.gitignore | 1 +
> src/Makefile | 7 +-
> src/metadata.c | 481 ++++++++++++++++++++++++++++++++++++++++++++
> tests/block/043 | 27 +++
> tests/block/043.out | 2 +
> 5 files changed, 515 insertions(+), 3 deletions(-)
> create mode 100644 src/metadata.c
> create mode 100755 tests/block/043
> create mode 100644 tests/block/043.out
>
> diff --git a/src/.gitignore b/src/.gitignore
> index 865675c..e6c6c38 100644
> --- a/src/.gitignore
> +++ b/src/.gitignore
> @@ -3,6 +3,7 @@
> /loblksize
> /loop_change_fd
> /loop_get_status_null
> +/metadata
> /mount_clear_sock
> /nbdsetsize
> /openclose
> diff --git a/src/Makefile b/src/Makefile
> index 179a673..7146db0 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -22,7 +22,8 @@ C_TARGETS := \
> sg/syzkaller1 \
> zbdioctl
>
> -C_MINIUBLK := miniublk
> +C_MINIUBLK := miniublk \
> + metadata
>
> HAVE_LIBURING := $(call HAVE_C_MACRO,liburing.h,IORING_OP_URING_CMD)
> HAVE_UBLK_HEADER := $(call HAVE_C_HEADER,linux/ublk_cmd.h,1)
> @@ -61,8 +62,8 @@ $(C_TARGETS): %: %.c
> $(CXX_TARGETS): %: %.cpp
> $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ $^
>
> -$(C_MINIUBLK): %: miniublk.c
> - $(CC) $(CFLAGS) $(LDFLAGS) $(MINIUBLK_FLAGS) -o $@ miniublk.c \
> +$(C_MINIUBLK): %: %.c
> + $(CC) $(CFLAGS) $(LDFLAGS) $(MINIUBLK_FLAGS) -o $@ $^ \
> $(MINIUBLK_LIBS)
>
> .PHONY: all clean install
> diff --git a/src/metadata.c b/src/metadata.c
> new file mode 100644
> index 0000000..4628299
> --- /dev/null
> +++ b/src/metadata.c
> @@ -0,0 +1,481 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Description: test userspace metadata
> + */
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <sys/ioctl.h>
> +#include <linux/fs.h>
> +#include <liburing.h>
> +
> +#ifndef IORING_RW_ATTR_FLAG_PI
> +#define PI_URING_COMPAT
> +#define IORING_RW_ATTR_FLAG_PI (1U << 0)
> +/* PI attribute information */
> +struct io_uring_attr_pi {
> + __u16 flags;
> + __u16 app_tag;
> + __u32 len;
> + __u64 addr;
> + __u64 seed;
> + __u64 rsvd;
> +};
> +#endif
> +
> +#ifndef FS_IOC_GETLBMD_CAP
> +/* Protection info capability flags */
> +#define LBMD_PI_CAP_INTEGRITY (1 << 0)
> +#define LBMD_PI_CAP_REFTAG (1 << 1)
> +
> +/* Checksum types for Protection Information */
> +#define LBMD_PI_CSUM_NONE 0
> +#define LBMD_PI_CSUM_IP 1
> +#define LBMD_PI_CSUM_CRC16_T10DIF 2
> +#define LBMD_PI_CSUM_CRC64_NVME 4
> +
> +/*
> + * Logical block metadata capability descriptor
> + * If the device does not support metadata, all the fields will be zero.
> + * Applications must check lbmd_flags to determine whether metadata is
> + * supported or not.
> + */
> +struct logical_block_metadata_cap {
> + /* Bitmask of logical block metadata capability flags */
> + __u32 lbmd_flags;
> + /*
> + * The amount of data described by each unit of logical block
> + * metadata
> + */
> + __u16 lbmd_interval;
> + /*
> + * Size in bytes of the logical block metadata associated with each
> + * interval
> + */
> + __u8 lbmd_size;
> + /*
> + * Size in bytes of the opaque block tag associated with each
> + * interval
> + */
> + __u8 lbmd_opaque_size;
> + /*
> + * Offset in bytes of the opaque block tag within the logical block
> + * metadata
> + */
> + __u8 lbmd_opaque_offset;
> + /* Size in bytes of the T10 PI tuple associated with each interval */
> + __u8 lbmd_pi_size;
> + /* Offset in bytes of T10 PI tuple within the logical block metadata */
> + __u8 lbmd_pi_offset;
> + /* T10 PI guard tag type */
> + __u8 lbmd_guard_tag_type;
> + /* Size in bytes of the T10 PI application tag */
> + __u8 lbmd_app_tag_size;
> + /* Size in bytes of the T10 PI reference tag */
> + __u8 lbmd_ref_tag_size;
> + /* Size in bytes of the T10 PI storage tag */
> + __u8 lbmd_storage_tag_size;
> + __u8 pad;
> +};
> +
> +#define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap)
> +#endif /* FS_IOC_GETLBMD_CAP */
> +
> +#ifndef IO_INTEGRITY_CHK_GUARD
> +/* flags for integrity meta */
> +#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */
> +#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */
> +#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */
> +#endif /* IO_INTEGRITY_CHK_GUARD */
> +
> +/* This size should guarantee at least one split */
> +#define DATA_SIZE (8 * 1024 * 1024)
> +
> +static unsigned short lba_size;
> +static unsigned char metadata_size;
> +static unsigned char pi_size;
> +static unsigned char pi_offset;
nit: do you really need the duplicated globals above?
You could declare struct logical_block_metadata_cap md_cap
globally and use md_cap.xxx instead.
> +static bool reftag_enabled;
> +
> +static long pagesize;
> +
> +struct t10_pi_tuple {
> + __be16 guard_tag; /* Checksum */
> + __be16 app_tag; /* Opaque storage */
> + __be32 ref_tag; /* Target LBA or indirect LBA */
> +};
> +
> +struct crc64_pi_tuple {
> + __be64 guard_tag;
> + __be16 app_tag;
> + __u8 ref_tag[6];
> +};
> +
> +static int init_capabilities(int fd)
> +{
> + struct logical_block_metadata_cap md_cap;
> + int ret;
> +
> + ret = ioctl(fd, FS_IOC_GETLBMD_CAP, &md_cap);
> + if (ret < 0)
> + return ret;
nit: report the error before returning? e.g. fprintf(stderr, "FS_IOC_GETLBMD_CAP failed: %s\n", strerror(errno));
> +
> + lba_size = md_cap.lbmd_interval;
> + metadata_size = md_cap.lbmd_size;
> + pi_size = md_cap.lbmd_pi_size;
> + pi_offset = md_cap.lbmd_pi_offset;
> + reftag_enabled = md_cap.lbmd_flags & LBMD_PI_CAP_REFTAG;
> +
> + pagesize = sysconf(_SC_PAGE_SIZE);
> + return 0;
> +}
> +
> +static unsigned int swap(unsigned int value)
> +{
> + return ((value >> 24) & 0x000000ff) |
> + ((value >> 8) & 0x0000ff00) |
> + ((value << 8) & 0x00ff0000) |
> + ((value << 24) & 0xff000000);
> +}
> +
> +static inline void __put_unaligned_be48(const __u64 val, __u8 *p)
> +{
> + *p++ = (val >> 40) & 0xff;
> + *p++ = (val >> 32) & 0xff;
> + *p++ = (val >> 24) & 0xff;
> + *p++ = (val >> 16) & 0xff;
> + *p++ = (val >> 8) & 0xff;
> + *p++ = val & 0xff;
> +}
> +
> +static inline void put_unaligned_be48(const __u64 val, void *p)
> +{
> + __put_unaligned_be48(val, p);
> +}
> +
> +static inline __u64 __get_unaligned_be48(const __u8 *p)
> +{
> + return (__u64)p[0] << 40 | (__u64)p[1] << 32 | (__u64)p[2] << 24 |
> + p[3] << 16 | p[4] << 8 | p[5];
> +}
> +
> +static inline __u64 get_unaligned_be48(const void *p)
> +{
> + return __get_unaligned_be48(p);
> +}
> +
> +static void init_metadata(void *p, int intervals, int ref)
> +{
> + int i, j;
> +
> + for (i = 0; i < intervals; i++, ref++) {
> + int remaining = metadata_size - pi_offset;
> + unsigned char *m = p;
> +
> + for (j = 0; j < pi_offset; j++)
> + m[j] = (unsigned char)(ref + j + i);
> +
> + p += pi_offset;
> + if (reftag_enabled) {
> + if (pi_size == 8) {
> + struct t10_pi_tuple *tuple = p;
> +
> + tuple->ref_tag = swap(ref);
> + remaining -= sizeof(*tuple);
> + p += sizeof(*tuple);
> + } else if (pi_size == 16) {
> + struct crc64_pi_tuple *tuple = p;
> +
> + __put_unaligned_be48(ref, tuple->ref_tag);
> + remaining -= sizeof(*tuple);
> + p += sizeof(*tuple);
> + }
> + }
> +
> + m = p;
> + for (j = 0; j < remaining; j++)
> + m[j] = (unsigned char)~(ref + j + i);
> +
> + p += remaining;
> + }
> +}
> +
> +static int check_metadata(void *p, int intervals, int ref)
> +{
> + int i, j;
> +
> + for (i = 0; i < intervals; i++, ref++) {
> + int remaining = metadata_size - pi_offset;
> + unsigned char *m = p;
> +
> + for (j = 0; j < pi_offset; j++) {
> + if (m[j] != (unsigned char)(ref + j + i)) {
> + fprintf(stderr, "(pre)interval:%d byte:%d expected:%x got:%x\n",
> + i, j, (unsigned char)(ref + j + i), m[j]);
> + return -1;
> + }
> + }
> +
> + p += pi_offset;
> + if (reftag_enabled) {
> + if (pi_size == 8) {
> + struct t10_pi_tuple *tuple = p;
> +
> + if (swap(tuple->ref_tag) != ref) {
> + fprintf(stderr, "reftag interval:%d expected:%x got:%x\n",
> + i, ref, swap(tuple->ref_tag));
> + return -1;
> + }
> +
> + remaining -= sizeof(*tuple);
> + p += sizeof(*tuple);
> + } else if (pi_size == 16) {
> + struct crc64_pi_tuple *tuple = p;
> + __u64 v = get_unaligned_be48(tuple->ref_tag);
> +
> + if (v != ref) {
> + fprintf(stderr, "reftag interval:%d expected:%x got:%llx\n",
> + i, ref, v);
> + return -1;
> + }
> + remaining -= sizeof(*tuple);
> + p += sizeof(*tuple);
> + }
> + }
> +
> + m = p;
> + for (j = 0; j < remaining; j++) {
> + if (m[j] != (unsigned char)~(ref + j + i)) {
> + fprintf(stderr, "(post)interval:%d byte:%d expected:%x got:%x\n",
> + i, j, (unsigned char)~(ref + j + i), m[j]);
> + return -1;
> + }
> + }
> +
> + p += remaining;
> + }
> +
> + return 0;
> +}
> +
> +static int init_data(void *data, int offset)
> +{
> + unsigned char *d = data;
> + int i;
> +
> + for (i = 0; i < DATA_SIZE; i++)
> + d[i] = (unsigned char)(0xaa + offset + i);
> +
> + return 0;
> +}
> +
The return value is always 0 and the caller ignores it, so this should return void:
static void init_data(void *data, int offset)
{
unsigned char *d = data;
int i;
for (i = 0; i < DATA_SIZE; i++)
d[i] = (unsigned char)(0xaa + offset + i);
}
> +static int check_data(void *data, int offset)
> +{
> + unsigned char *d = data;
> + int i;
> +
> + for (i = 0; i < DATA_SIZE; i++)
> + if (d[i] != (unsigned char)(0xaa + offset + i))
> + return -1;
> +
> + return 0;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + int fd, ret, offset, intervals, metabuffer_size, metabuffer_tx_size;
> + void *orig_data_buf, *orig_pi_buf, *data_buf;
> + struct io_uring_sqe *sqe;
> + struct io_uring_cqe *cqe;
> + struct io_uring ring;
> +
> + if (argc < 2) {
> + fprintf(stderr, "Usage: %s <dev>\n", argv[0]);
> + return 1;
> + }
> +
> + fd = open(argv[1], O_RDWR | O_DIRECT);
> + if (fd < 0) {
> + perror("Failed to open device with O_DIRECT");
> + return 1;
> + }
> +
> + ret = init_capabilities(fd);
> + if (ret < 0)
> + return 1;
> + if (lba_size == 0 || metadata_size == 0)
> + return 1;
> +
> + intervals = DATA_SIZE / lba_size;
> + metabuffer_tx_size = intervals * metadata_size;
> + metabuffer_size = metabuffer_tx_size * 2;
> +
> + if (posix_memalign(&orig_data_buf, pagesize, DATA_SIZE)) {
> + perror("posix_memalign failed for data buffer");
> + ret = 1;
> + goto close;
> + }
> +
> + if (posix_memalign(&orig_pi_buf, pagesize, metabuffer_size)) {
> + perror("posix_memalign failed for metadata buffer");
> + ret = 1;
> + goto free;
> + }
> +
> + ret = io_uring_queue_init(8, &ring, 0);
> + if (ret < 0) {
> + perror("io_uring_queue_init failed");
> + goto cleanup;
> + }
> +
> + data_buf = orig_data_buf;
Neither orig_data_buf nor data_buf is modified after the assignment above
until the program exits, so why not use just one of them?
-ck
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH blktests] io_uring user metadata offset test
2025-11-07 23:18 [PATCH blktests] io_uring user metadata offset test Keith Busch
2025-11-08 2:09 ` Chaitanya Kulkarni
2025-11-08 3:26 ` Chaitanya Kulkarni
@ 2025-11-11 7:58 ` Shinichiro Kawasaki
2025-11-19 16:10 ` Keith Busch
2 siblings, 1 reply; 6+ messages in thread
From: Shinichiro Kawasaki @ 2025-11-11 7:58 UTC (permalink / raw)
To: Keith Busch; +Cc: linux-block@vger.kernel.org, hch, Keith Busch
On Nov 07, 2025 / 15:18, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
>
> For devices with metadata, tests various userspace offsets with
> io_uring capabilities. If the metadata is formatted with ref tag
> protection information, test various seed offsets as well.
>
> Signed-off-by: Keith Busch <kbusch@kernel.org>
Keith, thanks for this patch. Looks important.
Just out of curiosity, is there any condition that makes this test case fail? I ran
the test case with the v6.18-rc5 kernel and a QEMU NVMe device (mi=8), and it
passed. Do we need specific hardware to make it fail?
If this test case just extends test coverage and no failure is expected at
this point, I think it is still useful.
This patch adds the test case block/043, skipping block/042. You posted the
patch for block/042 last month [1], and it has not yet landed on the blktests
master branch. Do you plan to respin it? If so, I think this block/043 can
be applied before the block/042 patch is applied. Otherwise, I will try to find
time to improve your block/042 patch and get it merged.
[1] https://lore.kernel.org/linux-block/20251014205420.941424-1-kbusch@meta.com/
Also, please find some more comments inline.
> ---
> src/.gitignore | 1 +
> src/Makefile | 7 +-
> src/metadata.c | 481 ++++++++++++++++++++++++++++++++++++++++++++
> tests/block/043 | 27 +++
> tests/block/043.out | 2 +
> 5 files changed, 515 insertions(+), 3 deletions(-)
> create mode 100644 src/metadata.c
> create mode 100755 tests/block/043
> create mode 100644 tests/block/043.out
>
> diff --git a/src/.gitignore b/src/.gitignore
> index 865675c..e6c6c38 100644
> --- a/src/.gitignore
> +++ b/src/.gitignore
> @@ -3,6 +3,7 @@
> /loblksize
> /loop_change_fd
> /loop_get_status_null
> +/metadata
> /mount_clear_sock
> /nbdsetsize
> /openclose
> diff --git a/src/Makefile b/src/Makefile
> index 179a673..7146db0 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -22,7 +22,8 @@ C_TARGETS := \
> sg/syzkaller1 \
> zbdioctl
>
> -C_MINIUBLK := miniublk
> +C_MINIUBLK := miniublk \
> + metadata
Nit: After this change, C_MINIUBLK is no longer a good name. I suggest renaming
it to C_URING_TARGETS. The other variables, MINIUBLK_FLAGS and MINIUBLK_LIBS, should
be renamed to URING_FLAGS and URING_LIBS respectively. If you are too busy (I
assume so), I can make these changes as a separate follow-up patch.
>
> HAVE_LIBURING := $(call HAVE_C_MACRO,liburing.h,IORING_OP_URING_CMD)
> HAVE_UBLK_HEADER := $(call HAVE_C_HEADER,linux/ublk_cmd.h,1)
> @@ -61,8 +62,8 @@ $(C_TARGETS): %: %.c
> $(CXX_TARGETS): %: %.cpp
> $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ $^
>
> -$(C_MINIUBLK): %: miniublk.c
> - $(CC) $(CFLAGS) $(LDFLAGS) $(MINIUBLK_FLAGS) -o $@ miniublk.c \
> +$(C_MINIUBLK): %: %.c
> + $(CC) $(CFLAGS) $(LDFLAGS) $(MINIUBLK_FLAGS) -o $@ $^ \
> $(MINIUBLK_LIBS)
>
> .PHONY: all clean install
> diff --git a/src/metadata.c b/src/metadata.c
> new file mode 100644
> index 0000000..4628299
> --- /dev/null
> +++ b/src/metadata.c
> @@ -0,0 +1,481 @@
> +// SPDX-License-Identifier: GPL-2.0
Nit: GPL-2.0 is fine, but many blktests files use GPL-3.0+. If you have no
strong preference, I suggest GPL-3.0+. Also, you may want to add a copyright notice.
[...]
> diff --git a/tests/block/043 b/tests/block/043
> new file mode 100755
> index 0000000..0e6a6cb
> --- /dev/null
> +++ b/tests/block/043
> @@ -0,0 +1,27 @@
> +#!/bin/bash
Even though this is a tiny script, I suggest adding an SPDX License Identifier
and your copyright here. I also suggest adding a short description here,
copied from the commit message, like:
# Test various userspace offsets with io_uring capabilities. If the metadata is
# formatted with ref tag protection information, test various seed offsets as
# well.
> +
> +. tests/block/rc
". common/nvme" is required here; otherwise the test run reports the errors below:
tests/block/043: line 9: _test_dev_has_metadata: command not found
tests/block/043: line 10: _test_dev_disables_extended_lba: command not found
> +
> +DESCRIPTION="Test userspace metadataoffsets"
> +QUICK=1
> +
> +device_requires() {
> + _test_dev_has_metadata
> + _test_dev_disables_extended_lba
> +}
> +
> +requires() {
> + _have_kernel_option IO_URING
> + _have_kernel_option BLK_DEV_INTEGRITY
> +}
> +
> +test_device() {
> + echo "Running ${TEST_NAME}"
> +
> + if ! src/metadata ${TEST_DEV}; then
> + echo "src/dio-offsets failed"
As Chaitanya noted, it should be "src/metadata failed".
> + fi
> +
> + echo "Test complete"
> +}
> +
> diff --git a/tests/block/043.out b/tests/block/043.out
> new file mode 100644
> index 0000000..fda7fca
> --- /dev/null
> +++ b/tests/block/043.out
> @@ -0,0 +1,2 @@
> +Running block/043
> +Test complete
> --
> 2.47.3
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH blktests] io_uring user metadata offset test
2025-11-11 7:58 ` Shinichiro Kawasaki
@ 2025-11-19 16:10 ` Keith Busch
0 siblings, 0 replies; 6+ messages in thread
From: Keith Busch @ 2025-11-19 16:10 UTC (permalink / raw)
To: Shinichiro Kawasaki; +Cc: Keith Busch, linux-block@vger.kernel.org, hch
On Tue, Nov 11, 2025 at 07:58:30AM +0000, Shinichiro Kawasaki wrote:
> On Nov 07, 2025 / 15:18, Keith Busch wrote:
> > From: Keith Busch <kbusch@kernel.org>
> >
> > For devices with metadata, tests various userspace offsets with
> > io_uring capabilities. If the metadata is formatted with ref tag
> > protection information, test various seed offsets as well.
> >
> > Signed-off-by: Keith Busch <kbusch@kernel.org>
>
> Keith, thanks for this patch. Looks important.
>
> Just for curiosity, is there any condition to make this test case fail? I ran
> the test case with v6.18-rc5 kernel and QEMU NVME device (mi=8), then it
> passed. Do we need specific hardware to make it fail?
>
> If this test case just extends test coverage and no failure is expected at
> this point, I think it is still useful.
It's not supposed to fail for any format. The test queries the
capabilities of the device and reacts accordingly. This test will send
various alignments and lengths that span page boundaries, and the kernel
will use it directly if the hardware supports that, or bounce it if not.
> This patch added the test case block/043, skipping block/042. You posted the
> patch for block/042 last month [1], and it is not yet settled on the blktests
> master branch. Do you have plan to respin it? If so, I think this block/043 can
> be applied before the block/042 patch get applied. Otherwise, I will try to find
> out my time to improve your block/042 patch and settle.
Yes, I'm working on a respin for 042 right now.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH blktests] io_uring user metadata offset test
2025-11-08 2:09 ` Chaitanya Kulkarni
@ 2025-11-19 16:50 ` Keith Busch
0 siblings, 0 replies; 6+ messages in thread
From: Keith Busch @ 2025-11-19 16:50 UTC (permalink / raw)
To: Chaitanya Kulkarni
Cc: Keith Busch, linux-block@vger.kernel.org, hch@lst.de,
shinichiro.kawasaki@wdc.com
On Sat, Nov 08, 2025 at 02:09:33AM +0000, Chaitanya Kulkarni wrote:
> On 11/7/25 15:18, Keith Busch wrote:
>
> this requires a specific kernel version or higher for
> IO_URING to have this metadata support ? OR any version
> of kernel would work ?
It does require io_uring PI attributes, which were introduced in 6.13.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2025-11-19 16:50 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-11-07 23:18 [PATCH blktests] io_uring user metadata offset test Keith Busch
2025-11-08 2:09 ` Chaitanya Kulkarni
2025-11-19 16:50 ` Keith Busch
2025-11-08 3:26 ` Chaitanya Kulkarni
2025-11-11 7:58 ` Shinichiro Kawasaki
2025-11-19 16:10 ` Keith Busch
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox