From: Keith Busch <kbusch@meta.com>
To: <linux-block@vger.kernel.org>, <shinichiro.kawasaki@wdc.com>
Cc: <chaitanyak@nvidia.com>, Keith Busch <kbusch@kernel.org>
Subject: [PATCHv2 2/2] blktests: test io_uring user metadata offsets
Date: Wed, 19 Nov 2025 11:54:49 -0800 [thread overview]
Message-ID: <20251119195449.2922332-3-kbusch@meta.com> (raw)
In-Reply-To: <20251119195449.2922332-1-kbusch@meta.com>
From: Keith Busch <kbusch@kernel.org>
For devices with metadata, tests various userspace offsets with
io_uring capabilities. If the metadata is formatted with ref tag
protection information, test various seed offsets as well.
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
src/.gitignore | 1 +
src/Makefile | 16 +-
src/metadata.c | 488 ++++++++++++++++++++++++++++++++++++++++++++
tests/block/043 | 33 +++
tests/block/043.out | 2 +
5 files changed, 532 insertions(+), 8 deletions(-)
create mode 100644 src/metadata.c
create mode 100755 tests/block/043
create mode 100644 tests/block/043.out
diff --git a/src/.gitignore b/src/.gitignore
index 865675c..e6c6c38 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -3,6 +3,7 @@
/loblksize
/loop_change_fd
/loop_get_status_null
+/metadata
/mount_clear_sock
/nbdsetsize
/openclose
diff --git a/src/Makefile b/src/Makefile
index 179a673..dac07c7 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -22,7 +22,8 @@ C_TARGETS := \
sg/syzkaller1 \
zbdioctl
-C_MINIUBLK := miniublk
+C_URING_TARGETS := miniublk \
+ metadata
HAVE_LIBURING := $(call HAVE_C_MACRO,liburing.h,IORING_OP_URING_CMD)
HAVE_UBLK_HEADER := $(call HAVE_C_HEADER,linux/ublk_cmd.h,1)
@@ -31,9 +32,9 @@ CXX_TARGETS := \
discontiguous-io
ifeq ($(HAVE_LIBURING)$(HAVE_UBLK_HEADER), 11)
-TARGETS := $(C_TARGETS) $(CXX_TARGETS) $(C_MINIUBLK)
+TARGETS := $(C_TARGETS) $(CXX_TARGETS) $(C_URING_TARGETS)
else
-$(info Skip $(C_MINIUBLK) build due to missing kernel header(v6.0+) or liburing(2.2+))
+$(info Skip $(C_URING_TARGETS) build due to missing kernel header(v6.0+) or liburing(2.2+))
TARGETS := $(C_TARGETS) $(CXX_TARGETS)
endif
@@ -42,8 +43,8 @@ CONFIG_DEFS := $(call HAVE_C_HEADER,linux/blkzoned.h,-DHAVE_LINUX_BLKZONED_H)
override CFLAGS := -O2 -Wall -Wshadow $(CFLAGS) $(CONFIG_DEFS)
override CXXFLAGS := -O2 -std=c++11 -Wall -Wextra -Wshadow -Wno-sign-compare \
-Werror $(CXXFLAGS) $(CONFIG_DEFS)
-MINIUBLK_FLAGS := -D_GNU_SOURCE
-MINIUBLK_LIBS := -lpthread -luring
+URING_FLAGS := -D_GNU_SOURCE
+URING_LIBS := -lpthread -luring
LDFLAGS ?=
all: $(TARGETS)
@@ -61,8 +62,7 @@ $(C_TARGETS): %: %.c
$(CXX_TARGETS): %: %.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ $^
-$(C_MINIUBLK): %: miniublk.c
- $(CC) $(CFLAGS) $(LDFLAGS) $(MINIUBLK_FLAGS) -o $@ miniublk.c \
- $(MINIUBLK_LIBS)
+$(C_URING_TARGETS): %: %.c
+ $(CC) $(CFLAGS) $(LDFLAGS) $(URING_FLAGS) -o $@ $^ $(URING_LIBS)
.PHONY: all clean install
diff --git a/src/metadata.c b/src/metadata.c
new file mode 100644
index 0000000..d935fd6
--- /dev/null
+++ b/src/metadata.c
@@ -0,0 +1,488 @@
+// SPDX-License-Identifier: GPL-3.0+
+/*
+ * Copyright (c) 2025 Meta Platforms, Inc. All Rights Reserved.
+ *
+ * Description: test userspace metadata
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <liburing.h>
+
+#ifndef IORING_RW_ATTR_FLAG_PI
+#define PI_URING_COMPAT
+#define IORING_RW_ATTR_FLAG_PI (1U << 0)
+/* PI attribute information */
+struct io_uring_attr_pi {
+ __u16 flags;
+ __u16 app_tag;
+ __u32 len;
+ __u64 addr;
+ __u64 seed;
+ __u64 rsvd;
+};
+#endif
+
+#ifndef FS_IOC_GETLBMD_CAP
+/* Protection info capability flags */
+#define LBMD_PI_CAP_INTEGRITY (1 << 0)
+#define LBMD_PI_CAP_REFTAG (1 << 1)
+
+/* Checksum types for Protection Information */
+#define LBMD_PI_CSUM_NONE 0
+#define LBMD_PI_CSUM_IP 1
+#define LBMD_PI_CSUM_CRC16_T10DIF 2
+#define LBMD_PI_CSUM_CRC64_NVME 4
+
+/*
+ * Logical block metadata capability descriptor
+ * If the device does not support metadata, all the fields will be zero.
+ * Applications must check lbmd_flags to determine whether metadata is
+ * supported or not.
+ */
+struct logical_block_metadata_cap {
+ /* Bitmask of logical block metadata capability flags */
+ __u32 lbmd_flags;
+ /*
+ * The amount of data described by each unit of logical block
+ * metadata
+ */
+ __u16 lbmd_interval;
+ /*
+ * Size in bytes of the logical block metadata associated with each
+ * interval
+ */
+ __u8 lbmd_size;
+ /*
+ * Size in bytes of the opaque block tag associated with each
+ * interval
+ */
+ __u8 lbmd_opaque_size;
+ /*
+ * Offset in bytes of the opaque block tag within the logical block
+ * metadata
+ */
+ __u8 lbmd_opaque_offset;
+ /* Size in bytes of the T10 PI tuple associated with each interval */
+ __u8 lbmd_pi_size;
+ /* Offset in bytes of T10 PI tuple within the logical block metadata */
+ __u8 lbmd_pi_offset;
+ /* T10 PI guard tag type */
+ __u8 lbmd_guard_tag_type;
+ /* Size in bytes of the T10 PI application tag */
+ __u8 lbmd_app_tag_size;
+ /* Size in bytes of the T10 PI reference tag */
+ __u8 lbmd_ref_tag_size;
+ /* Size in bytes of the T10 PI storage tag */
+ __u8 lbmd_storage_tag_size;
+ __u8 pad;
+};
+
+#define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap)
+#endif /* FS_IOC_GETLBMD_CAP */
+
+#ifndef IO_INTEGRITY_CHK_GUARD
+/* flags for integrity meta */
+#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */
+#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */
+#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */
+#endif /* IO_INTEGRITY_CHK_GUARD */
+
+/* This size should guarantee at least one split */
+#define DATA_SIZE (8 * 1024 * 1024)
+
+static unsigned short lba_size;
+static unsigned char metadata_size;
+static unsigned char pi_size;
+static unsigned char pi_offset;
+static bool reftag_enabled;
+
+static long pagesize;
+
+struct t10_pi_tuple {
+ __be16 guard_tag; /* Checksum */
+ __be16 app_tag; /* Opaque storage */
+ __be32 ref_tag; /* Target LBA or indirect LBA */
+};
+
+struct crc64_pi_tuple {
+ __be64 guard_tag;
+ __be16 app_tag;
+ __u8 ref_tag[6];
+};
+
+static int init_capabilities(int fd)
+{
+ struct logical_block_metadata_cap md_cap;
+ int ret;
+
+ ret = ioctl(fd, FS_IOC_GETLBMD_CAP, &md_cap);
+ if (ret < 0) {
+ perror("FS_IOC_GETLBMD_CAP");
+ return ret;
+ }
+
+ lba_size = md_cap.lbmd_interval;
+ metadata_size = md_cap.lbmd_size;
+ pi_size = md_cap.lbmd_pi_size;
+ pi_offset = md_cap.lbmd_pi_offset;
+ reftag_enabled = md_cap.lbmd_flags & LBMD_PI_CAP_REFTAG;
+
+ pagesize = sysconf(_SC_PAGE_SIZE);
+ return 0;
+}
+
+static unsigned int swap(unsigned int value)
+{
+ return ((value >> 24) & 0x000000ff) |
+ ((value >> 8) & 0x0000ff00) |
+ ((value << 8) & 0x00ff0000) |
+ ((value << 24) & 0xff000000);
+}
+
+static inline void __put_unaligned_be48(const __u64 val, __u8 *p)
+{
+ *p++ = (val >> 40) & 0xff;
+ *p++ = (val >> 32) & 0xff;
+ *p++ = (val >> 24) & 0xff;
+ *p++ = (val >> 16) & 0xff;
+ *p++ = (val >> 8) & 0xff;
+ *p++ = val & 0xff;
+}
+
+static inline void put_unaligned_be48(const __u64 val, void *p)
+{
+ __put_unaligned_be48(val, p);
+}
+
+static inline __u64 __get_unaligned_be48(const __u8 *p)
+{
+ return (__u64)p[0] << 40 | (__u64)p[1] << 32 | (__u64)p[2] << 24 |
+ p[3] << 16 | p[4] << 8 | p[5];
+}
+
+static inline __u64 get_unaligned_be48(const void *p)
+{
+ return __get_unaligned_be48(p);
+}
+
+static void init_metadata(void *p, int intervals, int ref)
+{
+ int i, j;
+
+ for (i = 0; i < intervals; i++, ref++) {
+ int remaining = metadata_size - pi_offset;
+ unsigned char *m = p;
+
+ for (j = 0; j < pi_offset; j++)
+ m[j] = (unsigned char)(ref + j + i);
+
+ p += pi_offset;
+ if (reftag_enabled) {
+ if (pi_size == 8) {
+ struct t10_pi_tuple *tuple = p;
+
+ tuple->ref_tag = swap(ref);
+ remaining -= sizeof(*tuple);
+ p += sizeof(*tuple);
+ } else if (pi_size == 16) {
+ struct crc64_pi_tuple *tuple = p;
+
+ __put_unaligned_be48(ref, tuple->ref_tag);
+ remaining -= sizeof(*tuple);
+ p += sizeof(*tuple);
+ }
+ }
+
+ m = p;
+ for (j = 0; j < remaining; j++)
+ m[j] = (unsigned char)~(ref + j + i);
+
+ p += remaining;
+ }
+}
+
+static int check_metadata(void *p, int intervals, int ref)
+{
+ int i, j;
+
+ for (i = 0; i < intervals; i++, ref++) {
+ int remaining = metadata_size - pi_offset;
+ unsigned char *m = p;
+
+ for (j = 0; j < pi_offset; j++) {
+ if (m[j] != (unsigned char)(ref + j + i)) {
+ fprintf(stderr, "(pre)interval:%d byte:%d expected:%x got:%x\n",
+ i, j, (unsigned char)(ref + j + i), m[j]);
+ return -1;
+ }
+ }
+
+ p += pi_offset;
+ if (reftag_enabled) {
+ if (pi_size == 8) {
+ struct t10_pi_tuple *tuple = p;
+
+ if (swap(tuple->ref_tag) != ref) {
+ fprintf(stderr, "reftag interval:%d expected:%x got:%x\n",
+ i, ref, swap(tuple->ref_tag));
+ return -1;
+ }
+
+ remaining -= sizeof(*tuple);
+ p += sizeof(*tuple);
+ } else if (pi_size == 16) {
+ struct crc64_pi_tuple *tuple = p;
+ __u64 v = get_unaligned_be48(tuple->ref_tag);
+
+ if (v != ref) {
+ fprintf(stderr, "reftag interval:%d expected:%x got:%llx\n",
+ i, ref, v);
+ return -1;
+ }
+ remaining -= sizeof(*tuple);
+ p += sizeof(*tuple);
+ }
+ }
+
+ m = p;
+ for (j = 0; j < remaining; j++) {
+ if (m[j] != (unsigned char)~(ref + j + i)) {
+ fprintf(stderr, "(post)interval:%d byte:%d expected:%x got:%x\n",
+ i, j, (unsigned char)~(ref + j + i), m[j]);
+ return -1;
+ }
+ }
+
+ p += remaining;
+ }
+
+ return 0;
+}
+
+static void init_data(void *data, int offset)
+{
+ unsigned char *d = data;
+ int i;
+
+ for (i = 0; i < DATA_SIZE; i++)
+ d[i] = (unsigned char)(0xaa + offset + i);
+}
+
+static int check_data(void *data, int offset)
+{
+ unsigned char *d = data;
+ int i;
+
+ for (i = 0; i < DATA_SIZE; i++)
+ if (d[i] != (unsigned char)(0xaa + offset + i))
+ return -1;
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int fd, ret, i, offset, intervals, metabuffer_size, metabuffer_tx_size;
+ void *orig_data_buf, *orig_pi_buf, *data_buf;
+ struct io_uring_cqe *cqes[2];
+ struct io_uring_cqe *cqe;
+ struct io_uring_sqe *sqe;
+ struct io_uring ring;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <dev>\n", argv[0]);
+ return 1;
+ }
+
+ fd = open(argv[1], O_RDWR | O_DIRECT);
+ if (fd < 0) {
+ perror("Failed to open device with O_DIRECT");
+ return 1;
+ }
+
+ ret = init_capabilities(fd);
+ if (ret < 0)
+ return 1;
+ if (lba_size == 0 || metadata_size == 0)
+ return 1;
+
+ intervals = DATA_SIZE / lba_size;
+ metabuffer_tx_size = intervals * metadata_size;
+ metabuffer_size = metabuffer_tx_size * 2;
+
+ if (posix_memalign(&orig_data_buf, pagesize, DATA_SIZE)) {
+ perror("posix_memalign failed for data buffer");
+ ret = 1;
+ goto close;
+ }
+
+ if (posix_memalign(&orig_pi_buf, pagesize, metabuffer_size)) {
+ perror("posix_memalign failed for metadata buffer");
+ ret = 1;
+ goto free;
+ }
+
+ ret = io_uring_queue_init(8, &ring, 0);
+ if (ret < 0) {
+ perror("io_uring_queue_init failed");
+ goto cleanup;
+ }
+
+ data_buf = orig_data_buf;
+ for (offset = 0; offset < 512; offset++) {
+ void *pi_buf = (char *)orig_pi_buf + offset * 4;
+ struct io_uring_attr_pi pi_attr = {
+ .addr = (__u64)pi_buf,
+ .seed = offset,
+ .len = metabuffer_tx_size,
+ };
+
+ if (reftag_enabled)
+ pi_attr.flags = IO_INTEGRITY_CHK_REFTAG;
+
+ init_data(data_buf, offset);
+ init_metadata(pi_buf, intervals, offset);
+
+ sqe = io_uring_get_sqe(&ring);
+ if (!sqe) {
+ fprintf(stderr, "Failed to get SQE\n");
+ ret = 1;
+ goto ring_exit;
+ }
+
+ io_uring_prep_write(sqe, fd, data_buf, DATA_SIZE, offset * lba_size * 8);
+ io_uring_sqe_set_data(sqe, (void *)1L);
+
+#ifdef PI_URING_COMPAT
+ /* old liburing, use fields that overlap in the union */
+ sqe->__pad2[0] = IORING_RW_ATTR_FLAG_PI;
+ sqe->addr3 = (__u64)&pi_attr;
+#else
+ sqe->attr_type_mask = IORING_RW_ATTR_FLAG_PI;
+ sqe->attr_ptr = (__u64)&pi_attr;
+#endif
+ ret = io_uring_submit(&ring);
+ if (ret < 1) {
+ perror("io_uring_submit failed (WRITE)");
+ ret = 1;
+ goto ring_exit;
+ }
+
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret < 0) {
+ perror("io_uring_wait_cqe failed (WRITE)");
+ ret = 1;
+ goto ring_exit;
+ }
+
+ if (cqe->res < 0) {
+ fprintf(stderr, "write failed at offset %d: %s\n",
+ offset, strerror(-cqe->res));
+ ret = 1;
+ goto ring_exit;
+ }
+
+ io_uring_cqe_seen(&ring, cqe);
+
+ memset(data_buf, 0, DATA_SIZE);
+ memset(pi_buf, 0, metabuffer_tx_size);
+
+ sqe = io_uring_get_sqe(&ring);
+ if (!sqe) {
+ fprintf(stderr, "failed to get SQE\n");
+ ret = 1;
+ goto ring_exit;
+ }
+
+ io_uring_prep_read(sqe, fd, data_buf, DATA_SIZE, offset * lba_size * 8);
+ io_uring_sqe_set_data(sqe, (void *)2L);
+
+#ifdef PI_URING_COMPAT
+ sqe->__pad2[0] = IORING_RW_ATTR_FLAG_PI;
+ sqe->addr3 = (__u64)&pi_attr;
+#else
+ sqe->attr_type_mask = IORING_RW_ATTR_FLAG_PI;
+ sqe->attr_ptr = (__u64)&pi_attr;
+#endif
+
+ ret = io_uring_submit(&ring);
+ if (ret < 1) {
+ perror("io_uring_submit failed (read)");
+ ret = 1;
+ goto ring_exit;
+ }
+
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret < 0) {
+ fprintf(stderr, "io_uring_wait_cqe failed (read): %s\n", strerror(-ret));
+ ret = 1;
+ goto ring_exit;
+ }
+
+ if (cqe->res < 0) {
+ fprintf(stderr, "read failed at offset %d: %s\n",
+ offset, strerror(-cqe->res));
+ ret = 1;
+ goto ring_exit;
+ }
+
+ ret = check_data(data_buf, offset);
+ if (ret) {
+ fprintf(stderr, "data corruption at offset %d\n",
+ offset);
+ ret = 1;
+ goto ring_exit;
+ }
+
+ ret = check_metadata(pi_buf, intervals, offset);
+ if (ret) {
+ fprintf(stderr, "metadata corruption at offset %d\n",
+ offset);
+ ret = 1;
+ goto ring_exit;
+ }
+
+ io_uring_cqe_seen(&ring, cqe);
+ }
+
+ memset(data_buf, 0, DATA_SIZE);
+ for (i = 0; i < 2; i++) {
+ sqe = io_uring_get_sqe(&ring);
+ if (!sqe) {
+ fprintf(stderr, "failed get sqe\n");
+ ret = 1;
+ goto ring_exit;
+ }
+
+ io_uring_prep_write(sqe, fd, data_buf, DATA_SIZE, DATA_SIZE * i);
+ io_uring_sqe_set_data(sqe, (void *)(uintptr_t)i + 1);
+ }
+
+ ret = io_uring_submit(&ring);
+ if (ret < 1) {
+ fprintf(stderr, "failed to submit sqes\n");
+ goto ring_exit;
+ }
+ ret = io_uring_wait_cqe_nr(&ring, cqes, 2);
+ if (ret)
+ fprintf(stderr, "failed to reap cqes\n");
+ring_exit:
+ io_uring_queue_exit(&ring);
+cleanup:
+ free(orig_pi_buf);
+free:
+ free(orig_data_buf);
+close:
+ close(fd);
+ return ret;
+}
diff --git a/tests/block/043 b/tests/block/043
new file mode 100755
index 0000000..dcd80d4
--- /dev/null
+++ b/tests/block/043
@@ -0,0 +1,33 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2025 Keith Busch <kbusch@kernel.org>
+#
+# Tests various user space metadata offsets with io_uring capabilities. If the
+# format uses ref tag protection, test various seed offsets as well.
+
+. tests/block/rc
+. common/nvme
+
+DESCRIPTION="Test userspace metadata offsets"
+QUICK=1
+
+device_requires() {
+ _test_dev_has_metadata
+ _test_dev_disables_extended_lba
+}
+
+requires() {
+ _have_kernel_option IO_URING
+ _have_kernel_option BLK_DEV_INTEGRITY
+}
+
+test_device() {
+ echo "Running ${TEST_NAME}"
+
+ if ! src/metadata ${TEST_DEV}; then
+ echo "src/metadata failed"
+ fi
+
+ echo "Test complete"
+}
+
diff --git a/tests/block/043.out b/tests/block/043.out
new file mode 100644
index 0000000..fda7fca
--- /dev/null
+++ b/tests/block/043.out
@@ -0,0 +1,2 @@
+Running block/043
+Test complete
--
2.47.3
next prev parent reply other threads:[~2025-11-19 19:55 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-19 19:54 [PATCHv2 0/2] blktests: add tests with offsets Keith Busch
2025-11-19 19:54 ` [PATCHv2 1/2] blktests: test direct io offsets Keith Busch
2025-11-19 23:39 ` Chaitanya Kulkarni
2025-11-19 23:48 ` Chaitanya Kulkarni
2025-11-19 23:59 ` Keith Busch
2025-11-20 0:56 ` Chaitanya Kulkarni
2025-11-20 14:32 ` Keith Busch
2025-11-25 11:26 ` Shinichiro Kawasaki
2025-11-25 16:42 ` Keith Busch
2025-12-02 10:22 ` Shinichiro Kawasaki
2025-11-19 19:54 ` Keith Busch [this message]
2025-11-19 23:39 ` [PATCHv2 2/2] blktests: test io_uring user metadata offsets Chaitanya Kulkarni
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251119195449.2922332-3-kbusch@meta.com \
--to=kbusch@meta.com \
--cc=chaitanyak@nvidia.com \
--cc=kbusch@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=shinichiro.kawasaki@wdc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox