io-uring.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] liburing: add test for metadata
@ 2025-11-07  4:29 Keith Busch
  2025-11-08  0:27 ` Jens Axboe
  0 siblings, 1 reply; 2+ messages in thread
From: Keith Busch @ 2025-11-07  4:29 UTC (permalink / raw)
  To: io-uring; +Cc: axboe, Keith Busch

From: Keith Busch <kbusch@kernel.org>

If the test device supports metadata, try attaching a pi buffer with
various page offsets and seed offsets. If the metadata contains opaque
data preceding or following the data integrity field, fill it with a
data pattern, and verify that the expected data and metadata match when
read back.

The sizes and offsets sent should guarantee that kernel splits and
bounce buffers will get used.

At the end of the test, the written blocks are overwritten without
providing metadata. This test doesn't calculate the guard tags, so
writing without metadata lets the kernel generate the expected guard and
ref tags so that buffered IO won't get seemingly unexpected failures.

Tested on qemu nvme with 512b and 4k logical block sizes and with 8, 16,
and 64 byte metadata, both with and without pi offsets for the larger
formats.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 src/include/liburing/io_uring.h |  16 ++
 test/Makefile                   |   1 +
 test/metadata.c                 | 404 ++++++++++++++++++++++++++++++++
 3 files changed, 421 insertions(+)
 create mode 100644 test/metadata.c

diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
index 44ce8229..a54e5b42 100644
--- a/src/include/liburing/io_uring.h
+++ b/src/include/liburing/io_uring.h
@@ -100,6 +100,10 @@ struct io_uring_sqe {
 			__u64	addr3;
 			__u64	__pad2[1];
 		};
+		struct {
+			__u64   attr_ptr; /* pointer to attribute information */
+			__u64   attr_type_mask; /* bit mask of attributes */
+                };
 		__u64	optval;
 		/*
 		 * If the ring is initialized with IORING_SETUP_SQE128, then
@@ -109,6 +113,18 @@ struct io_uring_sqe {
 	};
 };
 
+/* sqe->attr_type_mask flags */
+#define IORING_RW_ATTR_FLAG_PI  (1U << 0)
+/*
+ * PI attribute information.
+ *
+ * test/metadata.c points sqe->attr_ptr at one of these and sets
+ * IORING_RW_ATTR_FLAG_PI in sqe->attr_type_mask for both its reads and
+ * its writes.
+ */
+struct io_uring_attr_pi {
+		/* check flags; the test passes IO_INTEGRITY_CHK_REFTAG here */
+		__u16	flags;
+		/* NOTE(review): presumably the expected application tag - confirm */
+		__u16	app_tag;
+		/* length in bytes of the metadata buffer at addr */
+		__u32	len;
+		/* user address of the metadata buffer, carried as an integer */
+		__u64	addr;
+		/*
+		 * The test passes the same value it uses as the first reference
+		 * tag; presumably the initial ref tag for the request - confirm.
+		 */
+		__u64	seed;
+		/* NOTE(review): reserved by its name; the test leaves it zero */
+		__u64	rsvd;
+};
+
 /*
  * If sqe->file_index is set to this for opcodes that instantiate a new
  * direct descriptor (like openat/openat2/accept), then io_uring will allocate
diff --git a/test/Makefile b/test/Makefile
index ee983680..8c4c6db5 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -141,6 +141,7 @@ test_srcs := \
 	link-timeout.c \
 	linked-defer-close.c \
 	madvise.c \
+	metadata.c \
 	min-timeout.c \
 	min-timeout-wait.c \
 	mkdir.c \
diff --git a/test/metadata.c b/test/metadata.c
new file mode 100644
index 00000000..66a2565b
--- /dev/null
+++ b/test/metadata.c
@@ -0,0 +1,404 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Description: test userspace metadata
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+
+#include "liburing.h"
+#include "helpers.h"
+#include "test.h"
+
+#ifndef FS_IOC_GETLBMD_CAP
+/* Protection info capability flags */
+#define LBMD_PI_CAP_INTEGRITY           (1 << 0)
+#define LBMD_PI_CAP_REFTAG              (1 << 1)
+
+/* Checksum types for Protection Information */
+#define LBMD_PI_CSUM_NONE               0
+#define LBMD_PI_CSUM_IP                 1
+#define LBMD_PI_CSUM_CRC16_T10DIF       2
+#define LBMD_PI_CSUM_CRC64_NVME         4
+
+/*
+ * Logical block metadata capability descriptor
+ * If the device does not support metadata, all the fields will be zero.
+ * Applications must check lbmd_flags to determine whether metadata is
+ * supported or not.
+ */
+struct logical_block_metadata_cap {
+	/* Bitmask of logical block metadata capability flags */
+	__u32	lbmd_flags;
+	/*
+	 * The amount of data described by each unit of logical block
+	 * metadata
+	 */
+	__u16	lbmd_interval;
+	/*
+	 * Size in bytes of the logical block metadata associated with each
+	 * interval
+	 */
+	__u8	lbmd_size;
+	/*
+	 * Size in bytes of the opaque block tag associated with each
+	 * interval
+	 */
+	__u8	lbmd_opaque_size;
+	/*
+	 * Offset in bytes of the opaque block tag within the logical block
+	 * metadata
+	 */
+	__u8	lbmd_opaque_offset;
+	/* Size in bytes of the T10 PI tuple associated with each interval */
+	__u8	lbmd_pi_size;
+	/* Offset in bytes of T10 PI tuple within the logical block metadata */
+	__u8	lbmd_pi_offset;
+	/* T10 PI guard tag type */
+	__u8	lbmd_guard_tag_type;
+	/* Size in bytes of the T10 PI application tag */
+	__u8	lbmd_app_tag_size;
+	/* Size in bytes of the T10 PI reference tag */
+	__u8	lbmd_ref_tag_size;
+	/* Size in bytes of the T10 PI storage tag */
+	__u8	lbmd_storage_tag_size;
+	/* Presumably explicit padding (by its name); never read by this test */
+	__u8	pad;
+};
+
+#define FS_IOC_GETLBMD_CAP                      _IOWR(0x15, 2, struct logical_block_metadata_cap)
+#endif /* FS_IOC_GETLBMD_CAP */
+
+#ifndef IO_INTEGRITY_CHK_GUARD
+/* flags for integrity meta */
+#define IO_INTEGRITY_CHK_GUARD          (1U << 0) /* enforce guard check */
+#define IO_INTEGRITY_CHK_REFTAG         (1U << 1) /* enforce ref check */
+#define IO_INTEGRITY_CHK_APPTAG         (1U << 2) /* enforce app check */
+#endif /* IO_INTEGRITY_CHK_GUARD */
+
+/* This size should guarantee at least one split */
+#define DATA_SIZE (8 * 1024 * 1024)
+
+static unsigned short lba_size;
+static unsigned char metadata_size;
+static unsigned char pi_offset;
+static bool reftag_enabled;
+
+static long pagesize;
+
+/*
+ * Protection information tuple that this test overlays on each metadata
+ * interval, pi_offset bytes into the interval, when reftag_enabled is set.
+ * The test only ever writes and compares ref_tag; guard_tag and app_tag
+ * are left untouched by it.
+ */
+struct t10_pi_tuple {
+        __be16 guard_tag;       /* Checksum */
+        __be16 app_tag;         /* Opaque storage */
+        __be32 ref_tag;         /* Target LBA or indirect LBA */
+};
+
+/*
+ * Query the metadata capabilities of the device behind @fd with
+ * FS_IOC_GETLBMD_CAP and cache what the test needs in the file-scope
+ * variables lba_size, metadata_size, pi_offset and reftag_enabled. The
+ * system page size is cached in pagesize as well.
+ *
+ * Returns 0 on success, or the negative ioctl() result on failure, in
+ * which case none of the cached values are touched.
+ */
+static int init_capabilities(int fd)
+{
+	struct logical_block_metadata_cap cap;
+	int rc = ioctl(fd, FS_IOC_GETLBMD_CAP, &cap);
+
+	if (rc < 0)
+		return rc;
+
+	pagesize = sysconf(_SC_PAGE_SIZE);
+
+	reftag_enabled = cap.lbmd_flags & LBMD_PI_CAP_REFTAG;
+	pi_offset = cap.lbmd_pi_offset;
+	metadata_size = cap.lbmd_size;
+	lba_size = cap.lbmd_interval;
+
+	return 0;
+}
+
+static unsigned int swap(unsigned int value)
+{
+	return ((value >> 24) & 0x000000ff) |
+		((value >> 8)  & 0x0000ff00) |
+		((value << 8)  & 0x00ff0000) |
+		((value << 24) & 0xff000000);
+}
+
+/*
+ * Fill @intervals consecutive metadata intervals starting at @p.
+ *
+ * Each interval is metadata_size bytes and is laid out as:
+ *   - pi_offset bytes of an incrementing pattern,
+ *   - when reftag_enabled, a struct t10_pi_tuple of which only ref_tag is
+ *     set (to the interval's reference tag; guard and app tags are left
+ *     as they were),
+ *   - the rest of the interval filled with the bitwise complement of the
+ *     incrementing pattern.
+ *
+ * The reference tag of the first interval is @ref and goes up by one per
+ * interval.
+ */
+static void init_metadata(void *p, int intervals, int ref)
+{
+	unsigned char *cur = p;
+	int blk, k;
+
+	for (blk = 0; blk < intervals; blk++) {
+		int tag = ref + blk;
+		int tail = metadata_size - pi_offset;
+
+		/* bytes in front of the PI tuple */
+		for (k = 0; k < pi_offset; k++)
+			cur[k] = (unsigned char)(tag + k + blk);
+		cur += pi_offset;
+
+		if (reftag_enabled) {
+			struct t10_pi_tuple *pi = (void *)cur;
+
+			pi->ref_tag = swap(tag);
+			tail -= sizeof(*pi);
+			cur += sizeof(*pi);
+		}
+
+		/* whatever is left of the interval */
+		for (k = 0; k < tail; k++)
+			cur[k] = (unsigned char)~(tag + k + blk);
+		cur += tail;
+	}
+}
+
+/*
+ * Compare @intervals metadata intervals starting at @p against the
+ * expected layout for a first reference tag of @ref: pi_offset bytes of
+ * an incrementing pattern, then (when reftag_enabled) a PI tuple whose
+ * ref_tag must equal the interval's reference tag, then the complemented
+ * pattern up to metadata_size bytes.
+ *
+ * Returns 0 if everything matches. On the first mismatch a description
+ * is printed to stderr and -1 is returned.
+ */
+static int check_metadata(void *p, int intervals, int ref)
+{
+	unsigned char *cur = p;
+	int blk, k;
+
+	for (blk = 0; blk < intervals; blk++) {
+		int tag = ref + blk;
+		int tail = metadata_size - pi_offset;
+
+		/* bytes in front of the PI tuple */
+		for (k = 0; k < pi_offset; k++) {
+			unsigned char want = (unsigned char)(tag + k + blk);
+
+			if (cur[k] == want)
+				continue;
+
+			fprintf(stderr, "(pre)interval:%d byte:%d expected:%x got:%x\n",
+				blk, k, want, cur[k]);
+			return -1;
+		}
+		cur += pi_offset;
+
+		if (reftag_enabled) {
+			struct t10_pi_tuple *pi = (void *)cur;
+			unsigned int got = swap(pi->ref_tag);
+
+			if (got != tag) {
+				fprintf(stderr, "reftag interval:%d expected:%x got:%x\n",
+					blk, tag, got);
+				return -1;
+			}
+
+			tail -= sizeof(*pi);
+			cur += sizeof(*pi);
+		}
+
+		/* whatever is left of the interval */
+		for (k = 0; k < tail; k++) {
+			unsigned char want = (unsigned char)~(tag + k + blk);
+
+			if (cur[k] == want)
+				continue;
+
+			fprintf(stderr, "(post)interval:%d byte:%d expected:%x got:%x\n",
+				blk, k, want, cur[k]);
+			return -1;
+		}
+		cur += tail;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill the DATA_SIZE byte buffer at @data with a byte counter that starts
+ * at 0xaa + @offset and wraps modulo 256. Always returns 0.
+ */
+static int init_data(void *data, int offset)
+{
+	unsigned char *out = data;
+	unsigned char *end = out + DATA_SIZE;
+	unsigned char next = (unsigned char)(0xaa + offset);
+
+	while (out < end)
+		*out++ = next++;
+
+	return 0;
+}
+
+/*
+ * Check that the DATA_SIZE byte buffer at @data holds a byte counter that
+ * starts at 0xaa + @offset and wraps modulo 256.
+ *
+ * Returns 0 when every byte matches and -1 at the first one that does not.
+ */
+static int check_data(void *data, int offset)
+{
+	const unsigned char *in = data;
+	unsigned char want = (unsigned char)(0xaa + offset);
+	int left;
+
+	for (left = DATA_SIZE; left > 0; left--, in++, want++) {
+		if (*in != want)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Submit the single SQE that was just prepared on @ring, wait for its
+ * completion and consume the CQE.
+ *
+ * @what names the operation in diagnostics and @len is the number of
+ * bytes the request is expected to transfer.
+ *
+ * Returns 0 if the request transferred exactly @len bytes. Otherwise a
+ * message is printed to stderr and -1 is returned. liburing reports
+ * errors as negative errno values and leaves errno alone, so strerror()
+ * is applied to the returned value instead of using perror().
+ */
+static int submit_and_reap(struct io_uring *ring, const char *what,
+			   unsigned int len)
+{
+	struct io_uring_cqe *cqe;
+	int ret, res;
+
+	ret = io_uring_submit(ring);
+	if (ret < 1) {
+		fprintf(stderr, "io_uring_submit failed (%s): %s\n", what,
+			ret < 0 ? strerror(-ret) : "nothing submitted");
+		return -1;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "io_uring_wait_cqe failed (%s): %s\n", what,
+			strerror(-ret));
+		return -1;
+	}
+
+	/* copy the result out before handing the CQE back to the ring */
+	res = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+
+	if (res < 0) {
+		fprintf(stderr, "%s failed: %s\n", what, strerror(-res));
+		return -1;
+	}
+	if ((unsigned int)res != len) {
+		fprintf(stderr, "short %s: %d of %u bytes\n", what, res, len);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Round-trip user supplied metadata through the block device named by
+ * argv[1].
+ *
+ * For each of 512 steps a DATA_SIZE write is issued with a metadata
+ * buffer attached, the buffers are cleared, the same range is read back
+ * with the metadata buffer attached again, and both data and metadata
+ * are compared against the generated patterns. Every step moves the
+ * metadata buffer by 4 bytes, the device position by 8 logical blocks
+ * and the seed/reference tag by one.
+ *
+ * Once all steps passed, the whole range that was written is rewritten
+ * with zeroes and no metadata attached.
+ *
+ * Returns T_EXIT_PASS on success, T_EXIT_SKIP if the device reports no
+ * metadata and T_EXIT_FAIL on any error.
+ */
+int main(int argc, char *argv[])
+{
+	int fd, ret, offset, intervals, metabuffer_size, metabuffer_tx_size;
+	void *orig_data_buf, *orig_pi_buf, *data_buf;
+	unsigned long long pos, written_end;
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	const int nr_offsets = 512;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s <dev>\n", argv[0]);
+		return T_EXIT_FAIL;
+	}
+
+	fd = open(argv[1], O_RDWR | O_DIRECT);
+	if (fd < 0) {
+		perror("Failed to open device with O_DIRECT");
+		return T_EXIT_FAIL;
+	}
+
+	ret = init_capabilities(fd);
+	if (ret < 0) {
+		ret = T_EXIT_FAIL;
+		goto close;
+	}
+	if (lba_size == 0 || metadata_size == 0) {
+		ret = T_EXIT_SKIP;
+		goto close;
+	}
+
+	intervals = DATA_SIZE / lba_size;
+	metabuffer_tx_size = intervals * metadata_size;
+	metabuffer_size = metabuffer_tx_size * 2;
+	/* the metadata buffer slides up to nr_offsets * 4 bytes into the allocation */
+	if (metabuffer_size < metabuffer_tx_size + nr_offsets * 4)
+		metabuffer_size = metabuffer_tx_size + nr_offsets * 4;
+
+	/* posix_memalign() returns the error number and does not set errno */
+	ret = posix_memalign(&orig_data_buf, pagesize, DATA_SIZE);
+	if (ret) {
+		fprintf(stderr, "posix_memalign failed for data buffer: %s\n",
+			strerror(ret));
+		ret = T_EXIT_FAIL;
+		goto close;
+	}
+
+	ret = posix_memalign(&orig_pi_buf, pagesize, metabuffer_size);
+	if (ret) {
+		fprintf(stderr, "posix_memalign failed for metadata buffer: %s\n",
+			strerror(ret));
+		ret = T_EXIT_FAIL;
+		goto free;
+	}
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret < 0) {
+		fprintf(stderr, "io_uring_queue_init failed: %s\n",
+			strerror(-ret));
+		ret = T_EXIT_FAIL;
+		goto cleanup;
+	}
+
+	data_buf = orig_data_buf;
+	for (offset = 0; offset < nr_offsets; offset++) {
+		void *pi_buf = (char *)orig_pi_buf + offset * 4;
+		__u64 dev_pos = (__u64)offset * lba_size * 8;
+		struct io_uring_attr_pi pi_attr = {
+			.addr = (__u64)(unsigned long)pi_buf,
+			.seed = offset,
+			.len = metabuffer_tx_size,
+		};
+
+		if (reftag_enabled)
+			pi_attr.flags = IO_INTEGRITY_CHK_REFTAG;
+
+		init_data(data_buf, offset);
+		init_metadata(pi_buf, intervals, offset);
+
+		sqe = io_uring_get_sqe(&ring);
+		if (!sqe) {
+			fprintf(stderr, "Failed to get SQE\n");
+			ret = T_EXIT_FAIL;
+			goto ring_exit;
+		}
+
+		io_uring_prep_write(sqe, fd, data_buf, DATA_SIZE, dev_pos);
+		io_uring_sqe_set_data(sqe, (void *)1L);
+
+		sqe->attr_type_mask = IORING_RW_ATTR_FLAG_PI;
+		sqe->attr_ptr = (__u64)(unsigned long)&pi_attr;
+
+		if (submit_and_reap(&ring, "write", DATA_SIZE)) {
+			fprintf(stderr, "write failed at offset %d\n", offset);
+			ret = T_EXIT_FAIL;
+			goto ring_exit;
+		}
+
+		/* make sure the read below has to bring the patterns back */
+		memset(data_buf, 0, DATA_SIZE);
+		memset(pi_buf, 0, metabuffer_tx_size);
+
+		sqe = io_uring_get_sqe(&ring);
+		if (!sqe) {
+			fprintf(stderr, "failed to get SQE\n");
+			ret = T_EXIT_FAIL;
+			goto ring_exit;
+		}
+
+		io_uring_prep_read(sqe, fd, data_buf, DATA_SIZE, dev_pos);
+		io_uring_sqe_set_data(sqe, (void *)2L);
+
+		sqe->attr_type_mask = IORING_RW_ATTR_FLAG_PI;
+		sqe->attr_ptr = (__u64)(unsigned long)&pi_attr;
+
+		if (submit_and_reap(&ring, "read", DATA_SIZE)) {
+			fprintf(stderr, "read failed at offset %d\n", offset);
+			ret = T_EXIT_FAIL;
+			goto ring_exit;
+		}
+
+		ret = check_data(data_buf, offset);
+		if (ret) {
+			fprintf(stderr, "data corruption at offset %d\n",
+				offset);
+			ret = T_EXIT_FAIL;
+			goto ring_exit;
+		}
+
+		ret = check_metadata(pi_buf, intervals, offset);
+		if (ret) {
+			fprintf(stderr, "metadata corruption at offset %d\n",
+				offset);
+			ret = T_EXIT_FAIL;
+			goto ring_exit;
+		}
+	}
+
+	/*
+	 * Rewrite every block the loop touched, this time without attaching
+	 * metadata. The last loop iteration started (nr_offsets - 1) * 8
+	 * logical blocks into the device and wrote DATA_SIZE bytes, so the
+	 * end of the written range scales with lba_size and can lie beyond
+	 * 2 * DATA_SIZE.
+	 */
+	memset(data_buf, 0, DATA_SIZE);
+	written_end = (unsigned long long)(nr_offsets - 1) * 8 * lba_size +
+		      DATA_SIZE;
+
+	for (pos = 0; pos < written_end; pos += DATA_SIZE) {
+		unsigned int len = DATA_SIZE;
+
+		if (written_end - pos < len)
+			len = written_end - pos;
+
+		sqe = io_uring_get_sqe(&ring);
+		if (!sqe) {
+			fprintf(stderr, "Failed to get SQE\n");
+			ret = T_EXIT_FAIL;
+			goto ring_exit;
+		}
+
+		io_uring_prep_write(sqe, fd, data_buf, len, pos);
+		io_uring_sqe_set_data(sqe, (void *)3L);
+
+		if (submit_and_reap(&ring, "overwrite", len)) {
+			fprintf(stderr, "overwrite failed at byte %llu\n", pos);
+			ret = T_EXIT_FAIL;
+			goto ring_exit;
+		}
+	}
+
+	ret = T_EXIT_PASS;
+ring_exit:
+	io_uring_queue_exit(&ring);
+cleanup:
+	free(orig_pi_buf);
+free:
+	free(orig_data_buf);
+close:
+	close(fd);
+	return ret;
+}
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-11-08  0:27 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-11-07  4:29 [PATCH] liburing: add test for metadata Keith Busch
2025-11-08  0:27 ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).