From: Chaitanya Kulkarni <chaitanyak@nvidia.com>
To: Keith Busch <kbusch@meta.com>,
"linux-block@vger.kernel.org" <linux-block@vger.kernel.org>,
"hch@lst.de" <hch@lst.de>,
"shinichiro.kawasaki@wdc.com" <shinichiro.kawasaki@wdc.com>
Cc: Keith Busch <kbusch@kernel.org>
Subject: Re: [PATCH blktests] io_uring user metadata offset test
Date: Sat, 8 Nov 2025 03:26:20 +0000 [thread overview]
Message-ID: <6de276d4-ef64-48db-b756-284bf66ec420@nvidia.com> (raw)
In-Reply-To: <20251107231836.1280881-1-kbusch@meta.com>
On 11/7/25 15:18, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
>
> For devices with metadata, tests various userspace offsets with
> io_uring capabilities. If the metadata is formatted with ref tag
> protection information, test various seed offsets as well.
>
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
> src/.gitignore | 1 +
> src/Makefile | 7 +-
> src/metadata.c | 481 ++++++++++++++++++++++++++++++++++++++++++++
> tests/block/043 | 27 +++
> tests/block/043.out | 2 +
> 5 files changed, 515 insertions(+), 3 deletions(-)
> create mode 100644 src/metadata.c
> create mode 100755 tests/block/043
> create mode 100644 tests/block/043.out
>
> diff --git a/src/.gitignore b/src/.gitignore
> index 865675c..e6c6c38 100644
> --- a/src/.gitignore
> +++ b/src/.gitignore
> @@ -3,6 +3,7 @@
> /loblksize
> /loop_change_fd
> /loop_get_status_null
> +/metadata
> /mount_clear_sock
> /nbdsetsize
> /openclose
> diff --git a/src/Makefile b/src/Makefile
> index 179a673..7146db0 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -22,7 +22,8 @@ C_TARGETS := \
> sg/syzkaller1 \
> zbdioctl
>
> -C_MINIUBLK := miniublk
> +C_MINIUBLK := miniublk \
> + metadata
>
> HAVE_LIBURING := $(call HAVE_C_MACRO,liburing.h,IORING_OP_URING_CMD)
> HAVE_UBLK_HEADER := $(call HAVE_C_HEADER,linux/ublk_cmd.h,1)
> @@ -61,8 +62,8 @@ $(C_TARGETS): %: %.c
> $(CXX_TARGETS): %: %.cpp
> $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ $^
>
> -$(C_MINIUBLK): %: miniublk.c
> - $(CC) $(CFLAGS) $(LDFLAGS) $(MINIUBLK_FLAGS) -o $@ miniublk.c \
> +$(C_MINIUBLK): %: %.c
> + $(CC) $(CFLAGS) $(LDFLAGS) $(MINIUBLK_FLAGS) -o $@ $^ \
> $(MINIUBLK_LIBS)
>
> .PHONY: all clean install
> diff --git a/src/metadata.c b/src/metadata.c
> new file mode 100644
> index 0000000..4628299
> --- /dev/null
> +++ b/src/metadata.c
> @@ -0,0 +1,481 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Description: test userspace metadata
> + */
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <sys/ioctl.h>
> +#include <linux/fs.h>
> +#include <liburing.h>
> +
> +#ifndef IORING_RW_ATTR_FLAG_PI
> +#define PI_URING_COMPAT
> +#define IORING_RW_ATTR_FLAG_PI (1U << 0)
> +/* PI attribute information */
> +struct io_uring_attr_pi {
> + __u16 flags;
> + __u16 app_tag;
> + __u32 len;
> + __u64 addr;
> + __u64 seed;
> + __u64 rsvd;
> +};
> +#endif
> +
> +#ifndef FS_IOC_GETLBMD_CAP
> +/* Protection info capability flags */
> +#define LBMD_PI_CAP_INTEGRITY (1 << 0)
> +#define LBMD_PI_CAP_REFTAG (1 << 1)
> +
> +/* Checksum types for Protection Information */
> +#define LBMD_PI_CSUM_NONE 0
> +#define LBMD_PI_CSUM_IP 1
> +#define LBMD_PI_CSUM_CRC16_T10DIF 2
> +#define LBMD_PI_CSUM_CRC64_NVME 4
> +
> +/*
> + * Logical block metadata capability descriptor
> + * If the device does not support metadata, all the fields will be zero.
> + * Applications must check lbmd_flags to determine whether metadata is
> + * supported or not.
> + */
> +struct logical_block_metadata_cap {
> + /* Bitmask of logical block metadata capability flags */
> + __u32 lbmd_flags;
> + /*
> + * The amount of data described by each unit of logical block
> + * metadata
> + */
> + __u16 lbmd_interval;
> + /*
> + * Size in bytes of the logical block metadata associated with each
> + * interval
> + */
> + __u8 lbmd_size;
> + /*
> + * Size in bytes of the opaque block tag associated with each
> + * interval
> + */
> + __u8 lbmd_opaque_size;
> + /*
> + * Offset in bytes of the opaque block tag within the logical block
> + * metadata
> + */
> + __u8 lbmd_opaque_offset;
> + /* Size in bytes of the T10 PI tuple associated with each interval */
> + __u8 lbmd_pi_size;
> + /* Offset in bytes of T10 PI tuple within the logical block metadata */
> + __u8 lbmd_pi_offset;
> + /* T10 PI guard tag type */
> + __u8 lbmd_guard_tag_type;
> + /* Size in bytes of the T10 PI application tag */
> + __u8 lbmd_app_tag_size;
> + /* Size in bytes of the T10 PI reference tag */
> + __u8 lbmd_ref_tag_size;
> + /* Size in bytes of the T10 PI storage tag */
> + __u8 lbmd_storage_tag_size;
> + __u8 pad;
> +};
> +
> +#define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap)
> +#endif /* FS_IOC_GETLBMD_CAP */
> +
> +#ifndef IO_INTEGRITY_CHK_GUARD
> +/* flags for integrity meta */
> +#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */
> +#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */
> +#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */
> +#endif /* IO_INTEGRITY_CHK_GUARD */
> +
> +/* This size should guarantee at least one split */
> +#define DATA_SIZE (8 * 1024 * 1024)
> +
> +static unsigned short lba_size;
> +static unsigned char metadata_size;
> +static unsigned char pi_size;
> +static unsigned char pi_offset;
nit:- do you really need above global duplication ?
you can just declare struct logical_block_metadata_cap md_cap
globally use md_cap.xxx ?
> +static bool reftag_enabled;
> +
> +static long pagesize;
> +
> +struct t10_pi_tuple {
> + __be16 guard_tag; /* Checksum */
> + __be16 app_tag; /* Opaque storage */
> + __be32 ref_tag; /* Target LBA or indirect LBA */
> +};
> +
> +struct crc64_pi_tuple {
> + __be64 guard_tag;
> + __be16 app_tag;
> + __u8 ref_tag[6];
> +};
> +
> +static int init_capabilities(int fd)
> +{
> + struct logical_block_metadata_cap md_cap;
> + int ret;
> +
> + ret = ioctl(fd, FS_IOC_GETLBMD_CAP, &md_cap);
> + if (ret < 0)
> + return ret;
fprintf(stderr, "FS_IOC_GETLBMD_CAP failed: %s\n", strerror(errno)); ?
> +
> + lba_size = md_cap.lbmd_interval;
> + metadata_size = md_cap.lbmd_size;
> + pi_size = md_cap.lbmd_pi_size;
> + pi_offset = md_cap.lbmd_pi_offset;
> + reftag_enabled = md_cap.lbmd_flags & LBMD_PI_CAP_REFTAG;
> +
> + pagesize = sysconf(_SC_PAGE_SIZE);
> + return 0;
> +}
> +
> +static unsigned int swap(unsigned int value)
> +{
> + return ((value >> 24) & 0x000000ff) |
> + ((value >> 8) & 0x0000ff00) |
> + ((value << 8) & 0x00ff0000) |
> + ((value << 24) & 0xff000000);
> +}
> +
> +static inline void __put_unaligned_be48(const __u64 val, __u8 *p)
> +{
> + *p++ = (val >> 40) & 0xff;
> + *p++ = (val >> 32) & 0xff;
> + *p++ = (val >> 24) & 0xff;
> + *p++ = (val >> 16) & 0xff;
> + *p++ = (val >> 8) & 0xff;
> + *p++ = val & 0xff;
> +}
> +
> +static inline void put_unaligned_be48(const __u64 val, void *p)
> +{
> + __put_unaligned_be48(val, p);
> +}
> +
> +static inline __u64 __get_unaligned_be48(const __u8 *p)
> +{
> + return (__u64)p[0] << 40 | (__u64)p[1] << 32 | (__u64)p[2] << 24 |
> + p[3] << 16 | p[4] << 8 | p[5];
> +}
> +
> +static inline __u64 get_unaligned_be48(const void *p)
> +{
> + return __get_unaligned_be48(p);
> +}
> +
> +static void init_metadata(void *p, int intervals, int ref)
> +{
> + int i, j;
> +
> + for (i = 0; i < intervals; i++, ref++) {
> + int remaining = metadata_size - pi_offset;
> + unsigned char *m = p;
> +
> + for (j = 0; j < pi_offset; j++)
> + m[j] = (unsigned char)(ref + j + i);
> +
> + p += pi_offset;
> + if (reftag_enabled) {
> + if (pi_size == 8) {
> + struct t10_pi_tuple *tuple = p;
> +
> + tuple->ref_tag = swap(ref);
> + remaining -= sizeof(*tuple);
> + p += sizeof(*tuple);
> + } else if (pi_size == 16) {
> + struct crc64_pi_tuple *tuple = p;
> +
> + __put_unaligned_be48(ref, tuple->ref_tag);
> + remaining -= sizeof(*tuple);
> + p += sizeof(*tuple);
> + }
> + }
> +
> + m = p;
> + for (j = 0; j < remaining; j++)
> + m[j] = (unsigned char)~(ref + j + i);
> +
> + p += remaining;
> + }
> +}
> +
> +static int check_metadata(void *p, int intervals, int ref)
> +{
> + int i, j;
> +
> + for (i = 0; i < intervals; i++, ref++) {
> + int remaining = metadata_size - pi_offset;
> + unsigned char *m = p;
> +
> + for (j = 0; j < pi_offset; j++) {
> + if (m[j] != (unsigned char)(ref + j + i)) {
> + fprintf(stderr, "(pre)interval:%d byte:%d expected:%x got:%x\n",
> + i, j, (unsigned char)(ref + j + i), m[j]);
> + return -1;
> + }
> + }
> +
> + p += pi_offset;
> + if (reftag_enabled) {
> + if (pi_size == 8) {
> + struct t10_pi_tuple *tuple = p;
> +
> + if (swap(tuple->ref_tag) != ref) {
> + fprintf(stderr, "reftag interval:%d expected:%x got:%x\n",
> + i, ref, swap(tuple->ref_tag));
> + return -1;
> + }
> +
> + remaining -= sizeof(*tuple);
> + p += sizeof(*tuple);
> + } else if (pi_size == 16) {
> + struct crc64_pi_tuple *tuple = p;
> + __u64 v = get_unaligned_be48(tuple->ref_tag);
> +
> + if (v != ref) {
> + fprintf(stderr, "reftag interval:%d expected:%x got:%llx\n",
> + i, ref, v);
> + return -1;
> + }
> + remaining -= sizeof(*tuple);
> + p += sizeof(*tuple);
> + }
> + }
> +
> + m = p;
> + for (j = 0; j < remaining; j++) {
> + if (m[j] != (unsigned char)~(ref + j + i)) {
> + fprintf(stderr, "(post)interval:%d byte:%d expected:%x got:%x\n",
> + i, j, (unsigned char)~(ref + j + i), m[j]);
> + return -1;
> + }
> + }
> +
> + p += remaining;
> + }
> +
> + return 0;
> +}
> +
> +static int init_data(void *data, int offset)
> +{
> + unsigned char *d = data;
> + int i;
> +
> + for (i = 0; i < DATA_SIZE; i++)
> + d[i] = (unsigned char)(0xaa + offset + i);
> +
> + return 0;
> +}
> +
it should be :-
static int init_data(void *data, int offset)
{
unsigned char *d = data;
int i;
for (i = 0; i < DATA_SIZE; i++)
d[i] = (unsigned char)(0xaa + offset + i);
}
> +static int check_data(void *data, int offset)
> +{
> + unsigned char *d = data;
> + int i;
> +
> + for (i = 0; i < DATA_SIZE; i++)
> + if (d[i] != (unsigned char)(0xaa + offset + i))
> + return -1;
> +
> + return 0;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + int fd, ret, offset, intervals, metabuffer_size, metabuffer_tx_size;
> + void *orig_data_buf, *orig_pi_buf, *data_buf;
> + struct io_uring_sqe *sqe;
> + struct io_uring_cqe *cqe;
> + struct io_uring ring;
> +
> + if (argc < 2) {
> + fprintf(stderr, "Usage: %s <dev>\n", argv[0]);
> + return 1;
> + }
> +
> + fd = open(argv[1], O_RDWR | O_DIRECT);
> + if (fd < 0) {
> + perror("Failed to open device with O_DIRECT");
> + return 1;
> + }
> +
> + ret = init_capabilities(fd);
> + if (ret < 0)
> + return 1;
> + if (lba_size == 0 || metadata_size == 0)
> + return 1;
> +
> + intervals = DATA_SIZE / lba_size;
> + metabuffer_tx_size = intervals * metadata_size;
> + metabuffer_size = metabuffer_tx_size * 2;
> +
> + if (posix_memalign(&orig_data_buf, pagesize, DATA_SIZE)) {
> + perror("posix_memalign failed for data buffer");
> + ret = 1;
> + goto close;
> + }
> +
> + if (posix_memalign(&orig_pi_buf, pagesize, metabuffer_size)) {
> + perror("posix_memalign failed for metadata buffer");
> + ret = 1;
> + goto free;
> + }
> +
> + ret = io_uring_queue_init(8, &ring, 0);
> + if (ret < 0) {
> + perror("io_uring_queue_init failed");
> + goto cleanup;
> + }
> +
> + data_buf = orig_data_buf;
both are not modified after above assignment till program exit
why can't we just use one instead of both ?
-ck
next prev parent reply other threads:[~2025-11-08 3:26 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-07 23:18 [PATCH blktests] io_uring user metadata offset test Keith Busch
2025-11-08 2:09 ` Chaitanya Kulkarni
2025-11-19 16:50 ` Keith Busch
2025-11-08 3:26 ` Chaitanya Kulkarni [this message]
2025-11-11 7:58 ` Shinichiro Kawasaki
2025-11-19 16:10 ` Keith Busch
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=6de276d4-ef64-48db-b756-284bf66ec420@nvidia.com \
--to=chaitanyak@nvidia.com \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=kbusch@meta.com \
--cc=linux-block@vger.kernel.org \
--cc=shinichiro.kawasaki@wdc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox