public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed
From: Chaitanya Kulkarni <chaitanyak@nvidia.com>
To: Keith Busch <kbusch@meta.com>,
	"linux-block@vger.kernel.org" <linux-block@vger.kernel.org>,
	"hch@lst.de" <hch@lst.de>,
	"shinichiro.kawasaki@wdc.com" <shinichiro.kawasaki@wdc.com>
Cc: Keith Busch <kbusch@kernel.org>
Subject: Re: [PATCH blktests] io_uring user metadata offset test
Date: Sat, 8 Nov 2025 03:26:20 +0000	[thread overview]
Message-ID: <6de276d4-ef64-48db-b756-284bf66ec420@nvidia.com> (raw)
In-Reply-To: <20251107231836.1280881-1-kbusch@meta.com>

On 11/7/25 15:18, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
>
> For devices with metadata, tests various userspace offsets with
> io_uring capabilities. If the metadata is formatted with ref tag
> protection information, test various seed offsets as well.
>
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
>   src/.gitignore      |   1 +
>   src/Makefile        |   7 +-
>   src/metadata.c      | 481 ++++++++++++++++++++++++++++++++++++++++++++
>   tests/block/043     |  27 +++
>   tests/block/043.out |   2 +
>   5 files changed, 515 insertions(+), 3 deletions(-)
>   create mode 100644 src/metadata.c
>   create mode 100755 tests/block/043
>   create mode 100644 tests/block/043.out
>
> diff --git a/src/.gitignore b/src/.gitignore
> index 865675c..e6c6c38 100644
> --- a/src/.gitignore
> +++ b/src/.gitignore
> @@ -3,6 +3,7 @@
>   /loblksize
>   /loop_change_fd
>   /loop_get_status_null
> +/metadata
>   /mount_clear_sock
>   /nbdsetsize
>   /openclose
> diff --git a/src/Makefile b/src/Makefile
> index 179a673..7146db0 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -22,7 +22,8 @@ C_TARGETS := \
>   	sg/syzkaller1 \
>   	zbdioctl
>   
> -C_MINIUBLK := miniublk
> +C_MINIUBLK := miniublk \
> +		metadata
>   
>   HAVE_LIBURING := $(call HAVE_C_MACRO,liburing.h,IORING_OP_URING_CMD)
>   HAVE_UBLK_HEADER := $(call HAVE_C_HEADER,linux/ublk_cmd.h,1)
> @@ -61,8 +62,8 @@ $(C_TARGETS): %: %.c
>   $(CXX_TARGETS): %: %.cpp
>   	$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ $^
>   
> -$(C_MINIUBLK): %: miniublk.c
> -	$(CC) $(CFLAGS) $(LDFLAGS) $(MINIUBLK_FLAGS) -o $@ miniublk.c \
> +$(C_MINIUBLK): %: %.c
> +	$(CC) $(CFLAGS) $(LDFLAGS) $(MINIUBLK_FLAGS) -o $@ $^ \
>   		$(MINIUBLK_LIBS)
>   
>   .PHONY: all clean install
> diff --git a/src/metadata.c b/src/metadata.c
> new file mode 100644
> index 0000000..4628299
> --- /dev/null
> +++ b/src/metadata.c
> @@ -0,0 +1,481 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Description: test userspace metadata
> + */
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <sys/ioctl.h>
> +#include <linux/fs.h>
> +#include <liburing.h>
> +
> +#ifndef IORING_RW_ATTR_FLAG_PI
> +#define PI_URING_COMPAT
> +#define IORING_RW_ATTR_FLAG_PI  (1U << 0)
> +/* PI attribute information */
> +struct io_uring_attr_pi {
> +	__u16   flags;
> +	__u16   app_tag;
> +	__u32   len;
> +	__u64   addr;
> +	__u64   seed;
> +	__u64   rsvd;
> +};
> +#endif
> +
> +#ifndef FS_IOC_GETLBMD_CAP
> +/* Protection info capability flags */
> +#define LBMD_PI_CAP_INTEGRITY           (1 << 0)
> +#define LBMD_PI_CAP_REFTAG              (1 << 1)
> +
> +/* Checksum types for Protection Information */
> +#define LBMD_PI_CSUM_NONE               0
> +#define LBMD_PI_CSUM_IP                 1
> +#define LBMD_PI_CSUM_CRC16_T10DIF       2
> +#define LBMD_PI_CSUM_CRC64_NVME         4
> +
> +/*
> + * Logical block metadata capability descriptor
> + * If the device does not support metadata, all the fields will be zero.
> + * Applications must check lbmd_flags to determine whether metadata is
> + * supported or not.
> + */
> +struct logical_block_metadata_cap {
> +	/* Bitmask of logical block metadata capability flags */
> +	__u32	lbmd_flags;
> +	/*
> +	 * The amount of data described by each unit of logical block
> +	 * metadata
> +	 */
> +	__u16	lbmd_interval;
> +	/*
> +	 * Size in bytes of the logical block metadata associated with each
> +	 * interval
> +	 */
> +	__u8	lbmd_size;
> +	/*
> +	 * Size in bytes of the opaque block tag associated with each
> +	 * interval
> +	 */
> +	__u8	lbmd_opaque_size;
> +	/*
> +	 * Offset in bytes of the opaque block tag within the logical block
> +	 * metadata
> +	 */
> +	__u8	lbmd_opaque_offset;
> +	/* Size in bytes of the T10 PI tuple associated with each interval */
> +	__u8	lbmd_pi_size;
> +	/* Offset in bytes of T10 PI tuple within the logical block metadata */
> +	__u8	lbmd_pi_offset;
> +	/* T10 PI guard tag type */
> +	__u8	lbmd_guard_tag_type;
> +	/* Size in bytes of the T10 PI application tag */
> +	__u8	lbmd_app_tag_size;
> +	/* Size in bytes of the T10 PI reference tag */
> +	__u8	lbmd_ref_tag_size;
> +	/* Size in bytes of the T10 PI storage tag */
> +	__u8	lbmd_storage_tag_size;
> +	__u8	pad;
> +};
> +
> +#define FS_IOC_GETLBMD_CAP                      _IOWR(0x15, 2, struct logical_block_metadata_cap)
> +#endif /* FS_IOC_GETLBMD_CAP */
> +
> +#ifndef IO_INTEGRITY_CHK_GUARD
> +/* flags for integrity meta */
> +#define IO_INTEGRITY_CHK_GUARD          (1U << 0) /* enforce guard check */
> +#define IO_INTEGRITY_CHK_REFTAG         (1U << 1) /* enforce ref check */
> +#define IO_INTEGRITY_CHK_APPTAG         (1U << 2) /* enforce app check */
> +#endif /* IO_INTEGRITY_CHK_GUARD */
> +
> +/* This size should guarantee at least one split */
> +#define DATA_SIZE (8 * 1024 * 1024)
> +
> +static unsigned short lba_size;
> +static unsigned char metadata_size;
> +static unsigned char pi_size;
> +static unsigned char pi_offset;

nit:- do you really need above global duplication ?
you can just declare struct logical_block_metadata_cap md_cap
globally use md_cap.xxx ?

> +static bool reftag_enabled;
> +
> +static long pagesize;
> +
> +struct t10_pi_tuple {
> +        __be16 guard_tag;       /* Checksum */
> +        __be16 app_tag;         /* Opaque storage */
> +        __be32 ref_tag;         /* Target LBA or indirect LBA */
> +};
> +
> +struct crc64_pi_tuple {
> +        __be64 guard_tag;
> +        __be16 app_tag;
> +        __u8   ref_tag[6];
> +};
> +
> +static int init_capabilities(int fd)
> +{
> +	struct logical_block_metadata_cap md_cap;
> +	int ret;
> +
> +	ret = ioctl(fd, FS_IOC_GETLBMD_CAP, &md_cap);
> +	if (ret < 0)
> +		return ret;

fprintf(stderr, "FS_IOC_GETLBMD_CAP failed: %s\n", strerror(errno)); ?

> +
> +	lba_size = md_cap.lbmd_interval;
> +	metadata_size = md_cap.lbmd_size;
> +	pi_size = md_cap.lbmd_pi_size;
> +	pi_offset = md_cap.lbmd_pi_offset;
> +	reftag_enabled = md_cap.lbmd_flags & LBMD_PI_CAP_REFTAG;
> +
> +	pagesize = sysconf(_SC_PAGE_SIZE);
> +	return 0;
> +}
> +
> +static unsigned int swap(unsigned int value)
> +{
> +	return ((value >> 24) & 0x000000ff) |
> +		((value >> 8)  & 0x0000ff00) |
> +		((value << 8)  & 0x00ff0000) |
> +		((value << 24) & 0xff000000);
> +}
> +
> +static inline void __put_unaligned_be48(const __u64 val, __u8 *p)
> +{
> +	*p++ = (val >> 40) & 0xff;
> +	*p++ = (val >> 32) & 0xff;
> +	*p++ = (val >> 24) & 0xff;
> +	*p++ = (val >> 16) & 0xff;
> +	*p++ = (val >> 8) & 0xff;
> +	*p++ = val & 0xff;
> +}
> +
> +static inline void put_unaligned_be48(const __u64 val, void *p)
> +{
> +	__put_unaligned_be48(val, p);
> +}
> +
> +static inline __u64 __get_unaligned_be48(const __u8 *p)
> +{
> +	return (__u64)p[0] << 40 | (__u64)p[1] << 32 | (__u64)p[2] << 24 |
> +		p[3] << 16 | p[4] << 8 | p[5];
> +}
> +
> +static inline __u64 get_unaligned_be48(const void *p)
> +{
> +	return __get_unaligned_be48(p);
> +}
> +
> +static void init_metadata(void *p, int intervals, int ref)
> +{
> +	int i, j;
> +
> +	for (i = 0; i < intervals; i++, ref++) {
> +		int remaining = metadata_size - pi_offset;
> +		unsigned char *m = p;
> +
> +		for (j = 0; j < pi_offset; j++)
> +			m[j] = (unsigned char)(ref + j + i);
> +
> +		p += pi_offset;
> +		if (reftag_enabled) {
> +			if (pi_size == 8) {
> +				struct t10_pi_tuple *tuple = p;
> +
> +				tuple->ref_tag = swap(ref);
> +				remaining -= sizeof(*tuple);
> +				p += sizeof(*tuple);
> +			} else if (pi_size == 16) {
> +				struct crc64_pi_tuple *tuple = p;
> +
> +				__put_unaligned_be48(ref, tuple->ref_tag);
> +				remaining -= sizeof(*tuple);
> +				p += sizeof(*tuple);
> +			}
> +		}
> +
> +		m = p;
> +		for (j = 0; j < remaining; j++)
> +			m[j] = (unsigned char)~(ref + j + i);
> +
> +		p += remaining;
> +	}
> +}
> +
> +static int check_metadata(void *p, int intervals, int ref)
> +{
> +	int i, j;
> +
> +	for (i = 0; i < intervals; i++, ref++) {
> +		int remaining = metadata_size - pi_offset;
> +		unsigned char *m = p;
> +
> +		for (j = 0; j < pi_offset; j++) {
> +			if (m[j] != (unsigned char)(ref + j + i)) {
> +				fprintf(stderr, "(pre)interval:%d byte:%d expected:%x got:%x\n",
> +					i, j, (unsigned char)(ref + j + i), m[j]);
> +				return -1;
> +			}
> +		}
> +
> +		p += pi_offset;
> +		if (reftag_enabled) {
> +			if (pi_size == 8) {
> +				struct t10_pi_tuple *tuple = p;
> +
> +				if (swap(tuple->ref_tag) != ref) {
> +					fprintf(stderr, "reftag interval:%d expected:%x got:%x\n",
> +						i, ref, swap(tuple->ref_tag));
> +					return -1;
> +				}
> +
> +				remaining -= sizeof(*tuple);
> +				p += sizeof(*tuple);
> +			} else if (pi_size == 16) {
> +				struct crc64_pi_tuple *tuple = p;
> +				__u64 v = get_unaligned_be48(tuple->ref_tag);
> +
> +				if (v != ref) {
> +					fprintf(stderr, "reftag interval:%d expected:%x got:%llx\n",
> +						i, ref, v);
> +					return -1;
> +				}
> +				remaining -= sizeof(*tuple);
> +				p += sizeof(*tuple);
> +			}
> +		}
> +
> +		m = p;
> +		for (j = 0; j < remaining; j++) {
> +			if (m[j] != (unsigned char)~(ref + j + i)) {
> +				fprintf(stderr, "(post)interval:%d byte:%d expected:%x got:%x\n",
> +					i, j, (unsigned char)~(ref + j + i), m[j]);
> +				return -1;
> +			}
> +		}
> +
> +		p += remaining;
> +	}
> +
> +	return 0;
> +}
> +
> +static int init_data(void *data, int offset)
> +{
> +	unsigned char *d = data;
> +	int i;
> +
> +	for (i = 0; i < DATA_SIZE; i++)
> +		d[i] = (unsigned char)(0xaa + offset + i);
> +
> +	return 0;
> +}
> +

it should be :-

static int init_data(void *data, int offset)
{
	unsigned char *d = data;
	int i;

	for (i = 0; i < DATA_SIZE; i++)
		d[i] = (unsigned char)(0xaa + offset + i);

}
  

> +static int check_data(void *data, int offset)
> +{
> +	unsigned char *d = data;
> +	int i;
> +
> +	for (i = 0; i < DATA_SIZE; i++)
> +		if (d[i] != (unsigned char)(0xaa + offset + i))
> +			return -1;
> +
> +	return 0;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	int fd, ret, offset, intervals, metabuffer_size, metabuffer_tx_size;
> +	void *orig_data_buf, *orig_pi_buf, *data_buf;
> +	struct io_uring_sqe *sqe;
> +	struct io_uring_cqe *cqe;
> +	struct io_uring ring;
> +
> +	if (argc < 2) {
> +		fprintf(stderr, "Usage: %s <dev>\n", argv[0]);
> +		return 1;
> +	}
> +
> +	fd = open(argv[1], O_RDWR | O_DIRECT);
> +	if (fd < 0) {
> +		perror("Failed to open device with O_DIRECT");
> +		return 1;
> +	}
> +
> +	ret = init_capabilities(fd);
> +	if (ret < 0)
> +		return 1;
> +	if (lba_size == 0 || metadata_size == 0)
> +		return 1;
> +
> +	intervals = DATA_SIZE / lba_size;
> +	metabuffer_tx_size = intervals * metadata_size;
> +	metabuffer_size = metabuffer_tx_size * 2;
> +
> +	if (posix_memalign(&orig_data_buf, pagesize, DATA_SIZE)) {
> +		perror("posix_memalign failed for data buffer");
> +		ret = 1;
> +		goto close;
> +	}
> +
> +	if (posix_memalign(&orig_pi_buf, pagesize, metabuffer_size)) {
> +		perror("posix_memalign failed for metadata buffer");
> +		ret = 1;
> +		goto free;
> +	}
> +
> +	ret = io_uring_queue_init(8, &ring, 0);
> +	if (ret < 0) {
> +		perror("io_uring_queue_init failed");
> +		goto cleanup;
> +	}
> +
> +	data_buf = orig_data_buf;

both are not modified after above assignment till program exit
why can't we just use one instead of both ?

-ck



  parent reply	other threads:[~2025-11-08  3:26 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-07 23:18 [PATCH blktests] io_uring user metadata offset test Keith Busch
2025-11-08  2:09 ` Chaitanya Kulkarni
2025-11-19 16:50   ` Keith Busch
2025-11-08  3:26 ` Chaitanya Kulkarni [this message]
2025-11-11  7:58 ` Shinichiro Kawasaki
2025-11-19 16:10   ` Keith Busch

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6de276d4-ef64-48db-b756-284bf66ec420@nvidia.com \
    --to=chaitanyak@nvidia.com \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=kbusch@meta.com \
    --cc=linux-block@vger.kernel.org \
    --cc=shinichiro.kawasaki@wdc.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox