* [PATCH] block: always allocate integrity buffer when required
@ 2025-05-08 17:58 Keith Busch
2025-05-09 4:19 ` Christoph Hellwig
0 siblings, 1 reply; 4+ messages in thread
From: Keith Busch @ 2025-05-08 17:58 UTC (permalink / raw)
To: axboe, linux-block, martin.petersen, hch; +Cc: linux-nvme, Keith Busch
From: Keith Busch <kbusch@kernel.org>
Many nvme metadata formats can not strip or generate the metadata on the
controller side. For these, a host provided integrity buffer is
mandatory, even if it isn't checked.
The block integrity read_verify and write_generate attributes prevent
allocating the metadata buffer, but we need it when the format requires
it, otherwise reads and writes will be rejected by the driver with IO
errors.
Add a new blk_integrity flag to indicate if the disk format requires an
integrity buffer. When this flag is set, provide an unchecked buffer if
the sysfs attributes disabled verify or generation. This fixes the
following nvme warning:
------------[ cut here ]------------
WARNING: CPU: 1 PID: 371 at drivers/nvme/host/core.c:1036 nvme_setup_rw+0x122/0x210
...
RIP: 0010:nvme_setup_rw+0x122/0x210
...
Call Trace:
<TASK>
nvme_setup_cmd+0x1b4/0x280
nvme_queue_rqs+0xc4/0x1f0 [nvme]
blk_mq_dispatch_queue_requests+0x24a/0x430
blk_mq_flush_plug_list+0x50/0x140
__blk_flush_plug+0xc1/0x100
__submit_bio+0x1c1/0x360
? submit_bio_noacct_nocheck+0x2d6/0x3c0
submit_bio_noacct_nocheck+0x2d6/0x3c0
? submit_bio_noacct+0x47/0x4c0
submit_bio_wait+0x48/0xa0
__blkdev_direct_IO_simple+0xee/0x210
? current_time+0x1d/0x100
? current_time+0x1d/0x100
? __bio_clone+0xb0/0xb0
blkdev_read_iter+0xbb/0x140
vfs_read+0x239/0x310
ksys_read+0x58/0xc0
do_syscall_64+0x6c/0x180
entry_SYSCALL_64_after_hwframe+0x4b/0x53
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
v2->v3:
Use a helper function to test if an integrity payload needs to be
checked. And use all the different kinds of check flags.
Allocate an unchecked buffer only if the integrity format requires it.
The nvme formats that don't work with PRACT is the first subscriber to
this new flag.
block/bio-integrity-auto.c | 44 +++++++++++++++++++++++------------
drivers/nvme/host/core.c | 3 +++
include/linux/blk-integrity.h | 1 +
3 files changed, 33 insertions(+), 15 deletions(-)
diff --git a/block/bio-integrity-auto.c b/block/bio-integrity-auto.c
index e524c609be506..da07212087e06 100644
--- a/block/bio-integrity-auto.c
+++ b/block/bio-integrity-auto.c
@@ -43,6 +43,12 @@ static void bio_integrity_verify_fn(struct work_struct *work)
bio_endio(bio);
}
+#define BIP_CHECK_FLAGS (BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG)
+static bool bip_should_check(struct bio_integrity_payload *bip)
+{
+ return bip->bip_flags & BIP_CHECK_FLAGS;
+}
+
/**
* __bio_integrity_endio - Integrity I/O completion function
* @bio: Protected bio
@@ -54,12 +60,12 @@ static void bio_integrity_verify_fn(struct work_struct *work)
*/
bool __bio_integrity_endio(struct bio *bio)
{
- struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
struct bio_integrity_payload *bip = bio_integrity(bio);
struct bio_integrity_data *bid =
container_of(bip, struct bio_integrity_data, bip);
- if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
+ if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
+ bip_should_check(bip)) {
INIT_WORK(&bid->work, bio_integrity_verify_fn);
queue_work(kintegrityd_wq, &bid->work);
return false;
@@ -84,6 +90,7 @@ bool bio_integrity_prep(struct bio *bio)
{
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
struct bio_integrity_data *bid;
+ bool set_flags = true;
gfp_t gfp = GFP_NOIO;
unsigned int len;
void *buf;
@@ -100,19 +107,24 @@ bool bio_integrity_prep(struct bio *bio)
switch (bio_op(bio)) {
case REQ_OP_READ:
- if (bi->flags & BLK_INTEGRITY_NOVERIFY)
- return true;
+ if (bi->flags & BLK_INTEGRITY_NOVERIFY) {
+ if (!(bi->flags & BLK_INTEGRITY_BUFFER_REQUIRED))
+ return true;
+ set_flags = false;
+ }
break;
case REQ_OP_WRITE:
- if (bi->flags & BLK_INTEGRITY_NOGENERATE)
- return true;
-
/*
* Zero the memory allocated to not leak uninitialized kernel
* memory to disk for non-integrity metadata where nothing else
* initializes the memory.
*/
- if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
+ if (bi->flags & BLK_INTEGRITY_NOGENERATE) {
+ if (!(bi->flags & BLK_INTEGRITY_BUFFER_REQUIRED))
+ return true;
+ gfp |= __GFP_ZERO;
+ set_flags = false;
+ } else if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
gfp |= __GFP_ZERO;
break;
default:
@@ -137,19 +149,21 @@ bool bio_integrity_prep(struct bio *bio)
bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
- if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
- bid->bip.bip_flags |= BIP_IP_CHECKSUM;
- if (bi->csum_type)
- bid->bip.bip_flags |= BIP_CHECK_GUARD;
- if (bi->flags & BLK_INTEGRITY_REF_TAG)
- bid->bip.bip_flags |= BIP_CHECK_REFTAG;
+ if (set_flags) {
+ if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
+ bid->bip.bip_flags |= BIP_IP_CHECKSUM;
+ if (bi->csum_type)
+ bid->bip.bip_flags |= BIP_CHECK_GUARD;
+ if (bi->flags & BLK_INTEGRITY_REF_TAG)
+ bid->bip.bip_flags |= BIP_CHECK_REFTAG;
+ }
if (bio_integrity_add_page(bio, virt_to_page(buf), len,
offset_in_page(buf)) < len)
goto err_end_io;
/* Auto-generate integrity metadata if this is a write */
- if (bio_data_dir(bio) == WRITE)
+ if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
blk_integrity_generate(bio);
else
bid->saved_bio_iter = bio->bi_iter;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index af871d268fcb6..f0992a4e59512 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1867,6 +1867,9 @@ static bool nvme_init_integrity(struct nvme_ns_head *head,
break;
}
+ if (!nvme_ns_has_pi(head))
+ bi->flags |= BLK_INTEGRITY_BUFFER_REQUIRED;
+
bi->tuple_size = head->ms;
bi->pi_offset = info->pi_offset;
return true;
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index c7eae0bfb013f..ad657d086e715 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -13,6 +13,7 @@ enum blk_integrity_flags {
BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2,
BLK_INTEGRITY_REF_TAG = 1 << 3,
BLK_INTEGRITY_STACKED = 1 << 4,
+ BLK_INTEGRITY_BUFFER_REQUIRED = 1 << 5,
};
const char *blk_integrity_profile_name(struct blk_integrity *bi);
--
2.47.1
^ permalink raw reply related [flat|nested] 4+ messages in thread* Re: [PATCH] block: always allocate integrity buffer when required
2025-05-08 17:58 [PATCH] block: always allocate integrity buffer when required Keith Busch
@ 2025-05-09 4:19 ` Christoph Hellwig
2025-05-09 14:33 ` Keith Busch
0 siblings, 1 reply; 4+ messages in thread
From: Christoph Hellwig @ 2025-05-09 4:19 UTC (permalink / raw)
To: Keith Busch
Cc: axboe, linux-block, martin.petersen, hch, linux-nvme, Keith Busch
On Thu, May 08, 2025 at 10:58:14AM -0700, Keith Busch wrote:
> Add a new blk_integrity flag to indicate if the disk format requires an
> integrity buffer. When this flag is set, provide an unchecked buffer if
> the sysfs attributes disabled verify or generation. This fixes the
> following nvme warning:
Do we even need the flag? I think we could just deduce it from
tag_size < tuple_size, which feels more robust.
Otherwise this looks good to me.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] block: always allocate integrity buffer when required
2025-05-09 4:19 ` Christoph Hellwig
@ 2025-05-09 14:33 ` Keith Busch
2025-05-09 14:39 ` Christoph Hellwig
0 siblings, 1 reply; 4+ messages in thread
From: Keith Busch @ 2025-05-09 14:33 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Keith Busch, axboe, linux-block, martin.petersen, linux-nvme
On Fri, May 09, 2025 at 06:19:49AM +0200, Christoph Hellwig wrote:
> On Thu, May 08, 2025 at 10:58:14AM -0700, Keith Busch wrote:
> > Add a new blk_integrity flag to indicate if the disk format requires an
> > integrity buffer. When this flag is set, provide an unchecked buffer if
> > the sysfs attributes disabled verify or generation. This fixes the
> > following nvme warning:
>
> Do we even need the flag? I think we could just deduce it from
> tag_size < tuple_size, which feels more robust.
It looks like tag_size just refers to the space for the "application
tag", which can vary depending on if ref_tag is used or not. But it's
always going to be smaller than the tuple size.
I think you mean 'if tuple_size == sizeof(struct {t10|crc64}_pi_tuple)',
depending on which csum type is used. I introduced a new flag because I
thought that gen/strip property was just an arbitrary decision that NVMe
made for PRACT, but if it's a universal thing, then we can totally use
that.
^ permalink raw reply [flat|nested] 4+ messages in thread* Re: [PATCH] block: always allocate integrity buffer when required
2025-05-09 14:33 ` Keith Busch
@ 2025-05-09 14:39 ` Christoph Hellwig
0 siblings, 0 replies; 4+ messages in thread
From: Christoph Hellwig @ 2025-05-09 14:39 UTC (permalink / raw)
To: Keith Busch
Cc: Christoph Hellwig, Keith Busch, axboe, linux-block,
martin.petersen, linux-nvme
On Fri, May 09, 2025 at 08:33:00AM -0600, Keith Busch wrote:
> It looks like tag_size just refers to the space for the "application
> tag", which can vary depending on if ref_tag is used or not. But it's
> always going to be smaller than the tuple size.
>
> I think you mean 'if tuple_size == sizeof(struct {t10|crc64}_pi_tuple)',
> depending on which csum type is used.
Ah yes, of course.
> I introduced a new flag because I
> thought that gen/strip property was just an arbitrary decision that NVMe
> made for PRACT, but if it's a universal thing, then we can totally use
> that.
There is no other protocol supporting mixed PI / totally user defined
metadata. But strip/insert can't be supported if there is more metadata
than the PI tuple, so I suspect anyone else picking it up would have
to treat it that way. If I'm wrong and someone will eventually do
something else we can still go down the flag route when needed.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-05-09 14:39 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-08 17:58 [PATCH] block: always allocate integrity buffer when required Keith Busch
2025-05-09 4:19 ` Christoph Hellwig
2025-05-09 14:33 ` Keith Busch
2025-05-09 14:39 ` Christoph Hellwig
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox