From: Keith Busch <kbusch@meta.com>
To: <linux-block@vger.kernel.org>
Cc: <hch@lst.de>, <linux-nvme@lists.infradead.org>,
Keith Busch <kbusch@kernel.org>,
"Martin K. Petersen" <martin.petersen@oracle.com>,
Jens Axboe <axboe@kernel.dk>
Subject: [PATCHv2] block: always allocate integrity buffer
Date: Wed, 7 May 2025 12:14:24 -0700 [thread overview]
Message-ID: <20250507191424.2436350-1-kbusch@meta.com> (raw)
From: Keith Busch <kbusch@kernel.org>
The integrity buffer, whether or not you want it generated or verified, is
mandatory for nvme formats that have metadata. The block integrity attributes
read_verify and write_generate had been stopping the metadata buffer from being
allocated and attached to the bio entirely. We only want to suppress the
protection checks on the device and host, but we still need the buffer.
Otherwise, reads and writes will just get IO errors and this nvme warning:
------------[ cut here ]------------
WARNING: CPU: 1 PID: 371 at drivers/nvme/host/core.c:1036 nvme_setup_rw+0x122/0x210
...
RIP: 0010:nvme_setup_rw+0x122/0x210
...
Call Trace:
<TASK>
nvme_setup_cmd+0x1b4/0x280
nvme_queue_rqs+0xc4/0x1f0 [nvme]
blk_mq_dispatch_queue_requests+0x24a/0x430
blk_mq_flush_plug_list+0x50/0x140
__blk_flush_plug+0xc1/0x100
__submit_bio+0x1c1/0x360
? submit_bio_noacct_nocheck+0x2d6/0x3c0
submit_bio_noacct_nocheck+0x2d6/0x3c0
? submit_bio_noacct+0x47/0x4c0
submit_bio_wait+0x48/0xa0
__blkdev_direct_IO_simple+0xee/0x210
? current_time+0x1d/0x100
? current_time+0x1d/0x100
? __bio_clone+0xb0/0xb0
blkdev_read_iter+0xbb/0x140
vfs_read+0x239/0x310
ksys_read+0x58/0xc0
do_syscall_64+0x6c/0x180
entry_SYSCALL_64_after_hwframe+0x4b/0x53
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
v1->v2:
The bip_flags are initialized based on the bi->flags and don't
change for the lifetime of the bio. Check this instead to avoid any
races with bi->flags being changed by a user modifying the read_verify
and write_generate attributes.
Check if we can skip the verify step before scheduling the deferred
completion work.
block/bio-integrity-auto.c | 37 ++++++++++++++++++++++---------------
1 file changed, 22 insertions(+), 15 deletions(-)
diff --git a/block/bio-integrity-auto.c b/block/bio-integrity-auto.c
index e524c609be506..2c43e27b332ca 100644
--- a/block/bio-integrity-auto.c
+++ b/block/bio-integrity-auto.c
@@ -54,12 +54,12 @@ static void bio_integrity_verify_fn(struct work_struct *work)
*/
bool __bio_integrity_endio(struct bio *bio)
{
- struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
struct bio_integrity_payload *bip = bio_integrity(bio);
struct bio_integrity_data *bid =
container_of(bip, struct bio_integrity_data, bip);
- if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
+ if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
+ bip->bip_flags & BIP_CHECK_GUARD) {
INIT_WORK(&bid->work, bio_integrity_verify_fn);
queue_work(kintegrityd_wq, &bid->work);
return false;
@@ -69,6 +69,16 @@ bool __bio_integrity_endio(struct bio *bio)
return true;
}
+static inline void bio_set_bip_flags(struct blk_integrity *bi, u16 *bip_flags)
+{
+ if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
+ *bip_flags |= BIP_IP_CHECKSUM;
+ if (bi->csum_type)
+ *bip_flags |= BIP_CHECK_GUARD;
+ if (bi->flags & BLK_INTEGRITY_REF_TAG)
+ *bip_flags |= BIP_CHECK_REFTAG;
+}
+
/**
* bio_integrity_prep - Prepare bio for integrity I/O
* @bio: bio to prepare
@@ -83,6 +93,7 @@ bool __bio_integrity_endio(struct bio *bio)
bool bio_integrity_prep(struct bio *bio)
{
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ unsigned short bip_flags = BIP_BLOCK_INTEGRITY;
struct bio_integrity_data *bid;
gfp_t gfp = GFP_NOIO;
unsigned int len;
@@ -101,19 +112,22 @@ bool bio_integrity_prep(struct bio *bio)
switch (bio_op(bio)) {
case REQ_OP_READ:
if (bi->flags & BLK_INTEGRITY_NOVERIFY)
- return true;
+ break;
+ bio_set_bip_flags(bi, &bip_flags);
break;
case REQ_OP_WRITE:
- if (bi->flags & BLK_INTEGRITY_NOGENERATE)
- return true;
-
/*
* Zero the memory allocated to not leak uninitialized kernel
* memory to disk for non-integrity metadata where nothing else
* initializes the memory.
*/
+ if (bi->flags & BLK_INTEGRITY_NOGENERATE) {
+ gfp |= __GFP_ZERO;
+ break;
+ }
if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
gfp |= __GFP_ZERO;
+ bio_set_bip_flags(bi, &bip_flags);
break;
default:
return true;
@@ -134,22 +148,15 @@ bool bio_integrity_prep(struct bio *bio)
bid->bio = bio;
- bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
+ bid->bip.bip_flags = bip_flags;
bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
- if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
- bid->bip.bip_flags |= BIP_IP_CHECKSUM;
- if (bi->csum_type)
- bid->bip.bip_flags |= BIP_CHECK_GUARD;
- if (bi->flags & BLK_INTEGRITY_REF_TAG)
- bid->bip.bip_flags |= BIP_CHECK_REFTAG;
-
if (bio_integrity_add_page(bio, virt_to_page(buf), len,
offset_in_page(buf)) < len)
goto err_end_io;
/* Auto-generate integrity metadata if this is a write */
- if (bio_data_dir(bio) == WRITE)
+ if (bio_data_dir(bio) == WRITE && bid->bip.bip_flags & BIP_CHECK_GUARD)
blk_integrity_generate(bio);
else
bid->saved_bio_iter = bio->bi_iter;
--
2.47.1
next reply other threads:[~2025-05-07 19:14 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-07 19:14 Keith Busch [this message]
2025-05-07 22:31 ` [PATCHv2] block: always allocate integrity buffer Martin K. Petersen
2025-05-08 5:15 ` Christoph Hellwig
2025-05-08 5:12 ` Christoph Hellwig
2025-05-08 16:14 ` Keith Busch
2025-05-08 16:19 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250507191424.2436350-1-kbusch@meta.com \
--to=kbusch@meta.com \
--cc=axboe@kernel.dk \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=martin.petersen@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox