linux-nvme.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCHv4] block: always allocate integrity buffer when required
@ 2025-05-09 15:38 Keith Busch
  2025-05-12  4:21 ` Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Keith Busch @ 2025-05-09 15:38 UTC (permalink / raw)
  To: axboe, linux-block, martin.petersen, hch; +Cc: linux-nvme, Keith Busch

From: Keith Busch <kbusch@kernel.org>

Many nvme metadata formats can not strip or generate the metadata on the
controller side. For these formats, a host provided integrity buffer is
mandatory even if it isn't checked.

The block integrity read_verify and write_generate attributes prevent
allocating the metadata buffer, but we need it when the format requires
it, otherwise reads and writes will be rejected by the driver with IO
errors.

Assume the integrity buffer can be offloaded to the controller if the
metadata size is the same as the protection information size. Otherwise
provide an unchecked host buffer when the read verify or write
generation attributes are disabled. This fixes the following nvme
warning:

 ------------[ cut here ]------------
 WARNING: CPU: 1 PID: 371 at drivers/nvme/host/core.c:1036 nvme_setup_rw+0x122/0x210
 ...
 RIP: 0010:nvme_setup_rw+0x122/0x210
 ...
 Call Trace:
  <TASK>
  nvme_setup_cmd+0x1b4/0x280
  nvme_queue_rqs+0xc4/0x1f0 [nvme]
  blk_mq_dispatch_queue_requests+0x24a/0x430
  blk_mq_flush_plug_list+0x50/0x140
  __blk_flush_plug+0xc1/0x100
  __submit_bio+0x1c1/0x360
  ? submit_bio_noacct_nocheck+0x2d6/0x3c0
  submit_bio_noacct_nocheck+0x2d6/0x3c0
  ? submit_bio_noacct+0x47/0x4c0
  submit_bio_wait+0x48/0xa0
  __blkdev_direct_IO_simple+0xee/0x210
  ? current_time+0x1d/0x100
  ? current_time+0x1d/0x100
  ? __bio_clone+0xb0/0xb0
  blkdev_read_iter+0xbb/0x140
  vfs_read+0x239/0x310
  ksys_read+0x58/0xc0
  do_syscall_64+0x6c/0x180
  entry_SYSCALL_64_after_hwframe+0x4b/0x53

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
v3->v4:

  Assume tuple size and checksum type can be used to infer if the
  host is required to provide an integrity buffer, or if that can be
  offloaded to the device. No need for format specific flags in that
  case.

 block/bio-integrity-auto.c | 62 +++++++++++++++++++++++++++++---------
 1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/block/bio-integrity-auto.c b/block/bio-integrity-auto.c
index e524c609be506..9c66576647920 100644
--- a/block/bio-integrity-auto.c
+++ b/block/bio-integrity-auto.c
@@ -9,6 +9,7 @@
  * not aware of PI.
  */
 #include <linux/blk-integrity.h>
+#include <linux/t10-pi.h>
 #include <linux/workqueue.h>
 #include "blk.h"
 
@@ -43,6 +44,29 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 	bio_endio(bio);
 }
 
+#define BIP_CHECK_FLAGS (BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG)
+static bool bip_should_check(struct bio_integrity_payload *bip)
+{
+	return bip->bip_flags & BIP_CHECK_FLAGS;
+}
+
+static bool bi_offload_capable(struct blk_integrity *bi)
+{
+	switch (bi->csum_type) {
+	case BLK_INTEGRITY_CSUM_CRC64:
+		return bi->tuple_size == sizeof(struct crc64_pi_tuple);
+	case BLK_INTEGRITY_CSUM_CRC:
+	case BLK_INTEGRITY_CSUM_IP:
+		return bi->tuple_size == sizeof(struct t10_pi_tuple);
+	default:
+		pr_warn_once("%s: unknown integrity checksum type:%d\n",
+			__func__, bi->csum_type);
+		fallthrough;
+	case BLK_INTEGRITY_CSUM_NONE:
+		return false;
+	}
+}
+
 /**
  * __bio_integrity_endio - Integrity I/O completion function
  * @bio:	Protected bio
@@ -54,12 +78,12 @@ static void bio_integrity_verify_fn(struct work_struct *work)
  */
 bool __bio_integrity_endio(struct bio *bio)
 {
-	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct bio_integrity_data *bid =
 		container_of(bip, struct bio_integrity_data, bip);
 
-	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
+	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
+	    bip_should_check(bip)) {
 		INIT_WORK(&bid->work, bio_integrity_verify_fn);
 		queue_work(kintegrityd_wq, &bid->work);
 		return false;
@@ -84,6 +108,7 @@ bool bio_integrity_prep(struct bio *bio)
 {
 	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
 	struct bio_integrity_data *bid;
+	bool set_flags = true;
 	gfp_t gfp = GFP_NOIO;
 	unsigned int len;
 	void *buf;
@@ -100,19 +125,24 @@ bool bio_integrity_prep(struct bio *bio)
 
 	switch (bio_op(bio)) {
 	case REQ_OP_READ:
-		if (bi->flags & BLK_INTEGRITY_NOVERIFY)
-			return true;
+		if (bi->flags & BLK_INTEGRITY_NOVERIFY) {
+			if (bi_offload_capable(bi))
+				return true;
+			set_flags = false;
+		}
 		break;
 	case REQ_OP_WRITE:
-		if (bi->flags & BLK_INTEGRITY_NOGENERATE)
-			return true;
-
 		/*
 		 * Zero the memory allocated to not leak uninitialized kernel
 		 * memory to disk for non-integrity metadata where nothing else
 		 * initializes the memory.
 		 */
-		if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
+		if (bi->flags & BLK_INTEGRITY_NOGENERATE) {
+			if (bi_offload_capable(bi))
+				return true;
+			set_flags = false;
+			gfp |= __GFP_ZERO;
+		} else if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
 			gfp |= __GFP_ZERO;
 		break;
 	default:
@@ -137,19 +167,21 @@ bool bio_integrity_prep(struct bio *bio)
 	bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
 	bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
 
-	if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
-		bid->bip.bip_flags |= BIP_IP_CHECKSUM;
-	if (bi->csum_type)
-		bid->bip.bip_flags |= BIP_CHECK_GUARD;
-	if (bi->flags & BLK_INTEGRITY_REF_TAG)
-		bid->bip.bip_flags |= BIP_CHECK_REFTAG;
+	if (set_flags) {
+		if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
+			bid->bip.bip_flags |= BIP_IP_CHECKSUM;
+		if (bi->csum_type)
+			bid->bip.bip_flags |= BIP_CHECK_GUARD;
+		if (bi->flags & BLK_INTEGRITY_REF_TAG)
+			bid->bip.bip_flags |= BIP_CHECK_REFTAG;
+	}
 
 	if (bio_integrity_add_page(bio, virt_to_page(buf), len,
 			offset_in_page(buf)) < len)
 		goto err_end_io;
 
 	/* Auto-generate integrity metadata if this is a write */
-	if (bio_data_dir(bio) == WRITE)
+	if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
 		blk_integrity_generate(bio);
 	else
 		bid->saved_bio_iter = bio->bi_iter;
-- 
2.47.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCHv4] block: always allocate integrity buffer when required
  2025-05-09 15:38 [PATCHv4] block: always allocate integrity buffer when required Keith Busch
@ 2025-05-12  4:21 ` Christoph Hellwig
  2025-05-12 12:31 ` Martin K. Petersen
  2025-05-12 13:14 ` Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Christoph Hellwig @ 2025-05-12  4:21 UTC (permalink / raw)
  To: Keith Busch
  Cc: axboe, linux-block, martin.petersen, hch, linux-nvme, Keith Busch

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCHv4] block: always allocate integrity buffer when required
  2025-05-09 15:38 [PATCHv4] block: always allocate integrity buffer when required Keith Busch
  2025-05-12  4:21 ` Christoph Hellwig
@ 2025-05-12 12:31 ` Martin K. Petersen
  2025-05-12 13:14 ` Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Martin K. Petersen @ 2025-05-12 12:31 UTC (permalink / raw)
  To: Keith Busch
  Cc: axboe, linux-block, martin.petersen, hch, linux-nvme, Keith Busch


Keith,

> Many nvme metadata formats can not strip or generate the metadata on the
> controller side. For these formats, a host provided integrity buffer is
> mandatory even if it isn't checked.

Looks good.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCHv4] block: always allocate integrity buffer when required
  2025-05-09 15:38 [PATCHv4] block: always allocate integrity buffer when required Keith Busch
  2025-05-12  4:21 ` Christoph Hellwig
  2025-05-12 12:31 ` Martin K. Petersen
@ 2025-05-12 13:14 ` Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2025-05-12 13:14 UTC (permalink / raw)
  To: linux-block, martin.petersen, hch, Keith Busch; +Cc: linux-nvme, Keith Busch


On Fri, 09 May 2025 08:38:02 -0700, Keith Busch wrote:
> Many nvme metadata formats can not strip or generate the metadata on the
> controller side. For these formats, a host provided integrity buffer is
> mandatory even if it isn't checked.
> 
> The block integrity read_verify and write_generate attributes prevent
> allocating the metadata buffer, but we need it when the format requires
> it, otherwise reads and writes will be rejected by the driver with IO
> errors.
> 
> [...]

Applied, thanks!

[1/1] block: always allocate integrity buffer when required
      commit: 8098514bd5ca98beca6ec725751d82d0d5b492d8

Best regards,
-- 
Jens Axboe





^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-05-12 13:49 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-09 15:38 [PATCHv4] block: always allocate integrity buffer when required Keith Busch
2025-05-12  4:21 ` Christoph Hellwig
2025-05-12 12:31 ` Martin K. Petersen
2025-05-12 13:14 ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).