Linux block layer
 help / color / mirror / Atom feed
From: "Javier González" <javier@javigon.com>
To: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>,
	Damien Le Moal <Damien.LeMoal@wdc.com>,
	Hans Holmberg <hans@owltronix.com>,
	linux-block@vger.kernel.org
Subject: Re: [PATCH 8/8] block: set the zone size in blk_revalidate_disk_zones atomically
Date: Tue, 3 Dec 2019 15:00:09 +0100	[thread overview]
Message-ID: <20191203140009.weqdw2kopzaizuoo@mpHalley.local> (raw)
In-Reply-To: <20191203093908.24612-9-hch@lst.de>

On 03.12.2019 10:39, Christoph Hellwig wrote:
>The current zone revalidation code has a major problem in that it
>doesn't update the zone size and q->nr_zones atomically, leading
>to a short window where an out of bounds access to the zone arrays
>is possible.
>
>To fix this move the setting of the zone size into the crticial

nip: critical

>sections blk_revalidate_disk_zones so that it gets updated together
>with the zone bitmaps and q->nr_zones.  This also slightly simplifies
>the caller as it deducts the zone size from the report_zones.

This part makes sense. Good catch.
>
>This change also allows to check for a power of two zone size in generic
>code.

I think however that this checks should remain at the driver level, or
at least depend on a flag that signals that the zoned device is actually
a power of two.

>
>Reported-by: Hans Holmberg <hans@owltronix.com>
>Signed-off-by: Christoph Hellwig <hch@lst.de>
>---
> block/blk-zoned.c             | 59 ++++++++++++++++++++---------------
> drivers/block/null_blk_main.c |  3 +-
> drivers/scsi/sd_zbc.c         |  2 --
> 3 files changed, 35 insertions(+), 29 deletions(-)
>
>diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>index 51d427659ce7..d00fcfd71dfe 100644
>--- a/block/blk-zoned.c
>+++ b/block/blk-zoned.c
>@@ -343,6 +343,7 @@ struct blk_revalidate_zone_args {
> 	unsigned long	*conv_zones_bitmap;
> 	unsigned long	*seq_zones_wlock;
> 	unsigned int	nr_zones;
>+	sector_t	zone_sectors;
> 	sector_t	sector;
> };
>
>@@ -355,25 +356,33 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
> 	struct blk_revalidate_zone_args *args = data;
> 	struct gendisk *disk = args->disk;
> 	struct request_queue *q = disk->queue;
>-	sector_t zone_sectors = blk_queue_zone_sectors(q);
> 	sector_t capacity = get_capacity(disk);
>
> 	/*
> 	 * All zones must have the same size, with the exception on an eventual
> 	 * smaller last zone.
> 	 */
>-	if (zone->start + zone_sectors < capacity &&
>-	    zone->len != zone_sectors) {
>-		pr_warn("%s: Invalid zoned device with non constant zone size\n",
>-			disk->disk_name);
>-		return false;
>-	}
>+	if (zone->start == 0) {
>+		if (zone->len == 0 || !is_power_of_2(zone->len)) {
>+			pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n",
>+				disk->disk_name, zone->len);
>+			return -ENODEV;
>+		}
>
>-	if (zone->start + zone->len >= capacity &&
>-	    zone->len > zone_sectors) {
>-		pr_warn("%s: Invalid zoned device with larger last zone size\n",
>-			disk->disk_name);
>-		return -ENODEV;
>+		args->zone_sectors = zone->len;
>+		args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len);
>+	} else if (zone->start + args->zone_sectors < capacity) {
>+		if (zone->len != args->zone_sectors) {
>+			pr_warn("%s: Invalid zoned device with non constant zone size\n",
>+				disk->disk_name);
>+			return -ENODEV;
>+		}
>+	} else {
>+		if (zone->len > args->zone_sectors) {
>+			pr_warn("%s: Invalid zoned device with larger last zone size\n",
>+				disk->disk_name);
>+			return -ENODEV;
>+		}
> 	}
>
> 	/* Check for holes in the zone report */
>@@ -428,9 +437,9 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
> 	struct request_queue *q = disk->queue;
> 	struct blk_revalidate_zone_args args = {
> 		.disk		= disk,
>-		.nr_zones	= blkdev_nr_zones(disk),
> 	};
>-	int ret = 0;
>+	unsigned int noio_flag;
>+	int ret;
>
> 	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
> 		return -EIO;
>@@ -438,24 +447,22 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
> 		return -EIO;
>
> 	/*
>-	 * Ensure that all memory allocations in this context are done as
>-	 * if GFP_NOIO was specified.
>+	 * Ensure that all memory allocations in this context are done as if
>+	 * GFP_NOIO was specified.
> 	 */
>-	if (args.nr_zones) {
>-		unsigned int noio_flag = memalloc_noio_save();
>-
>-		ret = disk->fops->report_zones(disk, 0, args.nr_zones,
>-					       blk_revalidate_zone_cb, &args);
>-		memalloc_noio_restore(noio_flag);
>-	}
>+	noio_flag = memalloc_noio_save();
>+	ret = disk->fops->report_zones(disk, 0, UINT_MAX,
>+				       blk_revalidate_zone_cb, &args);
>+	memalloc_noio_restore(noio_flag);
>
> 	/*
>-	 * Install the new bitmaps, making sure the queue is stopped and
>-	 * all I/Os are completed (i.e. a scheduler is not referencing the
>-	 * bitmaps).
>+	 * Install the new bitmaps and update nr_zones only once the queue is
>+	 * stopped and all I/Os are completed (i.e. a scheduler is not
>+	 * referencing the bitmaps).
> 	 */
> 	blk_mq_freeze_queue(q);
> 	if (ret >= 0) {
>+		blk_queue_chunk_sectors(q, args.zone_sectors);
> 		q->nr_zones = args.nr_zones;
> 		swap(q->seq_zones_wlock, args.seq_zones_wlock);
> 		swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
>diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
>index 068cd0ae6e2c..997b7dc095b9 100644
>--- a/drivers/block/null_blk_main.c
>+++ b/drivers/block/null_blk_main.c
>@@ -1583,6 +1583,8 @@ static int null_gendisk_register(struct nullb *nullb)
> 			if (ret)
> 				return ret;
> 		} else {
>+			blk_queue_chunk_sectors(nullb->q,
>+					nullb->dev->zone_size_sects);
> 			nullb->q->nr_zones = blkdev_nr_zones(disk);
> 		}
> 	}
>@@ -1746,7 +1748,6 @@ static int null_add_dev(struct nullb_device *dev)
> 		if (rv)
> 			goto out_cleanup_blk_queue;
>
>-		blk_queue_chunk_sectors(nullb->q, dev->zone_size_sects);
> 		nullb->q->limits.zoned = BLK_ZONED_HM;
> 		blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, nullb->q);
> 		blk_queue_required_elevator_features(nullb->q,
>diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
>index 0e5ede48f045..27d72c1d4654 100644
>--- a/drivers/scsi/sd_zbc.c
>+++ b/drivers/scsi/sd_zbc.c
>@@ -412,8 +412,6 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
> 		goto err;
>
> 	/* The drive satisfies the kernel restrictions: set it up */
>-	blk_queue_chunk_sectors(sdkp->disk->queue,
>-			logical_to_sectors(sdkp->device, zone_blocks));
> 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, sdkp->disk->queue);
> 	blk_queue_required_elevator_features(sdkp->disk->queue,
> 					     ELEVATOR_F_ZBD_SEQ_WRITE);
>-- 
>2.20.1
>

  reply	other threads:[~2019-12-03 14:00 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-03  9:39 avoid out of bounds zone bitmap access Christoph Hellwig
2019-12-03  9:39 ` [PATCH 1/8] null_blk: fix zone size paramter check Christoph Hellwig
2019-12-03  9:39 ` [PATCH 2/8] null_blk: cleanup null_gendisk_register Christoph Hellwig
2019-12-03  9:39 ` [PATCH 3/8] block: remove the empty line at the end of blk-zoned.c Christoph Hellwig
2019-12-03  9:39 ` [PATCH 4/8] block: simplify blkdev_nr_zones Christoph Hellwig
2019-12-03  9:39 ` [PATCH 5/8] block: replace seq_zones_bitmap with conv_zones_bitmap Christoph Hellwig
2019-12-03 14:02   ` Javier González
2019-12-03  9:39 ` [PATCH 6/8] block: allocate the zone bitmaps lazily Christoph Hellwig
2019-12-03 14:03   ` Javier González
2019-12-03  9:39 ` [PATCH 7/8] block: don't handle bio based drivers in blk_revalidate_disk_zones Christoph Hellwig
2019-12-03 14:04   ` Javier González
2019-12-03  9:39 ` [PATCH 8/8] block: set the zone size in blk_revalidate_disk_zones atomically Christoph Hellwig
2019-12-03 14:00   ` Javier González [this message]
2019-12-03 15:18     ` Christoph Hellwig
2019-12-03 15:34       ` Javier González
2019-12-03 15:42         ` Christoph Hellwig
2019-12-03 17:17           ` Javier González
2019-12-03 16:00 ` avoid out of bounds zone bitmap access Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191203140009.weqdw2kopzaizuoo@mpHalley.local \
    --to=javier@javigon.com \
    --cc=Damien.LeMoal@wdc.com \
    --cc=axboe@kernel.dk \
    --cc=hans@owltronix.com \
    --cc=hch@lst.de \
    --cc=linux-block@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox