* [PATCH 1/5] null_blk: Introduce the zone_full parameter
2024-07-03 23:39 [PATCH 0/5] Remove zone reset all emulation Damien Le Moal
@ 2024-07-03 23:39 ` Damien Le Moal
2024-07-04 5:13 ` Christoph Hellwig
2024-07-03 23:39 ` [PATCH 2/5] dm: Refactor is_abnormal_io() Damien Le Moal
` (3 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: Damien Le Moal @ 2024-07-03 23:39 UTC (permalink / raw)
To: Jens Axboe, linux-block, dm-devel, Mike Snitzer, Mikulas Patocka,
linux-scsi, Martin K . Petersen, Ming Lei, Michael S . Tsirkin,
Jason Wang, Christoph Hellwig
Allow creating a zoned null_blk device with the initial state of its
sequential write required zones to be FULL. This is convenient to avoid
having to first write these zones to perform read performance evaluation
or test zone management operations such as zone reset (and zone reset
all).
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
drivers/block/null_blk/main.c | 9 ++++++++-
drivers/block/null_blk/null_blk.h | 1 +
drivers/block/null_blk/zoned.c | 10 ++++++++--
3 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 5de9ca4eceb4..783f730efff4 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -262,6 +262,10 @@ module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444
MODULE_PARM_DESC(zone_append_max_sectors,
"Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation");
+static bool g_zone_full;
+module_param_named(zone_full, g_zone_full, bool, S_IRUGO);
+MODULE_PARM_DESC(zone_full, "Initialize the sequential write required zones of a zoned device to be full. Default: false");
+
static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
@@ -458,6 +462,7 @@ NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL);
+NULLB_DEVICE_ATTR(zone_full, bool, NULL);
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
NULLB_DEVICE_ATTR(no_sched, bool, NULL);
NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
@@ -610,6 +615,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_zone_append_max_sectors,
&nullb_device_attr_zone_readonly,
&nullb_device_attr_zone_offline,
+ &nullb_device_attr_zone_full,
&nullb_device_attr_virt_boundary,
&nullb_device_attr_no_sched,
&nullb_device_attr_shared_tags,
@@ -700,7 +706,7 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page)
"shared_tags,size,submit_queues,use_per_node_hctx,"
"virt_boundary,zoned,zone_capacity,zone_max_active,"
"zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
- "zone_size,zone_append_max_sectors\n");
+ "zone_size,zone_append_max_sectors,zone_full\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -781,6 +787,7 @@ static struct nullb_device *null_alloc_dev(void)
dev->zone_max_open = g_zone_max_open;
dev->zone_max_active = g_zone_max_active;
dev->zone_append_max_sectors = g_zone_append_max_sectors;
+ dev->zone_full = g_zone_full;
dev->virt_boundary = g_virt_boundary;
dev->no_sched = g_no_sched;
dev->shared_tags = g_shared_tags;
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 3234e6c85eed..a7bb32f73ec3 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -101,6 +101,7 @@ struct nullb_device {
bool memory_backed; /* if data is stored in memory */
bool discard; /* if support discard */
bool zoned; /* if device is zoned */
+ bool zone_full; /* Initialize zones to be full */
bool virt_boundary; /* virtual boundary on/off for the device */
bool no_sched; /* no IO scheduler for the device */
bool shared_tags; /* share tag set between devices for blk-mq */
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 9f7151ad93cf..7996e2e7dce2 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -145,7 +145,7 @@ int null_init_zoned_dev(struct nullb_device *dev,
zone = &dev->zones[i];
null_init_zone_lock(dev, zone);
- zone->start = zone->wp = sector;
+ zone->start = sector;
if (zone->start + dev->zone_size_sects > dev_capacity_sects)
zone->len = dev_capacity_sects - zone->start;
else
@@ -153,7 +153,13 @@ int null_init_zoned_dev(struct nullb_device *dev,
zone->capacity =
min_t(sector_t, zone->len, zone_capacity_sects);
zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
- zone->cond = BLK_ZONE_COND_EMPTY;
+ if (dev->zone_full) {
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = zone->start + zone->capacity;
+ } else{
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ zone->wp = zone->start;
+ }
sector += dev->zone_size_sects;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 1/5] null_blk: Introduce the zone_full parameter
2024-07-03 23:39 ` [PATCH 1/5] null_blk: Introduce the zone_full parameter Damien Le Moal
@ 2024-07-04 5:13 ` Christoph Hellwig
0 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2024-07-04 5:13 UTC (permalink / raw)
To: Damien Le Moal
Cc: Jens Axboe, linux-block, dm-devel, Mike Snitzer, Mikulas Patocka,
linux-scsi, Martin K . Petersen, Ming Lei, Michael S . Tsirkin,
Jason Wang, Christoph Hellwig
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 2/5] dm: Refactor is_abnormal_io()
2024-07-03 23:39 [PATCH 0/5] Remove zone reset all emulation Damien Le Moal
2024-07-03 23:39 ` [PATCH 1/5] null_blk: Introduce the zone_full parameter Damien Le Moal
@ 2024-07-03 23:39 ` Damien Le Moal
2024-07-04 5:13 ` Christoph Hellwig
2024-07-03 23:39 ` [PATCH 3/5] dm: handle REQ_OP_ZONE_RESET_ALL Damien Le Moal
` (2 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: Damien Le Moal @ 2024-07-03 23:39 UTC (permalink / raw)
To: Jens Axboe, linux-block, dm-devel, Mike Snitzer, Mikulas Patocka,
linux-scsi, Martin K . Petersen, Ming Lei, Michael S . Tsirkin,
Jason Wang, Christoph Hellwig
Use a single switch-case to simplify is_abnormal_io() and make this
function more readable and easier to modify.
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
drivers/md/dm.c | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7d107ae06e1a..0d80caccbd9e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1598,20 +1598,18 @@ static void __send_abnormal_io(struct clone_info *ci, struct dm_target *ti,
static bool is_abnormal_io(struct bio *bio)
{
- enum req_op op = bio_op(bio);
-
- if (op != REQ_OP_READ && op != REQ_OP_WRITE && op != REQ_OP_FLUSH) {
- switch (op) {
- case REQ_OP_DISCARD:
- case REQ_OP_SECURE_ERASE:
- case REQ_OP_WRITE_ZEROES:
- return true;
- default:
- break;
- }
+ switch (bio_op(bio)) {
+ case REQ_OP_READ:
+ case REQ_OP_WRITE:
+ case REQ_OP_FLUSH:
+ return false;
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_ZEROES:
+ return true;
+ default:
+ return false;
}
-
- return false;
}
static blk_status_t __process_abnormal_io(struct clone_info *ci,
--
2.45.2
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 2/5] dm: Refactor is_abnormal_io()
2024-07-03 23:39 ` [PATCH 2/5] dm: Refactor is_abnormal_io() Damien Le Moal
@ 2024-07-04 5:13 ` Christoph Hellwig
0 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2024-07-04 5:13 UTC (permalink / raw)
To: Damien Le Moal
Cc: Jens Axboe, linux-block, dm-devel, Mike Snitzer, Mikulas Patocka,
linux-scsi, Martin K . Petersen, Ming Lei, Michael S . Tsirkin,
Jason Wang, Christoph Hellwig
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 3/5] dm: handle REQ_OP_ZONE_RESET_ALL
2024-07-03 23:39 [PATCH 0/5] Remove zone reset all emulation Damien Le Moal
2024-07-03 23:39 ` [PATCH 1/5] null_blk: Introduce the zone_full parameter Damien Le Moal
2024-07-03 23:39 ` [PATCH 2/5] dm: Refactor is_abnormal_io() Damien Le Moal
@ 2024-07-03 23:39 ` Damien Le Moal
2024-07-04 5:17 ` Christoph Hellwig
2024-07-03 23:39 ` [PATCH 4/5] block: Remove REQ_OP_ZONE_RESET_ALL emulation Damien Le Moal
2024-07-03 23:39 ` [PATCH 5/5] block: Remove blk_alloc_zone_bitmap() Damien Le Moal
4 siblings, 1 reply; 13+ messages in thread
From: Damien Le Moal @ 2024-07-03 23:39 UTC (permalink / raw)
To: Jens Axboe, linux-block, dm-devel, Mike Snitzer, Mikulas Patocka,
linux-scsi, Martin K . Petersen, Ming Lei, Michael S . Tsirkin,
Jason Wang, Christoph Hellwig
This commit implements processing of the REQ_OP_ZONE_RESET_ALL operation
for zoned mapped devices. Given that this operation always has a BIO
sector of 0 and a 0 size, processing through the regular BIO
__split_and_process_bio() function does not work because this function
would always select the first target. Instead, handling of this
operation is implemented using the function __send_zone_reset_all().
Similarly to the __send_empty_flush() function, the new
__send_zone_reset_all() function manually goes through all targets of a
mapped device table doing the following:
1) If the target can natively support REQ_OP_ZONE_RESET_ALL,
__send_duplicate_bios() is used to forward the reset all operation to
the target. This case is handled with the
__send_zone_reset_all_native() function.
2) For other targets, the function __send_zone_reset_all_emulated() is
executed to emulate the execution of REQ_OP_ZONE_RESET_ALL using
regular REQ_OP_ZONE_RESET operations.
Targets that can natively support REQ_OP_ZONE_RESET_ALL are identified
using the new target field zone_reset_all_supported. This boolean is set
to true for targets that have reliable zone limits, that is, targets
that map all sequential write required zones of their zoned device(s).
Setting this field is handled in dm_set_zones_restrictions() and
device_get_zone_resource_limits().
For targets with unreliable zone limits, REQ_OP_ZONE_RESET_ALL must be
emulated (case 2 above). This is implemented with
__send_zone_reset_all_emulated() and is similar to the block layer
function blkdev_zone_reset_all_emulated(): first a report zones is done
for the zones of the target to identify zones that need reset, that is,
any sequential write required zone that is not already empty. This is
done using a bitmap and the function dm_zone_get_reset_bitmap() which
sets to 1 the bit corresponding to a zone that needs reset. Next, this
zone bitmap is inspected and a clone BIO, modified to use the
REQ_OP_ZONE_RESET operation, is issued for any zone with its bit set in
the zone bitmap.
This implementation is more efficient than what the block layer does
with blkdev_zone_reset_all_emulated(), which is always used for DM zoned
devices currently: as we can natively use REQ_OP_ZONE_RESET_ALL on
targets mapping all sequential write required zones, resetting all zones
of a zoned mapped device can be much faster compared to always emulating
this operation using regular per-zone reset. In the worst case, this
implementation is as efficient as the block layer emulation. The
reduction in the time it takes to reset all zones of a zoned mapped
device depends directly on how the targets of the mapped device map
zones (that is, whether or not they have reliable zone limits).
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
drivers/md/dm-zone.c | 50 ++++++++++++-
drivers/md/dm.c | 135 +++++++++++++++++++++++++++++++++-
drivers/md/dm.h | 10 +++
include/linux/device-mapper.h | 7 ++
4 files changed, 197 insertions(+), 5 deletions(-)
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 4d37e53b50ee..41b4d230f6f6 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -292,10 +292,12 @@ static int device_get_zone_resource_limits(struct dm_target *ti,
/*
* If the target does not map all sequential zones, the limits
- * will not be reliable.
+ * will not be reliable and we cannot use REQ_OP_ZONE_RESET_ALL.
*/
- if (zc.target_nr_seq_zones < zc.total_nr_seq_zones)
+ if (zc.target_nr_seq_zones < zc.total_nr_seq_zones) {
zlim->reliable_limits = false;
+ ti->zone_reset_all_supported = false;
+ }
/*
* If the target maps less sequential zones than the limit values, then
@@ -353,6 +355,14 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i);
+ /*
+ * Assume that the target can accept REQ_OP_ZONE_RESET_ALL.
+ * device_get_zone_resource_limits() may adjust this if one of
+ * the device used by the target does not have all its sequential
+ * write required zones mapped.
+ */
+ ti->zone_reset_all_supported = true;
+
if (!ti->type->iterate_devices ||
ti->type->iterate_devices(ti,
device_get_zone_resource_limits, &zlim)) {
@@ -420,3 +430,39 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
return;
}
+
+static int dm_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
+{
+ /*
+ * For an all-zones reset, ignore conventional, empty, read-only
+ * and offline zones.
+ */
+ switch (zone->cond) {
+ case BLK_ZONE_COND_NOT_WP:
+ case BLK_ZONE_COND_EMPTY:
+ case BLK_ZONE_COND_READONLY:
+ case BLK_ZONE_COND_OFFLINE:
+ return 0;
+ default:
+ set_bit(idx, (unsigned long *)data);
+ return 0;
+ }
+}
+
+int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
+ sector_t sector, unsigned int nr_zones,
+ unsigned long *need_reset)
+{
+ int ret;
+
+ ret = dm_blk_do_report_zones(md, t, sector, nr_zones,
+ dm_zone_need_reset_cb, need_reset);
+ if (ret != nr_zones) {
+ DMERR("Get %s zone reset bitmap failed\n",
+ md->disk->disk_name);
+ return -EIO;
+ }
+
+ return 0;
+}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 0d80caccbd9e..9ee5b8355269 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1606,6 +1606,7 @@ static bool is_abnormal_io(struct bio *bio)
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_ZONE_RESET_ALL:
return true;
default:
return false;
@@ -1774,6 +1775,119 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
{
return dm_emulate_zone_append(md) && blk_zone_plug_bio(bio, 0);
}
+
+static blk_status_t __send_zone_reset_all_emulated(struct clone_info *ci,
+ struct dm_target *ti)
+{
+ struct bio_list blist = BIO_EMPTY_LIST;
+ struct mapped_device *md = ci->io->md;
+ unsigned int zone_sectors = md->disk->queue->limits.chunk_sectors;
+ unsigned long *need_reset;
+ unsigned int i, nr_zones, nr_reset;
+ unsigned int num_bios = 0;
+ blk_status_t sts = BLK_STS_OK;
+ sector_t sector = ti->begin;
+ struct bio *clone;
+ int ret;
+
+ nr_zones = ti->len >> ilog2(zone_sectors);
+ need_reset = bitmap_zalloc(nr_zones, GFP_NOIO);
+ if (!need_reset)
+ return BLK_STS_RESOURCE;
+
+ ret = dm_zone_get_reset_bitmap(md, ci->map, ti->begin,
+ nr_zones, need_reset);
+ if (ret) {
+ sts = BLK_STS_IOERR;
+ goto free_bitmap;
+ }
+
+ /* If we have no zone to reset, we are done. */
+ nr_reset = bitmap_weight(need_reset, nr_zones);
+ if (!nr_reset)
+ goto free_bitmap;
+
+ atomic_add(nr_zones, &ci->io->io_count);
+
+ for (i = 0; i < nr_zones; i++) {
+
+ if (!test_bit(i, need_reset)) {
+ sector += zone_sectors;
+ continue;
+ }
+
+ if (bio_list_empty(&blist)) {
+ /* This may take a while, so be nice to others */
+ if (i > 0)
+ cond_resched();
+
+ /*
+ * We may need to reset thousands of zones, so let's
+ * not go crazy with the clone allocation.
+ */
+ alloc_multiple_bios(&blist, ci, ti, min(nr_reset, 32),
+ NULL, GFP_NOIO);
+ }
+
+ /* Get a clone and change it to a regular reset operation. */
+ clone = bio_list_pop(&blist);
+ clone->bi_opf &= ~REQ_OP_MASK;
+ clone->bi_opf |= REQ_OP_ZONE_RESET | REQ_SYNC;
+ clone->bi_iter.bi_sector = sector;
+ clone->bi_iter.bi_size = 0;
+ __map_bio(clone);
+
+ sector += zone_sectors;
+ num_bios++;
+ nr_reset--;
+ }
+
+ WARN_ON_ONCE(!bio_list_empty(&blist));
+ atomic_sub(nr_zones - num_bios, &ci->io->io_count);
+ ci->sector_count = 0;
+
+free_bitmap:
+ bitmap_free(need_reset);
+
+ return sts;
+}
+
+static void __send_zone_reset_all_native(struct clone_info *ci,
+ struct dm_target *ti)
+{
+ unsigned int bios;
+
+ atomic_add(1, &ci->io->io_count);
+ bios = __send_duplicate_bios(ci, ti, 1, NULL, GFP_NOIO);
+ atomic_sub(1 - bios, &ci->io->io_count);
+
+ ci->sector_count = 0;
+}
+
+static blk_status_t __send_zone_reset_all(struct clone_info *ci)
+{
+ struct dm_table *t = ci->map;
+ blk_status_t sts = BLK_STS_OK;
+
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
+ if (ti->zone_reset_all_supported) {
+ __send_zone_reset_all_native(ci, ti);
+ continue;
+ }
+
+ sts = __send_zone_reset_all_emulated(ci, ti);
+ if (sts != BLK_STS_OK)
+ break;
+ }
+
+ /* Release the reference that alloc_io() took for submission. */
+ atomic_sub(1, &ci->io->io_count);
+
+ return sts;
+}
+
#else
static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
struct bio *bio)
@@ -1784,6 +1898,10 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
{
return false;
}
+static blk_status_t __send_zone_reset_all(struct clone_info *ci)
+{
+ return BLK_STS_NOTSUPP;
+}
#endif
/*
@@ -1797,9 +1915,14 @@ static void dm_split_and_process_bio(struct mapped_device *md,
blk_status_t error = BLK_STS_OK;
bool is_abnormal, need_split;
- need_split = is_abnormal = is_abnormal_io(bio);
- if (static_branch_unlikely(&zoned_enabled))
- need_split = is_abnormal || dm_zone_bio_needs_split(md, bio);
+ is_abnormal = is_abnormal_io(bio);
+ if (static_branch_unlikely(&zoned_enabled)) {
+ /* Special case REQ_OP_ZONE_RESET_ALL as it cannot be split. */
+ need_split = (bio_op(bio) != REQ_OP_ZONE_RESET_ALL) &&
+ (is_abnormal || dm_zone_bio_needs_split(md, bio));
+ } else {
+ need_split = is_abnormal;
+ }
if (unlikely(need_split)) {
/*
@@ -1840,6 +1963,12 @@ static void dm_split_and_process_bio(struct mapped_device *md,
goto out;
}
+ if (static_branch_unlikely(&zoned_enabled) &&
+ (bio_op(bio) == REQ_OP_ZONE_RESET_ALL)) {
+ error = __send_zone_reset_all(&ci);
+ goto out;
+ }
+
error = __split_and_process_bio(&ci);
if (error || !ci.sector_count)
goto out;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index c984ecb64b1e..4137bcdb95e7 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -110,6 +110,9 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
int dm_zone_map_bio(struct dm_target_io *io);
+int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
+ sector_t sector, unsigned int nr_zones,
+ unsigned long *need_reset);
#else
#define dm_blk_report_zones NULL
static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
@@ -120,6 +123,13 @@ static inline int dm_zone_map_bio(struct dm_target_io *tio)
{
return DM_MAPIO_KILL;
}
+static inline int dm_zone_get_reset_bitmap(struct mapped_device *md,
+ struct dm_table *t,
+ sector_t sector, unsigned int nr_zones,
+ unsigned long *need_reset)
+{
+ return -ENOTSUPP;
+}
#endif
/*
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 82b2195efaca..15d28164bbbd 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -357,6 +357,13 @@ struct dm_target {
*/
bool discards_supported:1;
+ /*
+ * Automatically set by dm-core if this target supports
+ * REQ_OP_ZONE_RESET_ALL. Otherwise, this operation will be emulated
+ * using REQ_OP_ZONE_RESET. Target drivers must not set this manually.
+ */
+ bool zone_reset_all_supported:1;
+
/*
* Set if this target requires that discards be split on
* 'max_discard_sectors' boundaries.
--
2.45.2
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 3/5] dm: handle REQ_OP_ZONE_RESET_ALL
2024-07-03 23:39 ` [PATCH 3/5] dm: handle REQ_OP_ZONE_RESET_ALL Damien Le Moal
@ 2024-07-04 5:17 ` Christoph Hellwig
0 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2024-07-04 5:17 UTC (permalink / raw)
To: Damien Le Moal
Cc: Jens Axboe, linux-block, dm-devel, Mike Snitzer, Mikulas Patocka,
linux-scsi, Martin K . Petersen, Ming Lei, Michael S . Tsirkin,
Jason Wang, Christoph Hellwig
> Targets that can natively support REQ_OP_ZONE_RESET_ALL are identified
> using the new target field zone_reset_all_supported. This boolean is set
> to true in for targets that have reliable zone limitsi, that is, targets
s/limitsi/limits/
> + /*
> + * Assume that the target can accept REQ_OP_ZONE_RESET_ALL.
> + * device_get_zone_resource_limits() may adjust this if one of
> + * the device used by the target does not have all its sequential
Overly long line here.
Otherwise looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 4/5] block: Remove REQ_OP_ZONE_RESET_ALL emulation
2024-07-03 23:39 [PATCH 0/5] Remove zone reset all emulation Damien Le Moal
` (2 preceding siblings ...)
2024-07-03 23:39 ` [PATCH 3/5] dm: handle REQ_OP_ZONE_RESET_ALL Damien Le Moal
@ 2024-07-03 23:39 ` Damien Le Moal
2024-07-04 0:07 ` Ed Tsai (蔡宗軒)
2024-07-04 5:21 ` Christoph Hellwig
2024-07-03 23:39 ` [PATCH 5/5] block: Remove blk_alloc_zone_bitmap() Damien Le Moal
4 siblings, 2 replies; 13+ messages in thread
From: Damien Le Moal @ 2024-07-03 23:39 UTC (permalink / raw)
To: Jens Axboe, linux-block, dm-devel, Mike Snitzer, Mikulas Patocka,
linux-scsi, Martin K . Petersen, Ming Lei, Michael S . Tsirkin,
Jason Wang, Christoph Hellwig
Now that device mapper can handle resetting all zones of a mapped zoned
device using REQ_OP_ZONE_RESET_ALL, all zoned block device drivers
support this operation. With this, the request queue feature
BLK_FEAT_ZONE_RESETALL is no longer necessary and the emulation code in
blk-zoned.c can be removed.
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
block/blk-core.c | 2 +-
block/blk-zoned.c | 76 ++--------------------------------
drivers/block/null_blk/zoned.c | 2 +-
drivers/block/ublk_drv.c | 2 +-
drivers/block/virtio_blk.c | 2 +-
drivers/nvme/host/zns.c | 2 +-
drivers/scsi/sd_zbc.c | 2 +-
include/linux/blkdev.h | 5 ---
8 files changed, 9 insertions(+), 84 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 71b7622c523a..0c25df9758d0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -834,7 +834,7 @@ void submit_bio_noacct(struct bio *bio)
goto not_supported;
break;
case REQ_OP_ZONE_RESET_ALL:
- if (!bdev_is_zoned(bio->bi_bdev) || !blk_queue_zone_resetall(q))
+ if (!bdev_is_zoned(bio->bi_bdev))
goto not_supported;
break;
case REQ_OP_DRV_IN:
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 07831fb67201..b104f5175783 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -157,70 +157,6 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node,
GFP_NOIO, node);
}
-static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
- void *data)
-{
- /*
- * For an all-zones reset, ignore conventional, empty, read-only
- * and offline zones.
- */
- switch (zone->cond) {
- case BLK_ZONE_COND_NOT_WP:
- case BLK_ZONE_COND_EMPTY:
- case BLK_ZONE_COND_READONLY:
- case BLK_ZONE_COND_OFFLINE:
- return 0;
- default:
- set_bit(idx, (unsigned long *)data);
- return 0;
- }
-}
-
-static int blkdev_zone_reset_all_emulated(struct block_device *bdev)
-{
- struct gendisk *disk = bdev->bd_disk;
- sector_t capacity = bdev_nr_sectors(bdev);
- sector_t zone_sectors = bdev_zone_sectors(bdev);
- unsigned long *need_reset;
- struct bio *bio = NULL;
- sector_t sector = 0;
- int ret;
-
- need_reset = blk_alloc_zone_bitmap(disk->queue->node, disk->nr_zones);
- if (!need_reset)
- return -ENOMEM;
-
- ret = disk->fops->report_zones(disk, 0, disk->nr_zones,
- blk_zone_need_reset_cb, need_reset);
- if (ret < 0)
- goto out_free_need_reset;
-
- ret = 0;
- while (sector < capacity) {
- if (!test_bit(disk_zone_no(disk, sector), need_reset)) {
- sector += zone_sectors;
- continue;
- }
-
- bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC,
- GFP_KERNEL);
- bio->bi_iter.bi_sector = sector;
- sector += zone_sectors;
-
- /* This may take a while, so be nice to others */
- cond_resched();
- }
-
- if (bio) {
- ret = submit_bio_wait(bio);
- bio_put(bio);
- }
-
-out_free_need_reset:
- kfree(need_reset);
- return ret;
-}
-
static int blkdev_zone_reset_all(struct block_device *bdev)
{
struct bio bio;
@@ -247,7 +183,6 @@ static int blkdev_zone_reset_all(struct block_device *bdev)
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
sector_t sector, sector_t nr_sectors)
{
- struct request_queue *q = bdev_get_queue(bdev);
sector_t zone_sectors = bdev_zone_sectors(bdev);
sector_t capacity = bdev_nr_sectors(bdev);
sector_t end_sector = sector + nr_sectors;
@@ -275,16 +210,11 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
return -EINVAL;
/*
- * In the case of a zone reset operation over all zones,
- * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this
- * command. For other devices, we emulate this command behavior by
- * identifying the zones needing a reset.
+ * In the case of a zone reset operation over all zones, use
+ * REQ_OP_ZONE_RESET_ALL.
*/
- if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
- if (!blk_queue_zone_resetall(q))
- return blkdev_zone_reset_all_emulated(bdev);
+ if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity)
return blkdev_zone_reset_all(bdev);
- }
while (sector < end_sector) {
bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL);
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 7996e2e7dce2..9bc768b2ca56 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -164,7 +164,7 @@ int null_init_zoned_dev(struct nullb_device *dev,
sector += dev->zone_size_sects;
}
- lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL;
+ lim->features |= BLK_FEAT_ZONED;
lim->chunk_sectors = dev->zone_size_sects;
lim->max_zone_append_sectors = dev->zone_append_max_sectors;
lim->max_open_zones = dev->zone_max_open;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 4fdff13fc23b..d10a2ea07292 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -2194,7 +2194,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
return -EOPNOTSUPP;
- lim.features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL;
+ lim.features |= BLK_FEAT_ZONED;
lim.max_active_zones = p->max_active_zones;
lim.max_open_zones = p->max_open_zones;
lim.max_zone_append_sectors = p->max_zone_append_sectors;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 6c64a67ab9c9..84c3efd0c611 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -728,7 +728,7 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk,
dev_dbg(&vdev->dev, "probing host-managed zoned device\n");
- lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL;
+ lim->features |= BLK_FEAT_ZONED;
virtio_cread(vdev, struct virtio_blk_config,
zoned.max_open_zones, &v);
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 99bb89c2495a..9a06f9d98cd6 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -108,7 +108,7 @@ int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf,
void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
struct nvme_zone_info *zi)
{
- lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL;
+ lim->features |= BLK_FEAT_ZONED;
lim->max_open_zones = zi->max_open_zones;
lim->max_active_zones = zi->max_active_zones;
lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index f7067afac79c..c8b9654d30f0 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -599,7 +599,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim,
if (sdkp->device->type != TYPE_ZBC)
return 0;
- lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL;
+ lim->features |= BLK_FEAT_ZONED;
/*
* Per ZBC and ZAC specifications, writes in sequential write required
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4d0d4b83bc74..dc250d8070d2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -318,9 +318,6 @@ typedef unsigned int __bitwise blk_features_t;
/* is a zoned device */
#define BLK_FEAT_ZONED ((__force blk_features_t)(1u << 10))
-/* supports Zone Reset All */
-#define BLK_FEAT_ZONE_RESETALL ((__force blk_features_t)(1u << 11))
-
/* supports PCI(e) p2p requests */
#define BLK_FEAT_PCI_P2PDMA ((__force blk_features_t)(1u << 12))
@@ -618,8 +615,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
#define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL))
#define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT)
-#define blk_queue_zone_resetall(q) \
- ((q)->limits.features & BLK_FEAT_ZONE_RESETALL)
#define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX)
#define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA)
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
--
2.45.2
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 4/5] block: Remove REQ_OP_ZONE_RESET_ALL emulation
2024-07-03 23:39 ` [PATCH 4/5] block: Remove REQ_OP_ZONE_RESET_ALL emulation Damien Le Moal
@ 2024-07-04 0:07 ` Ed Tsai (蔡宗軒)
2024-07-04 1:14 ` Damien Le Moal
2024-07-04 5:21 ` Christoph Hellwig
1 sibling, 1 reply; 13+ messages in thread
From: Ed Tsai (蔡宗軒) @ 2024-07-04 0:07 UTC (permalink / raw)
To: dlemoal@kernel.org, linux-block@vger.kernel.org, hch@lst.de,
martin.petersen@oracle.com, axboe@kernel.dk, ming.lei@redhat.com,
linux-scsi@vger.kernel.org, mpatocka@redhat.com, mst@redhat.com,
dm-devel@lists.linux.dev, jasowang@redhat.com, snitzer@kernel.org
On Thu, 2024-07-04 at 08:39 +0900, Damien Le Moal wrote:
> Now that device mapper can handle resetting all zones of a mapped
> zoned
> device using REQ_OP_ZONE_RESET_ALL, all zoned block device drivers
> support this operation. With this, the request queue feature
> BLK_FEAT_ZONE_RESETALL is not necessary and the emulation code in
> blk-zone.c can be removed.
>
> Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
> ---
> block/blk-core.c | 2 +-
> block/blk-zoned.c | 76 ++----------------------------
> ----
> drivers/block/null_blk/zoned.c | 2 +-
> drivers/block/ublk_drv.c | 2 +-
> drivers/block/virtio_blk.c | 2 +-
> drivers/nvme/host/zns.c | 2 +-
> drivers/scsi/sd_zbc.c | 2 +-
> include/linux/blkdev.h | 5 ---
> 8 files changed, 9 insertions(+), 84 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 71b7622c523a..0c25df9758d0 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -834,7 +834,7 @@ void submit_bio_noacct(struct bio *bio)
> goto not_supported;
> break;
> case REQ_OP_ZONE_RESET_ALL:
> - if (!bdev_is_zoned(bio->bi_bdev) ||
> !blk_queue_zone_resetall(q))
> + if (!bdev_is_zoned(bio->bi_bdev))
> goto not_supported;
> break;
> case REQ_OP_DRV_IN:
It does the same thing as other zone operations, putting these together
will be more cleaner?
> ...
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 4/5] block: Remove REQ_OP_ZONE_RESET_ALL emulation
2024-07-04 0:07 ` Ed Tsai (蔡宗軒)
@ 2024-07-04 1:14 ` Damien Le Moal
0 siblings, 0 replies; 13+ messages in thread
From: Damien Le Moal @ 2024-07-04 1:14 UTC (permalink / raw)
To: Ed Tsai (蔡宗軒), linux-block@vger.kernel.org,
hch@lst.de, martin.petersen@oracle.com, axboe@kernel.dk,
ming.lei@redhat.com, linux-scsi@vger.kernel.org,
mpatocka@redhat.com, mst@redhat.com, dm-devel@lists.linux.dev,
jasowang@redhat.com, snitzer@kernel.org
On 7/4/24 09:07, Ed Tsai (蔡宗軒) wrote:
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index 71b7622c523a..0c25df9758d0 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -834,7 +834,7 @@ void submit_bio_noacct(struct bio *bio)
>> goto not_supported;
>> break;
>> case REQ_OP_ZONE_RESET_ALL:
>> - if (!bdev_is_zoned(bio->bi_bdev) ||
>> !blk_queue_zone_resetall(q))
>> + if (!bdev_is_zoned(bio->bi_bdev))
>> goto not_supported;
>> break;
>> case REQ_OP_DRV_IN:
>
> It does the same thing as the other zone operations; wouldn't it be
> cleaner to put these together?
Indeed. Will do that in v2.
--
Damien Le Moal
Western Digital Research
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 4/5] block: Remove REQ_OP_ZONE_RESET_ALL emulation
2024-07-03 23:39 ` [PATCH 4/5] block: Remove REQ_OP_ZONE_RESET_ALL emulation Damien Le Moal
2024-07-04 0:07 ` Ed Tsai (蔡宗軒)
@ 2024-07-04 5:21 ` Christoph Hellwig
1 sibling, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2024-07-04 5:21 UTC (permalink / raw)
To: Damien Le Moal
Cc: Jens Axboe, linux-block, dm-devel, Mike Snitzer, Mikulas Patocka,
linux-scsi, Martin K . Petersen, Ming Lei, Michael S . Tsirkin,
Jason Wang, Christoph Hellwig
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 5/5] block: Remove blk_alloc_zone_bitmap()
2024-07-03 23:39 [PATCH 0/5] Remove zone reset all emulation Damien Le Moal
` (3 preceding siblings ...)
2024-07-03 23:39 ` [PATCH 4/5] block: Remove REQ_OP_ZONE_RESET_ALL emulation Damien Le Moal
@ 2024-07-03 23:39 ` Damien Le Moal
2024-07-04 5:22 ` Christoph Hellwig
4 siblings, 1 reply; 13+ messages in thread
From: Damien Le Moal @ 2024-07-03 23:39 UTC (permalink / raw)
To: Jens Axboe, linux-block, dm-devel, Mike Snitzer, Mikulas Patocka,
linux-scsi, Martin K . Petersen, Ming Lei, Michael S . Tsirkin,
Jason Wang, Christoph Hellwig
Remove the helper function blk_alloc_zone_bitmap() and replace its
single call site with a call to bitmap_zalloc(). To be consistent with
this change, use bitmap_free() to free a disk's conventional zones bitmap.
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
block/blk-zoned.c | 12 ++----------
1 file changed, 2 insertions(+), 10 deletions(-)
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index b104f5175783..af19296fa50d 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -150,13 +150,6 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
-static inline unsigned long *blk_alloc_zone_bitmap(int node,
- unsigned int nr_zones)
-{
- return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
- GFP_NOIO, node);
-}
-
static int blkdev_zone_reset_all(struct block_device *bdev)
{
struct bio bio;
@@ -1485,7 +1478,7 @@ void disk_free_zone_resources(struct gendisk *disk)
mempool_destroy(disk->zone_wplugs_pool);
disk->zone_wplugs_pool = NULL;
- kfree(disk->conv_zones_bitmap);
+ bitmap_free(disk->conv_zones_bitmap);
disk->conv_zones_bitmap = NULL;
disk->zone_capacity = 0;
disk->last_zone_capacity = 0;
@@ -1607,7 +1600,6 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
struct blk_revalidate_zone_args *args)
{
struct gendisk *disk = args->disk;
- struct request_queue *q = disk->queue;
if (zone->capacity != zone->len) {
pr_warn("%s: Invalid conventional zone capacity\n",
@@ -1623,7 +1615,7 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
if (!args->conv_zones_bitmap) {
args->conv_zones_bitmap =
- blk_alloc_zone_bitmap(q->node, args->nr_zones);
+ bitmap_zalloc(args->nr_zones, GFP_NOIO);
if (!args->conv_zones_bitmap)
return -ENOMEM;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 5/5] block: Remove blk_alloc_zone_bitmap()
2024-07-03 23:39 ` [PATCH 5/5] block: Remove blk_alloc_zone_bitmap() Damien Le Moal
@ 2024-07-04 5:22 ` Christoph Hellwig
0 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2024-07-04 5:22 UTC (permalink / raw)
To: Damien Le Moal
Cc: Jens Axboe, linux-block, dm-devel, Mike Snitzer, Mikulas Patocka,
linux-scsi, Martin K . Petersen, Ming Lei, Michael S . Tsirkin,
Jason Wang, Christoph Hellwig
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 13+ messages in thread