* [PATCH v3 1/5] block: Add extra checks in blk_validate_atomic_write_limits()
2024-11-01 14:46 [PATCH v3 0/5] RAID 0/1/10 atomic write support John Garry
@ 2024-11-01 14:46 ` John Garry
2024-11-01 14:46 ` [PATCH v3 2/5] block: Support atomic writes limits for stacked devices John Garry
` (3 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: John Garry @ 2024-11-01 14:46 UTC (permalink / raw)
To: axboe, song, yukuai3, hch
Cc: linux-block, linux-kernel, linux-raid, martin.petersen,
John Garry
So far it is expected that the limits passed in are valid.
In future, atomic writes will be supported for stacked block devices, and
calculating the limits there will be complicated, so add extra sanity
checks to ensure that the values are always valid.
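As a rough standalone illustration of the relationships these checks enforce
(not part of the patch; struct aw_limits, is_pow2() and aw_limits_valid() are
made-up names standing in for the queue_limits fields):

  /* Hypothetical userspace sketch of the invariants checked above. */
  #include <stdbool.h>
  #include <stdio.h>

  static bool is_pow2(unsigned int x)
  {
          return x && !(x & (x - 1));
  }

  struct aw_limits {
          unsigned int unit_min;   /* atomic_write_hw_unit_min */
          unsigned int unit_max;   /* atomic_write_hw_unit_max */
          unsigned int max;        /* atomic_write_hw_max */
          unsigned int boundary;   /* atomic_write_hw_boundary */
  };

  static bool aw_limits_valid(const struct aw_limits *l)
  {
          /* unit min/max must be powers-of-2, ordered, and within max */
          if (!is_pow2(l->unit_min) || !is_pow2(l->unit_max))
                  return false;
          if (l->unit_min > l->unit_max || l->unit_max > l->max)
                  return false;
          /* if a boundary is set, max must not exceed it */
          if (l->boundary && l->max > l->boundary)
                  return false;
          return true;
  }

  int main(void)
  {
          struct aw_limits ok  = { 4096, 65536, 65536, 0 };
          struct aw_limits bad = { 4096, 65536, 32768, 0 }; /* unit_max > max */

          printf("ok: %d, bad: %d\n", aw_limits_valid(&ok), aw_limits_valid(&bad));
          return 0;
  }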
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
block/blk-settings.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 95fc39d09872..c5a753f980bf 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -179,9 +179,26 @@ static void blk_validate_atomic_write_limits(struct queue_limits *lim)
if (!lim->atomic_write_hw_max)
goto unsupported;
+ if (WARN_ON_ONCE(!is_power_of_2(lim->atomic_write_hw_unit_min)))
+ goto unsupported;
+
+ if (WARN_ON_ONCE(!is_power_of_2(lim->atomic_write_hw_unit_max)))
+ goto unsupported;
+
+ if (WARN_ON_ONCE(lim->atomic_write_hw_unit_min >
+ lim->atomic_write_hw_unit_max))
+ goto unsupported;
+
+ if (WARN_ON_ONCE(lim->atomic_write_hw_unit_max >
+ lim->atomic_write_hw_max))
+ goto unsupported;
+
boundary_sectors = lim->atomic_write_hw_boundary >> SECTOR_SHIFT;
if (boundary_sectors) {
+ if (WARN_ON_ONCE(lim->atomic_write_hw_max >
+ lim->atomic_write_hw_boundary))
+ goto unsupported;
/*
* A feature of boundary support is that it disallows bios to
* be merged which would result in a merged request which
--
2.31.1
* [PATCH v3 2/5] block: Support atomic writes limits for stacked devices
2024-11-01 14:46 [PATCH v3 0/5] RAID 0/1/10 atomic write support John Garry
2024-11-01 14:46 ` [PATCH v3 1/5] block: Add extra checks in blk_validate_atomic_write_limits() John Garry
@ 2024-11-01 14:46 ` John Garry
2024-11-04 7:36 ` Christoph Hellwig
2024-11-01 14:46 ` [PATCH v3 3/5] md/raid0: Atomic write support John Garry
` (2 subsequent siblings)
4 siblings, 1 reply; 9+ messages in thread
From: John Garry @ 2024-11-01 14:46 UTC (permalink / raw)
To: axboe, song, yukuai3, hch
Cc: linux-block, linux-kernel, linux-raid, martin.petersen,
John Garry
Allow stacked devices to support atomic writes by aggregating the minimum
capability of all bottom devices.
Flag BLK_FEAT_ATOMIC_WRITES_STACKED is set for stacked devices which
have been enabled to support atomic writes.
Some things to note on the implementation:
- For simplicity, all bottom devices must have the same atomic write boundary
value (if any).
- The atomic write boundary must already be a power-of-2, but this
restriction could be relaxed. Furthermore, the chunk sectors for a top
device must now be aligned with this boundary.
- If the bottom device atomic write unit min/max are not aligned with the
top device chunk sectors, the top device atomic write unit min/max are
reduced to values which work for the chunk sectors (see the sketch below).
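A minimal standalone sketch of that last reduction (not part of the patch;
fit_unit_max() is a made-up helper name), mirroring the loop added in
blk_stack_atomic_writes_head() and the 16K unit / 24K chunk example from
the code comment:

  /* Hypothetical sketch: halve unit_max until it divides the chunk size. */
  #include <stdio.h>

  static unsigned int fit_unit_max(unsigned int unit_max, unsigned int chunk_bytes)
  {
          /* unit_max is a power-of-2; halving terminates at 1 at worst,
           * since any value is a multiple of 1 */
          while (chunk_bytes % unit_max)
                  unit_max /= 2;
          return unit_max;
  }

  int main(void)
  {
          unsigned int unit_max = 16 * 1024;  /* bottom device unit_max: 16K */
          unsigned int chunk = 24 * 1024;     /* top device chunk size: 24K */

          printf("stacked unit_max = %uK\n", fit_unit_max(unit_max, chunk) / 1024);
          /* prints 8K: the largest power-of-2 dividing 24K */
          return 0;
  }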
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
block/blk-settings.c | 115 +++++++++++++++++++++++++++++++++++++++++
include/linux/blkdev.h | 4 ++
2 files changed, 119 insertions(+)
diff --git a/block/blk-settings.c b/block/blk-settings.c
index c5a753f980bf..8d3a9a55462e 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -496,6 +496,119 @@ static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lb
return sectors;
}
+/* Check if second and later bottom devices are compliant */
+static bool blk_stack_atomic_writes_tail(struct queue_limits *t,
+ struct queue_limits *b)
+{
+ /* We're not going to support different boundary sizes.. yet */
+ if (t->atomic_write_hw_boundary != b->atomic_write_hw_boundary)
+ return false;
+
+ /* Can't support this */
+ if (t->atomic_write_hw_unit_min > b->atomic_write_hw_unit_max)
+ return false;
+
+ /* Or this */
+ if (t->atomic_write_hw_unit_max < b->atomic_write_hw_unit_min)
+ return false;
+
+ t->atomic_write_hw_max = min(t->atomic_write_hw_max,
+ b->atomic_write_hw_max);
+ t->atomic_write_hw_unit_min = max(t->atomic_write_hw_unit_min,
+ b->atomic_write_hw_unit_min);
+ t->atomic_write_hw_unit_max = min(t->atomic_write_hw_unit_max,
+ b->atomic_write_hw_unit_max);
+ return true;
+}
+
+/* Check for valid boundary of first bottom device */
+static bool blk_stack_atomic_writes_boundary_head(struct queue_limits *t,
+ struct queue_limits *b)
+{
+ /*
+ * Ensure atomic write boundary is aligned with chunk sectors. Stacked
+ * devices store chunk sectors in t->io_min.
+ */
+ if (b->atomic_write_hw_boundary > t->io_min &&
+ b->atomic_write_hw_boundary % t->io_min)
+ return false;
+ if (t->io_min > b->atomic_write_hw_boundary &&
+ t->io_min % b->atomic_write_hw_boundary)
+ return false;
+
+ t->atomic_write_hw_boundary = b->atomic_write_hw_boundary;
+ return true;
+}
+
+
+/* Check stacking of first bottom device */
+static bool blk_stack_atomic_writes_head(struct queue_limits *t,
+ struct queue_limits *b)
+{
+ if (b->atomic_write_hw_boundary &&
+ !blk_stack_atomic_writes_boundary_head(t, b))
+ return false;
+
+ if (t->io_min <= SECTOR_SIZE) {
+ /* No chunk sectors, so use bottom device values directly */
+ t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max;
+ t->atomic_write_hw_unit_min = b->atomic_write_hw_unit_min;
+ t->atomic_write_hw_max = b->atomic_write_hw_max;
+ return true;
+ }
+
+ /*
+ * Find values for limits which work for chunk size.
+ * b->atomic_write_hw_unit_{min, max} may not be aligned with chunk
+ * size (t->io_min), as chunk size is not restricted to a power-of-2.
+ * So we need to find highest power-of-2 which works for the chunk
+ * size.
+ * As an example scenario, we could have b->unit_max = 16K and
+ * t->io_min = 24K. For this case, reduce t->unit_max to a value
+ * aligned with both limits, i.e. 8K in this example.
+ */
+ t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max;
+ while (t->io_min % t->atomic_write_hw_unit_max)
+ t->atomic_write_hw_unit_max /= 2;
+
+ t->atomic_write_hw_unit_min = min(b->atomic_write_hw_unit_min,
+ t->atomic_write_hw_unit_max);
+ t->atomic_write_hw_max = min(b->atomic_write_hw_max, t->io_min);
+
+ return true;
+}
+
+static void blk_stack_atomic_writes_limits(struct queue_limits *t,
+ struct queue_limits *b)
+{
+ if (!(t->features & BLK_FEAT_ATOMIC_WRITES_STACKED))
+ goto unsupported;
+
+ if (!b->atomic_write_unit_min)
+ goto unsupported;
+
+ /*
+ * If atomic_write_hw_max is set, we have already stacked 1x bottom
+ * device, so check for compliance.
+ */
+ if (t->atomic_write_hw_max) {
+ if (!blk_stack_atomic_writes_tail(t, b))
+ goto unsupported;
+ return;
+ }
+
+ if (!blk_stack_atomic_writes_head(t, b))
+ goto unsupported;
+ return;
+
+unsupported:
+ t->atomic_write_hw_max = 0;
+ t->atomic_write_hw_unit_max = 0;
+ t->atomic_write_hw_unit_min = 0;
+ t->atomic_write_hw_boundary = 0;
+ t->features &= ~BLK_FEAT_ATOMIC_WRITES_STACKED;
+}
+
/**
* blk_stack_limits - adjust queue_limits for stacked devices
* @t: the stacking driver limits (top device)
@@ -656,6 +769,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->zone_write_granularity = 0;
t->max_zone_append_sectors = 0;
}
+ blk_stack_atomic_writes_limits(t, b);
+
return ret;
}
EXPORT_SYMBOL(blk_stack_limits);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7bfc877e159e..272e7cd03297 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -333,6 +333,10 @@ typedef unsigned int __bitwise blk_features_t;
#define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
((__force blk_features_t)(1u << 15))
+/* stacked device can/does support atomic writes */
+#define BLK_FEAT_ATOMIC_WRITES_STACKED \
+ ((__force blk_features_t)(1u << 16))
+
/*
* Flags automatically inherited when stacking limits.
*/
--
2.31.1
* [PATCH v3 3/5] md/raid0: Atomic write support
2024-11-01 14:46 [PATCH v3 0/5] RAID 0/1/10 atomic write support John Garry
2024-11-01 14:46 ` [PATCH v3 1/5] block: Add extra checks in blk_validate_atomic_write_limits() John Garry
2024-11-01 14:46 ` [PATCH v3 2/5] block: Support atomic writes limits for stacked devices John Garry
@ 2024-11-01 14:46 ` John Garry
2024-11-04 11:06 ` Yu Kuai
2024-11-01 14:46 ` [PATCH v3 4/5] md/raid1: " John Garry
2024-11-01 14:46 ` [PATCH v3 5/5] md/raid10: " John Garry
4 siblings, 1 reply; 9+ messages in thread
From: John Garry @ 2024-11-01 14:46 UTC (permalink / raw)
To: axboe, song, yukuai3, hch
Cc: linux-block, linux-kernel, linux-raid, martin.petersen,
John Garry
Set BLK_FEAT_ATOMIC_WRITES_STACKED to enable atomic writes. All other
stacked device request queue limits should automatically be set properly.
As for the atomic write max bytes limit, this will be set at
hw_max_sectors, and that is limited by the stripe width, which is what we want.
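As a rough worked example with made-up numbers (not from the patch): with
64 KiB chunks, lim.io_min is 64 KiB, so the stacking added in the previous
patch caps the top-level atomic_write_hw_max at 64 KiB even if a bottom
device advertises more:

  /* Hypothetical illustration of the cap applied by the stacking code. */
  #include <stdio.h>

  int main(void)
  {
          unsigned int chunk_sectors = 128;            /* 64 KiB chunks */
          unsigned int io_min = chunk_sectors << 9;    /* chunk size in bytes */
          unsigned int bottom_hw_max = 128 * 1024;     /* bottom device: 128 KiB */

          /* t->atomic_write_hw_max = min(b->atomic_write_hw_max, t->io_min) */
          unsigned int top_hw_max =
                  bottom_hw_max < io_min ? bottom_hw_max : io_min;

          printf("top atomic_write_hw_max = %u KiB\n", top_hw_max / 1024);
          return 0;
  }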
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/md/raid0.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index baaf5f8b80ae..7049ec7fb8eb 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -384,6 +384,7 @@ static int raid0_set_limits(struct mddev *mddev)
lim.max_write_zeroes_sectors = mddev->chunk_sectors;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * mddev->raid_disks;
+ lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err) {
queue_limits_cancel_update(mddev->gendisk->queue);
--
2.31.1
* Re: [PATCH v3 3/5] md/raid0: Atomic write support
2024-11-01 14:46 ` [PATCH v3 3/5] md/raid0: Atomic write support John Garry
@ 2024-11-04 11:06 ` Yu Kuai
0 siblings, 0 replies; 9+ messages in thread
From: Yu Kuai @ 2024-11-04 11:06 UTC (permalink / raw)
To: John Garry, axboe, song, hch
Cc: linux-block, linux-kernel, linux-raid, martin.petersen,
yukuai (C)
On 2024/11/01 22:46, John Garry wrote:
> Set BLK_FEAT_ATOMIC_WRITES_STACKED to enable atomic writes. All other
> stacked device request queue limits should automatically be set properly.
> With regards to atomic write max bytes limit, this will be set at
> hw_max_sectors and this is limited by the stripe width, which we want.
>
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
> drivers/md/raid0.c | 1 +
> 1 file changed, 1 insertion(+)
>
LGTM
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index baaf5f8b80ae..7049ec7fb8eb 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -384,6 +384,7 @@ static int raid0_set_limits(struct mddev *mddev)
> lim.max_write_zeroes_sectors = mddev->chunk_sectors;
> lim.io_min = mddev->chunk_sectors << 9;
> lim.io_opt = lim.io_min * mddev->raid_disks;
> + lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
> err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
> if (err) {
> queue_limits_cancel_update(mddev->gendisk->queue);
>
* [PATCH v3 4/5] md/raid1: Atomic write support
2024-11-01 14:46 [PATCH v3 0/5] RAID 0/1/10 atomic write support John Garry
` (2 preceding siblings ...)
2024-11-01 14:46 ` [PATCH v3 3/5] md/raid0: Atomic write support John Garry
@ 2024-11-01 14:46 ` John Garry
2024-11-04 1:11 ` kernel test robot
2024-11-01 14:46 ` [PATCH v3 5/5] md/raid10: " John Garry
4 siblings, 1 reply; 9+ messages in thread
From: John Garry @ 2024-11-01 14:46 UTC (permalink / raw)
To: axboe, song, yukuai3, hch
Cc: linux-block, linux-kernel, linux-raid, martin.petersen,
John Garry
Set BLK_FEAT_ATOMIC_WRITES_STACKED to enable atomic writes.
If an atomic write is attempted for a region which has bad blocks, error
the write, as we just cannot do this atomically. It is unlikely that
devices which support atomic writes will also have bad blocks.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/md/raid1.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7e023e9303c8..795bd0c7caff 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1547,7 +1547,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
continue;
}
if (is_bad) {
- int good_sectors = first_bad - r1_bio->sector;
+ int good_sectors;
+
+ if (bio->bi_opf & REQ_ATOMIC) {
+ /* We just cannot atomically write this ... */
+ error = -EFAULT;
+ goto err_handle;
+ }
+
+ good_sectors = first_bad - r1_bio->sector;
if (good_sectors < max_sectors)
max_sectors = good_sectors;
}
@@ -1654,7 +1662,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset);
mbio->bi_end_io = raid1_end_write_request;
- mbio->bi_opf = bio_op(bio) | (bio->bi_opf & (REQ_SYNC | REQ_FUA));
+ mbio->bi_opf = bio_op(bio) |
+ (bio->bi_opf & (REQ_SYNC | REQ_FUA | REQ_ATOMIC));
if (test_bit(FailFast, &rdev->flags) &&
!test_bit(WriteMostly, &rdev->flags) &&
conf->raid_disks - mddev->degraded > 1)
@@ -3221,6 +3230,7 @@ static int raid1_set_limits(struct mddev *mddev)
md_init_stacking_limits(&lim);
lim.max_write_zeroes_sectors = 0;
+ lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err) {
queue_limits_cancel_update(mddev->gendisk->queue);
--
2.31.1
* Re: [PATCH v3 4/5] md/raid1: Atomic write support
2024-11-01 14:46 ` [PATCH v3 4/5] md/raid1: " John Garry
@ 2024-11-04 1:11 ` kernel test robot
0 siblings, 0 replies; 9+ messages in thread
From: kernel test robot @ 2024-11-04 1:11 UTC (permalink / raw)
To: John Garry, axboe, song, yukuai3, hch
Cc: oe-kbuild-all, linux-block, linux-kernel, linux-raid,
martin.petersen, John Garry
Hi John,
kernel test robot noticed the following build errors:
[auto build test ERROR on axboe-block/for-next]
[also build test ERROR on linus/master v6.12-rc5 next-20241101]
[cannot apply to song-md/md-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/John-Garry/block-Add-extra-checks-in-blk_validate_atomic_write_limits/20241101-225310
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link: https://lore.kernel.org/r/20241101144616.497602-5-john.g.garry%40oracle.com
patch subject: [PATCH v3 4/5] md/raid1: Atomic write support
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241104/202411040805.745M3bMe-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202411040805.745M3bMe-lkp@intel.com/
All errors (new ones prefixed by >>):
make[1]: Circular tools/testing/selftests/alsa/global-timer <- tools/testing/selftests/alsa/global-timer dependency dropped.
Makefile:60: warning: overriding recipe for target 'emit_tests'
../lib.mk:182: warning: ignoring old recipe for target 'emit_tests'
make[1]: *** No targets. Stop.
>> Makefile:47: *** Cannot find a vmlinux for VMLINUX_BTF at any of " ../../../../vmlinux /sys/kernel/btf/vmlinux /boot/vmlinux-5.9.0-2-amd64". Stop.
make[1]: *** No targets. Stop.
make[1]: *** No targets. Stop.
vim +47 Makefile
3812b8c5c5d527 Masahiro Yamada 2019-02-22 46
3812b8c5c5d527 Masahiro Yamada 2019-02-22 @47 # Do not use make's built-in rules and variables
3812b8c5c5d527 Masahiro Yamada 2019-02-22 48 # (this increases performance and avoids hard-to-debug behaviour)
3812b8c5c5d527 Masahiro Yamada 2019-02-22 49 MAKEFLAGS += -rR
3812b8c5c5d527 Masahiro Yamada 2019-02-22 50
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
* [PATCH v3 5/5] md/raid10: Atomic write support
2024-11-01 14:46 [PATCH v3 0/5] RAID 0/1/10 atomic write support John Garry
` (3 preceding siblings ...)
2024-11-01 14:46 ` [PATCH v3 4/5] md/raid1: " John Garry
@ 2024-11-01 14:46 ` John Garry
4 siblings, 0 replies; 9+ messages in thread
From: John Garry @ 2024-11-01 14:46 UTC (permalink / raw)
To: axboe, song, yukuai3, hch
Cc: linux-block, linux-kernel, linux-raid, martin.petersen,
John Garry
Set BLK_FEAT_ATOMIC_WRITES_STACKED to enable atomic writes.
If an atomic write is attempted for a region which has bad blocks, error
the write, as we just cannot do this atomically. It is unlikely that
devices which support atomic writes will also have bad blocks.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/md/raid10.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ccd95459b192..55175ad55525 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1255,6 +1255,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
const enum req_op op = bio_op(bio);
const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
const blk_opf_t do_fua = bio->bi_opf & REQ_FUA;
+ const blk_opf_t do_atomic = bio->bi_opf & REQ_ATOMIC;
unsigned long flags;
struct r10conf *conf = mddev->private;
struct md_rdev *rdev;
@@ -1273,7 +1274,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr +
choose_data_offset(r10_bio, rdev));
mbio->bi_end_io = raid10_end_write_request;
- mbio->bi_opf = op | do_sync | do_fua;
+ mbio->bi_opf = op | do_sync | do_fua | do_atomic;
if (!replacement && test_bit(FailFast,
&conf->mirrors[devnum].rdev->flags)
&& enough(conf, devnum))
@@ -1472,7 +1473,15 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
continue;
}
if (is_bad) {
- int good_sectors = first_bad - dev_sector;
+ int good_sectors;
+
+ if (bio->bi_opf & REQ_ATOMIC) {
+ /* We just cannot atomically write this ... */
+ error = -EFAULT;
+ goto err_handle;
+ }
+
+ good_sectors = first_bad - dev_sector;
if (good_sectors < max_sectors)
max_sectors = good_sectors;
}
@@ -4029,6 +4038,7 @@ static int raid10_set_queue_limits(struct mddev *mddev)
lim.max_write_zeroes_sectors = 0;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
+ lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err) {
queue_limits_cancel_update(mddev->gendisk->queue);
--
2.31.1