* [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking
@ 2026-04-22 2:33 Chen Cheng
2026-04-22 2:33 ` [PATCH 2/4] md/raid10: prepare r10bio allocation width tracking Chen Cheng
` (4 more replies)
0 siblings, 5 replies; 7+ messages in thread
From: Chen Cheng @ 2026-04-22 2:33 UTC (permalink / raw)
To: linux-raid, yukuai; +Cc: chencheng, chenchneg33
From: Chen Cheng <chencheng@fnnas.com>
raid10 reuses r10bio objects from both r10bio_pool and r10buf_pool. Track
the number of devs[] slots used by each request in the r10bio itself and
initialize it whenever one of these objects is reused.
No functional change yet. A later patch will use this width when reshape
changes conf->geo.raid_disks.
---
drivers/md/raid10.c | 4 ++++
drivers/md/raid10.h | 1 +
2 files changed, 5 insertions(+)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0653b5d8545a..e93933632893 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1540,6 +1540,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
r10_bio->sector = bio->bi_iter.bi_sector;
r10_bio->state = 0;
r10_bio->read_slot = -1;
+ r10_bio->used_nr_devs = conf->geo.raid_disks;
memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) *
conf->geo.raid_disks);
@@ -1727,6 +1728,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
r10_bio->mddev = mddev;
r10_bio->state = 0;
r10_bio->sectors = 0;
+ r10_bio->used_nr_devs = geo->raid_disks;
memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);
wait_blocked_dev(mddev, r10_bio);
@@ -3061,6 +3063,8 @@ static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
else
nalloc = 2; /* recovery */
+ r10bio->used_nr_devs = nalloc;
+
for (i = 0; i < nalloc; i++) {
bio = r10bio->devs[i].bio;
rp = bio->bi_private;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index ec79d87fb92f..92e8743023e6 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -127,6 +127,7 @@ struct r10bio {
* if the IO is in READ direction, then this is where we read
*/
int read_slot;
+ unsigned int used_nr_devs;
struct list_head retry_list;
/*
--
2.53.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/4] md/raid10: prepare r10bio allocation width tracking
2026-04-22 2:33 [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking Chen Cheng
@ 2026-04-22 2:33 ` Chen Cheng
2026-04-22 2:33 ` [PATCH 3/4] md/raid10: fix r10bio devs overflow across reshape Chen Cheng
` (3 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Chen Cheng @ 2026-04-22 2:33 UTC (permalink / raw)
To: linux-raid, yukuai; +Cc: chencheng, chenchneg33
From: Chen Cheng <chencheng@fnnas.com>
Record how many devs[] slots each r10bio was allocated with.
Keep the active r10bio pool in a separate object that carries its width.
This keeps the allocation width separate from the per-request width stored
in used_nr_devs and prepares the pool for replacement during reshape.
---
drivers/md/raid10.c | 40 +++++++++++++++++++++++++++-------------
drivers/md/raid10.h | 8 +++++++-
2 files changed, 34 insertions(+), 14 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index e93933632893..b447903fbdc6 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -103,14 +103,22 @@ static inline struct r10bio *get_resync_r10bio(struct bio *bio)
return get_resync_pages(bio)->raid_bio;
}
-static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
+static void *r10bio_pool_alloc(gfp_t gfp_flags, void *data)
{
- struct r10conf *conf = data;
- int size = offsetof(struct r10bio, devs[conf->geo.raid_disks]);
+ struct r10bio_pool *pool = data;
+ int size = offsetof(struct r10bio, devs[pool->nr_devs]);
+ struct r10bio *r10_bio = kzalloc(size, gfp_flags);
+
+ if (r10_bio)
+ r10_bio->alloc_nr_devs = pool->nr_devs;
+ return r10_bio;
+}
- /* allocate a r10bio with room for raid_disks entries in the
- * bios array */
- return kzalloc(size, gfp_flags);
+static int init_r10bio_pool(struct r10bio_pool *pool, unsigned int nr_devs)
+{
+ pool->nr_devs = nr_devs;
+ return mempool_init(&pool->pool, NR_RAID_BIOS, r10bio_pool_alloc,
+ rbio_pool_free, pool);
}
#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
@@ -137,7 +145,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
int nalloc, nalloc_rp;
struct resync_pages *rps;
- r10_bio = r10bio_pool_alloc(gfp_flags, conf);
+ r10_bio = r10bio_pool_alloc(gfp_flags, conf->r10bio_pool);
if (!r10_bio)
return NULL;
@@ -277,7 +285,7 @@ static void free_r10bio(struct r10bio *r10_bio)
struct r10conf *conf = r10_bio->mddev->private;
put_all_bios(conf, r10_bio);
- mempool_free(r10_bio, &conf->r10bio_pool);
+ mempool_free(r10_bio, &conf->r10bio_pool->pool);
}
static void put_buf(struct r10bio *r10_bio)
@@ -1531,7 +1539,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
struct r10conf *conf = mddev->private;
struct r10bio *r10_bio;
- r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
+ r10_bio = mempool_alloc(&conf->r10bio_pool->pool, GFP_NOIO);
r10_bio->master_bio = bio;
r10_bio->sectors = sectors;
@@ -1724,7 +1732,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
(last_stripe_index << geo->chunk_shift);
retry_discard:
- r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
+ r10_bio = mempool_alloc(&conf->r10bio_pool->pool, GFP_NOIO);
r10_bio->mddev = mddev;
r10_bio->state = 0;
r10_bio->sectors = 0;
@@ -3825,7 +3833,10 @@ static void raid10_free_conf(struct r10conf *conf)
if (!conf)
return;
- mempool_exit(&conf->r10bio_pool);
+ if (conf->r10bio_pool) {
+ mempool_exit(&conf->r10bio_pool->pool);
+ kfree(conf->r10bio_pool);
+ }
kfree(conf->mirrors);
kfree(conf->mirrors_old);
kfree(conf->mirrors_new);
@@ -3870,10 +3881,13 @@ static struct r10conf *setup_conf(struct mddev *mddev)
if (!conf->tmppage)
goto out;
+ conf->r10bio_pool = kzalloc_obj(struct r10bio_pool);
+ if (!conf->r10bio_pool)
+ goto out;
+
conf->geo = geo;
conf->copies = copies;
- err = mempool_init(&conf->r10bio_pool, NR_RAID_BIOS, r10bio_pool_alloc,
- rbio_pool_free, conf);
+ err = init_r10bio_pool(conf->r10bio_pool, conf->geo.raid_disks);
if (err)
goto out;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 92e8743023e6..8fa4e54c444c 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -20,6 +20,11 @@ struct raid10_info {
sector_t head_position;
};
+struct r10bio_pool {
+ mempool_t pool;
+ unsigned int nr_devs;
+};
+
struct r10conf {
struct mddev *mddev;
struct raid10_info *mirrors;
@@ -87,7 +92,7 @@ struct r10conf {
*/
wait_queue_head_t wait_barrier;
- mempool_t r10bio_pool;
+ struct r10bio_pool *r10bio_pool;
mempool_t r10buf_pool;
struct page *tmppage;
struct bio_set bio_split;
@@ -128,6 +133,7 @@ struct r10bio {
*/
int read_slot;
unsigned int used_nr_devs;
+ unsigned int alloc_nr_devs;
struct list_head retry_list;
/*
--
2.53.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/4] md/raid10: fix r10bio devs overflow across reshape
2026-04-22 2:33 [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking Chen Cheng
2026-04-22 2:33 ` [PATCH 2/4] md/raid10: prepare r10bio allocation width tracking Chen Cheng
@ 2026-04-22 2:33 ` Chen Cheng
2026-04-22 2:33 ` [PATCH 4/4] md/raid10: reset read_slot when reusing r10bio for discard Chen Cheng
` (2 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Chen Cheng @ 2026-04-22 2:33 UTC (permalink / raw)
To: linux-raid, yukuai; +Cc: chencheng, chenchneg33
From: Chen Cheng <chencheng@fnnas.com>
A 4-disk to 5-disk raid10 reshape can complete or free an r10bio that was
allocated before the geometry switch.
The failure was reproduced with a simple write workload while reshaping a
raid10 array from 4 disks to 5 disks, e.g.:
mdadm -C /dev/md777 -l10 -n4 /dev/sda /dev/sdb /dev/sdc /dev/sdd
mkfs.ext4 /dev/md777
mount /dev/md777 /mnt/test
fsstress -d /mnt/test -n 24000 -p 8 -l 24 &
mdadm /dev/md777 --add /dev/sde
mdadm --grow /dev/md777 --raid-devices=5 \
--backup-file=/tmp/md-reshape-backup
Without this patch, the sequence above can trigger:
BUG: KASAN: slab-out-of-bounds in free_r10bio+0x1c4/0x260 [raid10]
Read of size 8 at addr ffff00008c2dfac8 by task ksoftirqd/0/15
free_r10bio
raid_end_bio_io
one_write_done
raid10_end_write_request
The buggy object was 200 bytes long, which matches an r10bio with space for
only four devs[] entries. However, put_all_bios() and find_bio_disk() walk
r10_bio->devs[] using the current conf->geo.raid_disks value. Once reshape
switches conf->geo.raid_disks from 4 to 5, an old 4-slot r10bio can be
completed or freed as if it had 5 slots, and the walk overruns devs[4].
The same stale-width mismatch can also surface during a 5-disk to 4-disk
reshape.
The same transition also leaves stale-width objects in the active r10bio
pool, so new requests can reuse a 4-slot object after reshape starts unless
the pool is replaced for the new geometry.
Fix this by recording the actual devs[] slot count in each r10bio and using
that count when scanning or freeing the object. Also replace the active
r10bio pool with one sized for the new geometry before reshape switches
layouts. Old-width r10bio objects are freed directly instead of being
returned to a pool that now expects a different width.
A/B validation:
- Without this patch, the 4-disk to 5-disk reshape test triggered the
KASAN report.
- With this patch, neither the 4-disk to 5-disk nor the 5-disk to 4-disk
reshape test triggers KASAN.
---
drivers/md/raid10.c | 43 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 41 insertions(+), 2 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index b447903fbdc6..3edde440623a 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -234,6 +234,30 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
return NULL;
}
+static int reinit_r10bio_pool(struct r10conf *conf, unsigned int nr_devs)
+{
+ struct r10bio_pool *new_pool, *old_pool = conf->r10bio_pool;
+ int ret;
+
+ if (old_pool->nr_devs == nr_devs)
+ return 0;
+
+ new_pool = kzalloc_obj(struct r10bio_pool);
+ if (!new_pool)
+ return -ENOMEM;
+
+ ret = init_r10bio_pool(new_pool, nr_devs);
+ if (ret) {
+ kfree(new_pool);
+ return ret;
+ }
+
+ conf->r10bio_pool = new_pool;
+ mempool_exit(&old_pool->pool);
+ kfree(old_pool);
+ return 0;
+}
+
static void r10buf_pool_free(void *__r10_bio, void *data)
{
struct r10conf *conf = data;
@@ -268,7 +292,7 @@ static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio)
{
int i;
- for (i = 0; i < conf->geo.raid_disks; i++) {
+ for (i = 0; i < r10_bio->used_nr_devs; i++) {
struct bio **bio = & r10_bio->devs[i].bio;
if (!BIO_SPECIAL(*bio))
bio_put(*bio);
@@ -285,6 +309,10 @@ static void free_r10bio(struct r10bio *r10_bio)
struct r10conf *conf = r10_bio->mddev->private;
put_all_bios(conf, r10_bio);
+ if (r10_bio->alloc_nr_devs != conf->r10bio_pool->nr_devs) {
+ rbio_pool_free(r10_bio, conf);
+ return;
+ }
mempool_free(r10_bio, &conf->r10bio_pool->pool);
}
@@ -365,7 +393,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
int slot;
int repl = 0;
- for (slot = 0; slot < conf->geo.raid_disks; slot++) {
+ for (slot = 0; slot < r10_bio->used_nr_devs; slot++) {
if (r10_bio->devs[slot].bio == bio)
break;
if (r10_bio->devs[slot].repl_bio == bio) {
@@ -4416,6 +4444,11 @@ static int raid10_start_reshape(struct mddev *mddev)
if (spares < mddev->delta_disks)
return -EINVAL;
+ raise_barrier(conf, 0);
+ ret = reinit_r10bio_pool(conf, new.raid_disks);
+ if (ret)
+ goto out_lower_barrier;
+
conf->offset_diff = min_offset_diff;
spin_lock_irq(&conf->device_lock);
if (conf->mirrors_new) {
@@ -4433,6 +4466,7 @@ static int raid10_start_reshape(struct mddev *mddev)
sector_t size = raid10_size(mddev, 0, 0);
if (size < mddev->array_sectors) {
spin_unlock_irq(&conf->device_lock);
+ lower_barrier(conf);
pr_warn("md/raid10:%s: array size must be reduce before number of disks\n",
mdname(mddev));
return -EINVAL;
@@ -4443,6 +4477,7 @@ static int raid10_start_reshape(struct mddev *mddev)
conf->reshape_progress = 0;
conf->reshape_safe = conf->reshape_progress;
spin_unlock_irq(&conf->device_lock);
+ lower_barrier(conf);
if (mddev->delta_disks && mddev->bitmap) {
struct mdp_superblock_1 *sb = NULL;
@@ -4527,6 +4562,10 @@ static int raid10_start_reshape(struct mddev *mddev)
md_new_event();
return 0;
+out_lower_barrier:
+ lower_barrier(conf);
+ return ret;
+
abort:
mddev->recovery = 0;
spin_lock_irq(&conf->device_lock);
--
2.53.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/4] md/raid10: reset read_slot when reusing r10bio for discard
2026-04-22 2:33 [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking Chen Cheng
2026-04-22 2:33 ` [PATCH 2/4] md/raid10: prepare r10bio allocation width tracking Chen Cheng
2026-04-22 2:33 ` [PATCH 3/4] md/raid10: fix r10bio devs overflow across reshape Chen Cheng
@ 2026-04-22 2:33 ` Chen Cheng
2026-04-22 6:40 ` [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking Paul Menzel
2026-04-24 7:04 ` Yu Kuai
4 siblings, 0 replies; 7+ messages in thread
From: Chen Cheng @ 2026-04-22 2:33 UTC (permalink / raw)
To: linux-raid, yukuai; +Cc: chencheng, chenchneg33
From: Chen Cheng <chencheng@fnnas.com>
raid10_handle_discard() reuses r10bio objects from r10bio_pool.
put_all_bios() always drops devs[i].bio, but it only drops
devs[i].repl_bio when r10_bio->read_slot < 0. If discard reuses an
r10bio that was previously used for a read, read_slot can still be
non-negative, and discard cleanup can skip bio_put() on repl_bio.
Reset read_slot to -1 when preparing an r10bio for discard so the
replacement bio is always released correctly.
---
drivers/md/raid10.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3edde440623a..19d7f6f62beb 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1764,6 +1764,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
r10_bio->mddev = mddev;
r10_bio->state = 0;
r10_bio->sectors = 0;
+ r10_bio->read_slot = -1;
r10_bio->used_nr_devs = geo->raid_disks;
memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);
wait_blocked_dev(mddev, r10_bio);
--
2.53.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking
2026-04-22 2:33 [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking Chen Cheng
` (2 preceding siblings ...)
2026-04-22 2:33 ` [PATCH 4/4] md/raid10: reset read_slot when reusing r10bio for discard Chen Cheng
@ 2026-04-22 6:40 ` Paul Menzel
2026-04-24 2:11 ` Chen Cheng
2026-04-24 7:04 ` Yu Kuai
4 siblings, 1 reply; 7+ messages in thread
From: Paul Menzel @ 2026-04-22 6:40 UTC (permalink / raw)
To: Chen Cheng; +Cc: linux-raid, yukuai, chenchneg33
Dear Cheng,
Am 22.04.26 um 04:33 schrieb Chen Cheng:
> From: Chen Cheng <chencheng@fnnas.com>
>
> raid10 reuses r10bio objects from both r10bio_pool and r10buf_pool. Track
> the number of devs[] slots used by each request in the r10bio itself and
> initialize it whenever one of these objects is reused.
>
> No functional change yet. A later patch will use this width when reshape
> changes conf->geo.raid_disks.
Your Signed-off-by: line is missing.
> ---
> drivers/md/raid10.c | 4 ++++
> drivers/md/raid10.h | 1 +
> 2 files changed, 5 insertions(+)
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 0653b5d8545a..e93933632893 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -1540,6 +1540,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
> r10_bio->sector = bio->bi_iter.bi_sector;
> r10_bio->state = 0;
> r10_bio->read_slot = -1;
> + r10_bio->used_nr_devs = conf->geo.raid_disks;
> memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) *
> conf->geo.raid_disks);
>
> @@ -1727,6 +1728,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
> r10_bio->mddev = mddev;
> r10_bio->state = 0;
> r10_bio->sectors = 0;
> + r10_bio->used_nr_devs = geo->raid_disks;
> memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);
> wait_blocked_dev(mddev, r10_bio);
>
> @@ -3061,6 +3063,8 @@ static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
> else
> nalloc = 2; /* recovery */
>
> + r10bio->used_nr_devs = nalloc;
> +
> for (i = 0; i < nalloc; i++) {
> bio = r10bio->devs[i].bio;
> rp = bio->bi_private;
> diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
> index ec79d87fb92f..92e8743023e6 100644
> --- a/drivers/md/raid10.h
> +++ b/drivers/md/raid10.h
> @@ -127,6 +127,7 @@ struct r10bio {
> * if the IO is in READ direction, then this is where we read
> */
> int read_slot;
> + unsigned int used_nr_devs;
Most entries have a comment describing the use. Maybe add one too, or at
least a blank line, so it’s clear that the existing comment is just for
`read_slot`?
>
> struct list_head retry_list;
> /*
From a performance and resource usage point of view, will increasing
the struct have a negative impact?
The diff looks good.
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Kind regards,
Paul
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking
2026-04-22 6:40 ` [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking Paul Menzel
@ 2026-04-24 2:11 ` Chen Cheng
0 siblings, 0 replies; 7+ messages in thread
From: Chen Cheng @ 2026-04-24 2:11 UTC (permalink / raw)
To: Paul Menzel; +Cc: linux-raid, yukuai, chenchneg33
On Wed, Apr 22, 2026 at 08:40:42AM +0200, Paul Menzel wrote:
Hi Paul,
> Dear Cheng,
>
>
> Am 22.04.26 um 04:33 schrieb Chen Cheng:
> > From: Chen Cheng <chencheng@fnnas.com>
> >
> > raid10 reuses r10bio objects from both r10bio_pool and r10buf_pool. Track
> > the number of devs[] slots used by each request in the r10bio itself and
> > initialize it whenever one of these objects is reused.
> >
> > No functional change yet. A later patch will use this width when reshape
> > changes conf->geo.raid_disks.
>
> Your Signed-off-by: line is missing.
Yes, I missed it; thanks for pointing it out.
>
> > ---
> > drivers/md/raid10.c | 4 ++++
> > drivers/md/raid10.h | 1 +
> > 2 files changed, 5 insertions(+)
> >
> > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> > index 0653b5d8545a..e93933632893 100644
> > --- a/drivers/md/raid10.c
> > +++ b/drivers/md/raid10.c
> > @@ -1540,6 +1540,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
> > r10_bio->sector = bio->bi_iter.bi_sector;
> > r10_bio->state = 0;
> > r10_bio->read_slot = -1;
> > + r10_bio->used_nr_devs = conf->geo.raid_disks;
> > memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) *
> > conf->geo.raid_disks);
> > @@ -1727,6 +1728,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
> > r10_bio->mddev = mddev;
> > r10_bio->state = 0;
> > r10_bio->sectors = 0;
> > + r10_bio->used_nr_devs = geo->raid_disks;
> > memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);
> > wait_blocked_dev(mddev, r10_bio);
> > @@ -3061,6 +3063,8 @@ static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
> > else
> > nalloc = 2; /* recovery */
> > + r10bio->used_nr_devs = nalloc;
> > +
> > for (i = 0; i < nalloc; i++) {
> > bio = r10bio->devs[i].bio;
> > rp = bio->bi_private;
> > diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
> > index ec79d87fb92f..92e8743023e6 100644
> > --- a/drivers/md/raid10.h
> > +++ b/drivers/md/raid10.h
> > @@ -127,6 +127,7 @@ struct r10bio {
> > * if the IO is in READ direction, then this is where we read
> > */
> > int read_slot;
> > + unsigned int used_nr_devs;
>
> Most entries have a comment describing the use. Maybe add one too, or at
> least a blank line, so it’s clear that the existing comment is just for
> `read_slot`?
Agreed.
>
> > struct list_head retry_list;
> > /*
>
> From a performance and resource usage point of view, will increasing the
> struct have a negative impact?
On 64-bit platforms, there is no negative resource usage impact:
the new field fits into the existing padding after read_slot, so
offsetof(struct r10bio, devs) stays unchanged.
On 32-bit platforms, it may increase each r10bio by 4 bytes, but that is
negligible compared with the bios/pages allocated for each request.
There is no negative performance impact, because the bottleneck is IO and
the IO path has not changed.
>
> The diff looks good.
>
> Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
>
Thanks for review;
>
> Kind regards,
>
> Paul
Thanks,
Cheng
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking
2026-04-22 2:33 [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking Chen Cheng
` (3 preceding siblings ...)
2026-04-22 6:40 ` [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking Paul Menzel
@ 2026-04-24 7:04 ` Yu Kuai
4 siblings, 0 replies; 7+ messages in thread
From: Yu Kuai @ 2026-04-24 7:04 UTC (permalink / raw)
To: Chen Cheng, linux-raid, yukuai; +Cc: chenchneg33
Hi,
在 2026/4/22 10:33, Chen Cheng 写道:
> From: Chen Cheng <chencheng@fnnas.com>
>
> raid10 reuses r10bio objects from both r10bio_pool and r10buf_pool. Track
> the number of devs[] slots used by each request in the r10bio itself and
> initialize it whenever one of these objects is reused.
>
> No functional change yet. A later patch will use this width when reshape
> changes conf->geo.raid_disks.
> ---
> drivers/md/raid10.c | 4 ++++
> drivers/md/raid10.h | 1 +
> 2 files changed, 5 insertions(+)
For patchset please also add a patch 0.
This solution looks incorrect. The usage of r10bio_pool is wrong in the first
place. Note that a mempool preallocates elements, and such elements can be
reused by later mempool allocations. Which means:
1) preallocate elements with old raid disks;
2) reshape updates raid disks;
3) allocate new r10bio, elements from 1) can be used.
The solution can refer to raid1.
1) convert mempool to fixed size;
2) during reshape, suspend/quiesce the array first to wait for all preallocated
r10bios to return first.
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 0653b5d8545a..e93933632893 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -1540,6 +1540,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
> r10_bio->sector = bio->bi_iter.bi_sector;
> r10_bio->state = 0;
> r10_bio->read_slot = -1;
> + r10_bio->used_nr_devs = conf->geo.raid_disks;
> memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) *
> conf->geo.raid_disks);
>
> @@ -1727,6 +1728,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
> r10_bio->mddev = mddev;
> r10_bio->state = 0;
> r10_bio->sectors = 0;
> + r10_bio->used_nr_devs = geo->raid_disks;
> memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);
> wait_blocked_dev(mddev, r10_bio);
>
> @@ -3061,6 +3063,8 @@ static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
> else
> nalloc = 2; /* recovery */
>
> + r10bio->used_nr_devs = nalloc;
> +
> for (i = 0; i < nalloc; i++) {
> bio = r10bio->devs[i].bio;
> rp = bio->bi_private;
> diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
> index ec79d87fb92f..92e8743023e6 100644
> --- a/drivers/md/raid10.h
> +++ b/drivers/md/raid10.h
> @@ -127,6 +127,7 @@ struct r10bio {
> * if the IO is in READ direction, then this is where we read
> */
> int read_slot;
> + unsigned int used_nr_devs;
>
> struct list_head retry_list;
> /*
--
Thanks,
Kuai
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2026-04-24 7:04 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-22 2:33 [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking Chen Cheng
2026-04-22 2:33 ` [PATCH 2/4] md/raid10: prepare r10bio allocation width tracking Chen Cheng
2026-04-22 2:33 ` [PATCH 3/4] md/raid10: fix r10bio devs overflow across reshape Chen Cheng
2026-04-22 2:33 ` [PATCH 4/4] md/raid10: reset read_slot when reusing r10bio for discard Chen Cheng
2026-04-22 6:40 ` [PATCH 1/4] md/raid10: prepare per-r10bio dev slot tracking Paul Menzel
2026-04-24 2:11 ` Chen Cheng
2026-04-24 7:04 ` Yu Kuai
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox