Linux RAID subsystem development
 help / color / mirror / Atom feed
* Re: [PATCH] md/raid5: split reshape bios before bitmap accounting
From: kernel test robot @ 2026-06-06  2:15 UTC (permalink / raw)
  To: Yu Kuai, Song Liu, Yu Kuai
  Cc: oe-kbuild-all, Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-21-yukuai@kernel.org>

Hi Yu,

kernel test robot noticed the following build errors:

[auto build test ERROR on linus/master]
[also build test ERROR on v7.1-rc6 next-20260605]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Yu-Kuai/md-raid5-split-reshape-bios-before-bitmap-accounting/20260605-214540
base:   linus/master
patch link:    https://lore.kernel.org/r/20260605091527.2463539-21-yukuai%40kernel.org
patch subject: [PATCH] md/raid5: split reshape bios before bitmap accounting
config: x86_64-rhel-9.4 (https://download.01.org/0day-ci/archive/20260606/202606060443.UZIDPAEF-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260606/202606060443.UZIDPAEF-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202606060443.UZIDPAEF-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/md/raid5.c: In function 'raid5_make_request':
>> drivers/md/raid5.c:6126:14: error: implicit declaration of function 'mddev_bio_split_at_reshape_offset' [-Wimplicit-function-declaration]
    6126 |         bi = mddev_bio_split_at_reshape_offset(mddev, bi, NULL,
         |              ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> drivers/md/raid5.c:6126:12: error: assignment to 'struct bio *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
    6126 |         bi = mddev_bio_split_at_reshape_offset(mddev, bi, NULL,
         |            ^


vim +/mddev_bio_split_at_reshape_offset +6126 drivers/md/raid5.c

  6083	
  6084	static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
  6085	{
  6086		DEFINE_WAIT_FUNC(wait, woken_wake_function);
  6087		struct r5conf *conf = mddev->private;
  6088		const int rw = bio_data_dir(bi);
  6089		struct stripe_request_ctx *ctx;
  6090		sector_t logical_sector;
  6091		enum stripe_result res;
  6092		int s, stripe_cnt;
  6093		bool on_wq;
  6094	
  6095		if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
  6096			int ret = log_handle_flush_request(conf, bi);
  6097	
  6098			if (ret == 0)
  6099				return true;
  6100			if (ret == -ENODEV) {
  6101				if (md_flush_request(mddev, bi))
  6102					return true;
  6103			}
  6104			/* ret == -EAGAIN, fallback */
  6105		}
  6106	
  6107		md_write_start(mddev, bi);
  6108		/*
  6109		 * If array is degraded, better not do chunk aligned read because
  6110		 * later we might have to read it again in order to reconstruct
  6111		 * data on failed drives.
  6112		 */
  6113		if (rw == READ && mddev->degraded == 0 &&
  6114		    mddev->reshape_position == MaxSector) {
  6115			bi = chunk_aligned_read(mddev, bi);
  6116			if (!bi)
  6117				return true;
  6118		}
  6119	
  6120		if (unlikely(bio_op(bi) == REQ_OP_DISCARD)) {
  6121			make_discard_request(mddev, bi);
  6122			md_write_end(mddev);
  6123			return true;
  6124		}
  6125	
> 6126		bi = mddev_bio_split_at_reshape_offset(mddev, bi, NULL,
  6127						       &conf->bio_split);
  6128		if (!bi) {
  6129			if (rw == WRITE)
  6130				md_write_end(mddev);
  6131			return true;
  6132		}
  6133	
  6134		logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
  6135		bi->bi_next = NULL;
  6136	
  6137		ctx = mempool_alloc(conf->ctx_pool, GFP_NOIO);
  6138		memset(ctx, 0, conf->ctx_size);
  6139		ctx->first_sector = logical_sector;
  6140		ctx->last_sector = bio_end_sector(bi);
  6141		/*
  6142		 * if r5l_handle_flush_request() didn't clear REQ_PREFLUSH,
  6143		 * we need to flush journal device
  6144		 */
  6145		if (unlikely(bi->bi_opf & REQ_PREFLUSH))
  6146			ctx->do_flush = true;
  6147	
  6148		stripe_cnt = DIV_ROUND_UP_SECTOR_T(ctx->last_sector - logical_sector,
  6149						   RAID5_STRIPE_SECTORS(conf));
  6150		bitmap_set(ctx->sectors_to_do, 0, stripe_cnt);
  6151	
  6152		pr_debug("raid456: %s, logical %llu to %llu\n", __func__,
  6153			 bi->bi_iter.bi_sector, ctx->last_sector);
  6154	
  6155		/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
  6156		if ((bi->bi_opf & REQ_NOWAIT) &&
  6157		    get_reshape_loc(mddev, conf, logical_sector) == LOC_INSIDE_RESHAPE) {
  6158			bio_wouldblock_error(bi);
  6159			if (rw == WRITE)
  6160				md_write_end(mddev);
  6161			mempool_free(ctx, conf->ctx_pool);
  6162			return true;
  6163		}
  6164		md_account_bio(mddev, &bi);
  6165	
  6166		/*
  6167		 * Lets start with the stripe with the lowest chunk offset in the first
  6168		 * chunk. That has the best chances of creating IOs adjacent to
  6169		 * previous IOs in case of sequential IO and thus creates the most
  6170		 * sequential IO pattern. We don't bother with the optimization when
  6171		 * reshaping as the performance benefit is not worth the complexity.
  6172		 */
  6173		if (likely(conf->reshape_progress == MaxSector)) {
  6174			logical_sector = raid5_bio_lowest_chunk_sector(conf, bi);
  6175			on_wq = false;
  6176		} else {
  6177			add_wait_queue(&conf->wait_for_reshape, &wait);
  6178			on_wq = true;
  6179		}
  6180		s = (logical_sector - ctx->first_sector) >> RAID5_STRIPE_SHIFT(conf);
  6181	
  6182		while (1) {
  6183			res = make_stripe_request(mddev, conf, ctx, logical_sector,
  6184						  bi);
  6185			if (res == STRIPE_FAIL || res == STRIPE_WAIT_RESHAPE)
  6186				break;
  6187	
  6188			if (res == STRIPE_RETRY)
  6189				continue;
  6190	
  6191			if (res == STRIPE_SCHEDULE_AND_RETRY) {
  6192				WARN_ON_ONCE(!on_wq);
  6193				/*
  6194				 * Must release the reference to batch_last before
  6195				 * scheduling and waiting for work to be done,
  6196				 * otherwise the batch_last stripe head could prevent
  6197				 * raid5_activate_delayed() from making progress
  6198				 * and thus deadlocking.
  6199				 */
  6200				if (ctx->batch_last) {
  6201					raid5_release_stripe(ctx->batch_last);
  6202					ctx->batch_last = NULL;
  6203				}
  6204	
  6205				wait_woken(&wait, TASK_UNINTERRUPTIBLE,
  6206					   MAX_SCHEDULE_TIMEOUT);
  6207				continue;
  6208			}
  6209	
  6210			s = find_next_bit_wrap(ctx->sectors_to_do, stripe_cnt, s);
  6211			if (s == stripe_cnt)
  6212				break;
  6213	
  6214			logical_sector = ctx->first_sector +
  6215				(s << RAID5_STRIPE_SHIFT(conf));
  6216		}
  6217		if (unlikely(on_wq))
  6218			remove_wait_queue(&conf->wait_for_reshape, &wait);
  6219	
  6220		if (ctx->batch_last)
  6221			raid5_release_stripe(ctx->batch_last);
  6222	
  6223		if (rw == WRITE)
  6224			md_write_end(mddev);
  6225	
  6226		mempool_free(ctx, conf->ctx_pool);
  6227		if (res == STRIPE_WAIT_RESHAPE) {
  6228			DECLARE_COMPLETION_ONSTACK(done);
  6229			WRITE_ONCE(bi->bi_private, &done);
  6230	
  6231			bio_endio(bi);
  6232	
  6233			wait_for_completion(&done);
  6234			return false;
  6235		}
  6236	
  6237		bio_endio(bi);
  6238		return true;
  6239	}
  6240	

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply

* Re: [PATCH] md/raid5: split reshape bios before bitmap accounting
From: kernel test robot @ 2026-06-05 17:27 UTC (permalink / raw)
  To: Yu Kuai, Song Liu, Yu Kuai
  Cc: llvm, oe-kbuild-all, Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-21-yukuai@kernel.org>

Hi Yu,

kernel test robot noticed the following build errors:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.16-rc1 next-20260605]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Yu-Kuai/md-raid5-split-reshape-bios-before-bitmap-accounting/20260605-173848
base:   linus/master
patch link:    https://lore.kernel.org/r/20260605091527.2463539-21-yukuai%40kernel.org
patch subject: [PATCH] md/raid5: split reshape bios before bitmap accounting
config: x86_64-kexec (https://download.01.org/0day-ci/archive/20260605/202606051933.yVCn4DIj-lkp@intel.com/config)
compiler: clang version 22.0.0git (https://github.com/llvm/llvm-project f43d6834093b19baf79beda8c0337ab020ac5f17)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260605/202606051933.yVCn4DIj-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202606051933.yVCn4DIj-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/md/raid5.c:4221:7: warning: variable 'qread' set but not used [-Wunused-but-set-variable]
    4221 |                 int qread =0;
         |                     ^
>> drivers/md/raid5.c:6126:7: error: call to undeclared function 'mddev_bio_split_at_reshape_offset'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
    6126 |         bi = mddev_bio_split_at_reshape_offset(mddev, bi, NULL,
         |              ^
>> drivers/md/raid5.c:6126:5: error: incompatible integer to pointer conversion assigning to 'struct bio *' from 'int' [-Wint-conversion]
    6126 |         bi = mddev_bio_split_at_reshape_offset(mddev, bi, NULL,
         |            ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    6127 |                                                &conf->bio_split);
         |                                                ~~~~~~~~~~~~~~~~~
   1 warning and 2 errors generated.


vim +/mddev_bio_split_at_reshape_offset +6126 drivers/md/raid5.c

  6083	
  6084	static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
  6085	{
  6086		DEFINE_WAIT_FUNC(wait, woken_wake_function);
  6087		struct r5conf *conf = mddev->private;
  6088		const int rw = bio_data_dir(bi);
  6089		struct stripe_request_ctx *ctx;
  6090		sector_t logical_sector;
  6091		enum stripe_result res;
  6092		int s, stripe_cnt;
  6093		bool on_wq;
  6094	
  6095		if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
  6096			int ret = log_handle_flush_request(conf, bi);
  6097	
  6098			if (ret == 0)
  6099				return true;
  6100			if (ret == -ENODEV) {
  6101				if (md_flush_request(mddev, bi))
  6102					return true;
  6103			}
  6104			/* ret == -EAGAIN, fallback */
  6105		}
  6106	
  6107		md_write_start(mddev, bi);
  6108		/*
  6109		 * If array is degraded, better not do chunk aligned read because
  6110		 * later we might have to read it again in order to reconstruct
  6111		 * data on failed drives.
  6112		 */
  6113		if (rw == READ && mddev->degraded == 0 &&
  6114		    mddev->reshape_position == MaxSector) {
  6115			bi = chunk_aligned_read(mddev, bi);
  6116			if (!bi)
  6117				return true;
  6118		}
  6119	
  6120		if (unlikely(bio_op(bi) == REQ_OP_DISCARD)) {
  6121			make_discard_request(mddev, bi);
  6122			md_write_end(mddev);
  6123			return true;
  6124		}
  6125	
> 6126		bi = mddev_bio_split_at_reshape_offset(mddev, bi, NULL,
  6127						       &conf->bio_split);
  6128		if (!bi) {
  6129			if (rw == WRITE)
  6130				md_write_end(mddev);
  6131			return true;
  6132		}
  6133	
  6134		logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
  6135		bi->bi_next = NULL;
  6136	
  6137		ctx = mempool_alloc(conf->ctx_pool, GFP_NOIO);
  6138		memset(ctx, 0, conf->ctx_size);
  6139		ctx->first_sector = logical_sector;
  6140		ctx->last_sector = bio_end_sector(bi);
  6141		/*
  6142		 * if r5l_handle_flush_request() didn't clear REQ_PREFLUSH,
  6143		 * we need to flush journal device
  6144		 */
  6145		if (unlikely(bi->bi_opf & REQ_PREFLUSH))
  6146			ctx->do_flush = true;
  6147	
  6148		stripe_cnt = DIV_ROUND_UP_SECTOR_T(ctx->last_sector - logical_sector,
  6149						   RAID5_STRIPE_SECTORS(conf));
  6150		bitmap_set(ctx->sectors_to_do, 0, stripe_cnt);
  6151	
  6152		pr_debug("raid456: %s, logical %llu to %llu\n", __func__,
  6153			 bi->bi_iter.bi_sector, ctx->last_sector);
  6154	
  6155		/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
  6156		if ((bi->bi_opf & REQ_NOWAIT) &&
  6157		    get_reshape_loc(mddev, conf, logical_sector) == LOC_INSIDE_RESHAPE) {
  6158			bio_wouldblock_error(bi);
  6159			if (rw == WRITE)
  6160				md_write_end(mddev);
  6161			mempool_free(ctx, conf->ctx_pool);
  6162			return true;
  6163		}
  6164		md_account_bio(mddev, &bi);
  6165	
  6166		/*
  6167		 * Lets start with the stripe with the lowest chunk offset in the first
  6168		 * chunk. That has the best chances of creating IOs adjacent to
  6169		 * previous IOs in case of sequential IO and thus creates the most
  6170		 * sequential IO pattern. We don't bother with the optimization when
  6171		 * reshaping as the performance benefit is not worth the complexity.
  6172		 */
  6173		if (likely(conf->reshape_progress == MaxSector)) {
  6174			logical_sector = raid5_bio_lowest_chunk_sector(conf, bi);
  6175			on_wq = false;
  6176		} else {
  6177			add_wait_queue(&conf->wait_for_reshape, &wait);
  6178			on_wq = true;
  6179		}
  6180		s = (logical_sector - ctx->first_sector) >> RAID5_STRIPE_SHIFT(conf);
  6181	
  6182		while (1) {
  6183			res = make_stripe_request(mddev, conf, ctx, logical_sector,
  6184						  bi);
  6185			if (res == STRIPE_FAIL || res == STRIPE_WAIT_RESHAPE)
  6186				break;
  6187	
  6188			if (res == STRIPE_RETRY)
  6189				continue;
  6190	
  6191			if (res == STRIPE_SCHEDULE_AND_RETRY) {
  6192				WARN_ON_ONCE(!on_wq);
  6193				/*
  6194				 * Must release the reference to batch_last before
  6195				 * scheduling and waiting for work to be done,
  6196				 * otherwise the batch_last stripe head could prevent
  6197				 * raid5_activate_delayed() from making progress
  6198				 * and thus deadlocking.
  6199				 */
  6200				if (ctx->batch_last) {
  6201					raid5_release_stripe(ctx->batch_last);
  6202					ctx->batch_last = NULL;
  6203				}
  6204	
  6205				wait_woken(&wait, TASK_UNINTERRUPTIBLE,
  6206					   MAX_SCHEDULE_TIMEOUT);
  6207				continue;
  6208			}
  6209	
  6210			s = find_next_bit_wrap(ctx->sectors_to_do, stripe_cnt, s);
  6211			if (s == stripe_cnt)
  6212				break;
  6213	
  6214			logical_sector = ctx->first_sector +
  6215				(s << RAID5_STRIPE_SHIFT(conf));
  6216		}
  6217		if (unlikely(on_wq))
  6218			remove_wait_queue(&conf->wait_for_reshape, &wait);
  6219	
  6220		if (ctx->batch_last)
  6221			raid5_release_stripe(ctx->batch_last);
  6222	
  6223		if (rw == WRITE)
  6224			md_write_end(mddev);
  6225	
  6226		mempool_free(ctx, conf->ctx_pool);
  6227		if (res == STRIPE_WAIT_RESHAPE) {
  6228			DECLARE_COMPLETION_ONSTACK(done);
  6229			WRITE_ONCE(bi->bi_private, &done);
  6230	
  6231			bio_endio(bi);
  6232	
  6233			wait_for_completion(&done);
  6234			return false;
  6235		}
  6236	
  6237		bio_endio(bi);
  6238		return true;
  6239	}
  6240	

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply

* [PATCH] md/raid5: split reshape bios before bitmap accounting
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Use the shared mddev_bio_split_at_reshape_offset() helper so that RAID5
only submits bios that lie entirely on one side of the reshape offset to
llbitmap during reshape.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/raid5.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6786f30dc59b..dde10b5b0ff5 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6172,10 +6172,18 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 		make_discard_request(mddev, bi);
 		md_write_end(mddev);
 		return true;
 	}
 
+	bi = mddev_bio_split_at_reshape_offset(mddev, bi, NULL,
+					       &conf->bio_split);
+	if (!bi) {
+		if (rw == WRITE)
+			md_write_end(mddev);
+		return true;
+	}
+
 	logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
 	bi->bi_next = NULL;
 
 	ctx = mempool_alloc(conf->ctx_pool, GFP_NOIO);
 	memset(ctx, 0, conf->ctx_size);
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/raid5: wire llbitmap reshape lifecycle
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Prepare llbitmap before RAID5 reshape starts, checkpoint the bitmap
before advancing reshape_position, and finish the llbitmap geometry
update when reshape completes.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/raid5.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 55af1d1eec63..6786f30dc59b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6421,10 +6421,17 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 		wait_event(conf->wait_for_reshape,
 			   atomic_read(&conf->reshape_stripes)==0
 			   || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
 		if (atomic_read(&conf->reshape_stripes) != 0)
 			return 0;
+		if (md_bitmap_enabled(mddev, false) &&
+		    mddev->bitmap_ops->reshape_mark &&
+		    conf->reshape_safe != conf->reshape_progress) {
+			mddev->bitmap_ops->reshape_mark(mddev, conf->reshape_safe,
+						       conf->reshape_progress);
+			mddev->bitmap_ops->unplug(mddev, true);
+		}
 		mddev->reshape_position = conf->reshape_progress;
 		mddev->curr_resync_completed = sector_nr;
 		if (!mddev->reshape_backwards)
 			/* Can update recovery_offset */
 			rdev_for_each(rdev, mddev)
@@ -6530,10 +6537,17 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 		wait_event(conf->wait_for_reshape,
 			   atomic_read(&conf->reshape_stripes) == 0
 			   || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
 		if (atomic_read(&conf->reshape_stripes) != 0)
 			goto ret;
+		if (md_bitmap_enabled(mddev, false) &&
+		    mddev->bitmap_ops->reshape_mark &&
+		    conf->reshape_safe != conf->reshape_progress) {
+			mddev->bitmap_ops->reshape_mark(mddev, conf->reshape_safe,
+						       conf->reshape_progress);
+			mddev->bitmap_ops->unplug(mddev, true);
+		}
 		mddev->reshape_position = conf->reshape_progress;
 		mddev->curr_resync_completed = sector_nr;
 		if (!mddev->reshape_backwards)
 			/* Can update recovery_offset */
 			rdev_for_each(rdev, mddev)
@@ -8562,10 +8576,16 @@ static int raid5_start_reshape(struct mddev *mddev)
 	    < mddev->array_sectors) {
 		pr_warn("md/raid:%s: array size must be reduced before number of disks\n",
 			mdname(mddev));
 		return -EINVAL;
 	}
+	if (md_bitmap_enabled(mddev, false) &&
+	    mddev->bitmap_id == ID_LLBITMAP) {
+		i = mddev->bitmap_ops->resize(mddev, mddev->dev_sectors, 0);
+		if (i)
+			return i;
+	}
 
 	atomic_set(&conf->reshape_stripes, 0);
 	spin_lock_irq(&conf->device_lock);
 	write_seqcount_begin(&conf->gen_lock);
 	conf->previous_raid_disks = conf->raid_disks;
@@ -8646,14 +8666,23 @@ static int raid5_start_reshape(struct mddev *mddev)
 /* This is called from the reshape thread and should make any
  * changes needed in 'conf'
  */
 static void end_reshape(struct r5conf *conf)
 {
+	struct mddev *mddev = conf->mddev;
 
 	if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
 		struct md_rdev *rdev;
 
+		if (md_bitmap_enabled(mddev, false) &&
+		    mddev->bitmap_ops->reshape_mark &&
+		    conf->reshape_safe != conf->reshape_progress) {
+			mddev->bitmap_ops->reshape_mark(mddev, conf->reshape_safe,
+						       conf->reshape_progress);
+			mddev->bitmap_ops->unplug(mddev, true);
+		}
+
 		spin_lock_irq(&conf->device_lock);
 		conf->previous_raid_disks = conf->raid_disks;
 		md_finish_reshape(conf->mddev);
 		smp_wmb();
 		conf->reshape_progress = MaxSector;
@@ -8676,12 +8705,20 @@ static void end_reshape(struct r5conf *conf)
  */
 static void raid5_finish_reshape(struct mddev *mddev)
 {
 	struct r5conf *conf = mddev->private;
 	struct md_rdev *rdev;
+	bool llbitmap = mddev->bitmap_id == ID_LLBITMAP &&
+		md_bitmap_enabled(mddev, false);
 
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+		if (llbitmap && mddev->bitmap_ops->reshape_finish)
+			mddev->bitmap_ops->reshape_finish(mddev);
+		if (llbitmap) {
+			mddev->resync_offset = 0;
+			mddev->resync_max_sectors = mddev->dev_sectors;
+		}
 
 		if (mddev->delta_disks <= 0) {
 			int d;
 			spin_lock_irq(&conf->device_lock);
 			mddev->degraded = raid5_calc_degraded(conf);
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/raid5: reject llbitmap reshape when md chunk shrinks
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

llbitmap reshape keeps one live bitmap and cannot safely make an
existing bitmap bit cover a smaller data range.

The llbitmap chunksize itself will not shrink when mddev->chunk_sectors
stays the same or grows. However, shrinking mddev->chunk_sectors shrinks
sectors_per_chunk used by raid5_bitmap_sector_map(). That can shrink the
effective data range covered by each bit across the old and new RAID5
geometry. Reject that reshape while llbitmap is active.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/raid5.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ff4da82f78fe..55af1d1eec63 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -8497,10 +8497,13 @@ static int check_reshape(struct mddev *mddev)
 	}
 
 	if (!check_stripe_cache(mddev))
 		return -ENOSPC;
 
+	if (mddev->bitmap_id == ID_LLBITMAP &&
+	    mddev->new_chunk_sectors < mddev->chunk_sectors)
+		return -EOPNOTSUPP;
 	if (mddev->new_chunk_sectors > mddev->chunk_sectors ||
 	    mddev->delta_disks > 0)
 		if (resize_chunks(conf,
 				  conf->previous_raid_disks
 				  + max(0, mddev->delta_disks),
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/raid5: add exact old and new llbitmap mapping helpers
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Teach RAID5 to export exact old and new llbitmap mappings and the
corresponding sync and array sizes for reshape-aware bitmap users.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/raid5.c | 70 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 52 insertions(+), 18 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 180ff0660b6a..ff4da82f78fe 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5933,56 +5933,67 @@ static enum reshape_loc get_reshape_loc(struct mddev *mddev,
 	if (ahead_of_reshape(mddev, logical_sector, reshape_safe))
 		return LOC_INSIDE_RESHAPE;
 	return LOC_BEHIND_RESHAPE;
 }
 
-static void raid5_bitmap_sector(struct mddev *mddev, sector_t *offset,
-				unsigned long *sectors)
+static void raid5_bitmap_sector_map(struct mddev *mddev, sector_t *offset,
+				    unsigned long *sectors,
+				    bool previous)
 {
 	struct r5conf *conf = mddev->private;
 	sector_t start = *offset;
 	sector_t end = start + *sectors;
-	sector_t prev_start = start;
-	sector_t prev_end = end;
 	int sectors_per_chunk;
-	enum reshape_loc loc;
 	int dd_idx;
 
-	sectors_per_chunk = conf->chunk_sectors *
-		(conf->raid_disks - conf->max_degraded);
+	if (previous)
+		sectors_per_chunk = conf->prev_chunk_sectors *
+			(conf->previous_raid_disks - conf->max_degraded);
+	else
+		sectors_per_chunk = conf->chunk_sectors *
+			(conf->raid_disks - conf->max_degraded);
 	start = round_down(start, sectors_per_chunk);
 	end = round_up(end, sectors_per_chunk);
 
-	start = raid5_compute_sector(conf, start, 0, &dd_idx, NULL);
-	end = raid5_compute_sector(conf, end, 0, &dd_idx, NULL);
+	start = raid5_compute_sector(conf, start, previous, &dd_idx, NULL);
+	end = raid5_compute_sector(conf, end, previous, &dd_idx, NULL);
+	*offset = start;
+	*sectors = end - start;
+}
+
+static void raid5_bitmap_sector(struct mddev *mddev, sector_t *offset,
+				unsigned long *sectors)
+{
+	struct r5conf *conf = mddev->private;
+	sector_t start = *offset;
+	sector_t end = start + *sectors;
+	sector_t prev_start = start;
+	unsigned long prev_sectors = end - start;
+	enum reshape_loc loc;
+
+	raid5_bitmap_sector_map(mddev, &start, sectors, false);
+	end = start + *sectors;
 
 	/*
 	 * For LOC_INSIDE_RESHAPE, this IO will wait for reshape to make
 	 * progress, hence it's the same as LOC_BEHIND_RESHAPE.
 	 */
 	loc = get_reshape_loc(mddev, conf, prev_start);
 	if (likely(loc != LOC_AHEAD_OF_RESHAPE)) {
 		*offset = start;
-		*sectors = end - start;
 		return;
 	}
 
-	sectors_per_chunk = conf->prev_chunk_sectors *
-		(conf->previous_raid_disks - conf->max_degraded);
-	prev_start = round_down(prev_start, sectors_per_chunk);
-	prev_end = round_down(prev_end, sectors_per_chunk);
-
-	prev_start = raid5_compute_sector(conf, prev_start, 1, &dd_idx, NULL);
-	prev_end = raid5_compute_sector(conf, prev_end, 1, &dd_idx, NULL);
+	raid5_bitmap_sector_map(mddev, &prev_start, &prev_sectors, true);
 
 	/*
 	 * for LOC_AHEAD_OF_RESHAPE, reshape can make progress before this IO
 	 * is handled in make_stripe_request(), we can't know this here hence
 	 * we set bits for both.
 	 */
 	*offset = min(start, prev_start);
-	*sectors = max(end, prev_end) - *offset;
+	*sectors = max(end, prev_start + prev_sectors) - *offset;
 }
 
 static enum stripe_result make_stripe_request(struct mddev *mddev,
 		struct r5conf *conf, struct stripe_request_ctx *ctx,
 		sector_t logical_sector, struct bio *bi)
@@ -9040,10 +9051,24 @@ static void raid5_prepare_suspend(struct mddev *mddev)
 	struct r5conf *conf = mddev->private;
 
 	wake_up(&conf->wait_for_reshape);
 }
 
+static sector_t raid5_bitmap_sync_size(struct mddev *mddev, bool previous)
+{
+	return mddev->dev_sectors;
+}
+
+static sector_t raid5_bitmap_array_sectors(struct mddev *mddev, bool previous)
+{
+	struct r5conf *conf = mddev->private;
+
+	if (previous)
+		return raid5_size(mddev, 0, 0);
+	return raid5_size(mddev, mddev->dev_sectors, conf->raid_disks);
+}
+
 static struct md_personality raid6_personality =
 {
 	.head = {
 		.type	= MD_PERSONALITY,
 		.id	= ID_RAID6,
@@ -9069,10 +9094,13 @@ static struct md_personality raid6_personality =
 	.quiesce	= raid5_quiesce,
 	.takeover	= raid6_takeover,
 	.change_consistency_policy = raid5_change_consistency_policy,
 	.prepare_suspend = raid5_prepare_suspend,
 	.bitmap_sector	= raid5_bitmap_sector,
+	.bitmap_sector_map = raid5_bitmap_sector_map,
+	.bitmap_sync_size = raid5_bitmap_sync_size,
+	.bitmap_array_sectors = raid5_bitmap_array_sectors,
 };
 static struct md_personality raid5_personality =
 {
 	.head = {
 		.type	= MD_PERSONALITY,
@@ -9099,10 +9127,13 @@ static struct md_personality raid5_personality =
 	.quiesce	= raid5_quiesce,
 	.takeover	= raid5_takeover,
 	.change_consistency_policy = raid5_change_consistency_policy,
 	.prepare_suspend = raid5_prepare_suspend,
 	.bitmap_sector	= raid5_bitmap_sector,
+	.bitmap_sector_map = raid5_bitmap_sector_map,
+	.bitmap_sync_size = raid5_bitmap_sync_size,
+	.bitmap_array_sectors = raid5_bitmap_array_sectors,
 };
 
 static struct md_personality raid4_personality =
 {
 	.head = {
@@ -9130,10 +9161,13 @@ static struct md_personality raid4_personality =
 	.quiesce	= raid5_quiesce,
 	.takeover	= raid4_takeover,
 	.change_consistency_policy = raid5_change_consistency_policy,
 	.prepare_suspend = raid5_prepare_suspend,
 	.bitmap_sector	= raid5_bitmap_sector,
+	.bitmap_sector_map = raid5_bitmap_sector_map,
+	.bitmap_sync_size = raid5_bitmap_sync_size,
+	.bitmap_array_sectors = raid5_bitmap_array_sectors,
 };
 
 static int __init raid5_init(void)
 {
 	int ret;
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/raid10: split reshape bios before bitmap accounting
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Use the shared mddev_bio_split_at_reshape_offset() helper so that RAID10
only submits bios that lie entirely on one side of the reshape offset to
llbitmap during reshape.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/raid10.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 40000b867136..cd35d183877c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1898,10 +1898,16 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
 			 || conf->prev.near_copies <
 			 conf->prev.raid_disks)))
 		sectors = chunk_sects -
 			(bio->bi_iter.bi_sector &
 			 (chunk_sects - 1));
+
+	bio = mddev_bio_split_at_reshape_offset(mddev, bio, &sectors,
+						&conf->bio_split);
+	if (!bio)
+		return true;
+
 	__make_request(mddev, bio, sectors);
 
 	/* In case raid10d snuck in to freeze_array */
 	wake_up_barrier(conf);
 	return true;
@@ -4273,11 +4279,10 @@ static int raid10_check_reshape(struct mddev *mddev)
 		return -EINVAL;
 	if (mddev->bitmap_id == ID_LLBITMAP &&
 	    mddev->new_chunk_sectors &&
 	    mddev->new_chunk_sectors < mddev->chunk_sectors)
 		return -EOPNOTSUPP;
-
 	if (setup_geo(&geo, mddev, geo_start) != conf->copies)
 		/* mustn't change number of copies */
 		return -EINVAL;
 	if (geo.far_copies > 1 && !geo.far_offset)
 		/* Cannot switch to 'far' mode */
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/raid10: wire llbitmap reshape lifecycle
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Prepare llbitmap before RAID10 starts growing, checkpoint the bitmap
before advancing reshape_position, finish the llbitmap geometry update
when reshape completes, and export the old and new tracked sizes.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/raid10.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index c69ef76c89e1..40000b867136 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4379,10 +4379,16 @@ static int raid10_start_reshape(struct mddev *mddev)
 	int spares = 0;
 	int ret;
 
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		return -EBUSY;
+	if (md_bitmap_enabled(mddev, false) &&
+	    mddev->bitmap_ops->reshape_can_start) {
+		ret = mddev->bitmap_ops->reshape_can_start(mddev);
+		if (ret)
+			return ret;
+	}
 
 	if (setup_geo(&new, mddev, geo_start) != conf->copies)
 		return -EINVAL;
 
 	before_length = ((1 << conf->prev.chunk_shift) *
@@ -4692,10 +4698,17 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 
 	if (need_flush ||
 	    time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
 		/* Need to update reshape_position in metadata */
 		wait_barrier(conf, false);
+		if (md_bitmap_enabled(mddev, false) &&
+		    mddev->bitmap_ops->reshape_mark &&
+		    conf->reshape_safe != conf->reshape_progress) {
+			mddev->bitmap_ops->reshape_mark(mddev, conf->reshape_safe,
+						       conf->reshape_progress);
+			mddev->bitmap_ops->unplug(mddev, true);
+		}
 		mddev->reshape_position = conf->reshape_progress;
 		if (mddev->reshape_backwards)
 			mddev->curr_resync_completed = raid10_size(mddev, 0, 0)
 				- conf->reshape_progress;
 		else
@@ -4890,13 +4903,23 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 	end_reshape_request(r10_bio);
 }
 
 static void end_reshape(struct r10conf *conf)
 {
+	struct mddev *mddev = conf->mddev;
+
 	if (test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery))
 		return;
 
+	if (md_bitmap_enabled(mddev, false) &&
+	    mddev->bitmap_ops->reshape_mark &&
+	    conf->reshape_safe != conf->reshape_progress) {
+		mddev->bitmap_ops->reshape_mark(mddev, conf->reshape_safe,
+					       conf->reshape_progress);
+		mddev->bitmap_ops->unplug(mddev, true);
+	}
+
 	spin_lock_irq(&conf->device_lock);
 	conf->prev = conf->geo;
 	md_finish_reshape(conf->mddev);
 	smp_wmb();
 	conf->reshape_progress = MaxSector;
@@ -5024,14 +5047,19 @@ static void end_reshape_request(struct r10bio *r10_bio)
 }
 
 static void raid10_finish_reshape(struct mddev *mddev)
 {
 	struct r10conf *conf = mddev->private;
+	bool llbitmap = mddev->bitmap_id == ID_LLBITMAP &&
+		md_bitmap_enabled(mddev, false);
 
 	if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
 		return;
 
+	if (llbitmap && mddev->bitmap_ops->reshape_finish)
+		mddev->bitmap_ops->reshape_finish(mddev);
+
 	if (mddev->delta_disks > 0) {
 		if (mddev->resync_offset > mddev->resync_max_sectors) {
 			mddev->resync_offset = mddev->resync_max_sectors;
 			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 		}
@@ -5054,10 +5082,19 @@ static void raid10_finish_reshape(struct mddev *mddev)
 	mddev->reshape_position = MaxSector;
 	mddev->delta_disks = 0;
 	mddev->reshape_backwards = 0;
 }
 
+static sector_t raid10_bitmap_sync_size(struct mddev *mddev, bool previous)
+{
+	struct r10conf *conf = mddev->private;
+
+	if (previous)
+		return raid10_size(mddev, 0, 0);
+	return raid10_size(mddev, 0, conf->geo.raid_disks);
+}
+
 static struct md_personality raid10_personality =
 {
 	.head = {
 		.type	= MD_PERSONALITY,
 		.id	= ID_RAID10,
@@ -5080,10 +5117,12 @@ static struct md_personality raid10_personality =
 	.takeover	= raid10_takeover,
 	.check_reshape	= raid10_check_reshape,
 	.start_reshape	= raid10_start_reshape,
 	.finish_reshape	= raid10_finish_reshape,
 	.update_reshape_pos = raid10_update_reshape_pos,
+	.bitmap_sync_size = raid10_bitmap_sync_size,
+	.bitmap_array_sectors = raid10_bitmap_sync_size,
 };
 
 static int __init raid10_init(void)
 {
 	return register_md_submodule(&raid10_personality.head);
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/raid10: reject llbitmap reshape when md chunk shrinks
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

llbitmap reshape keeps one live bitmap and cannot safely make an
existing bitmap bit cover a smaller data range.

The llbitmap chunksize itself will not shrink when mddev->chunk_sectors
stays the same or grows. However, shrinking mddev->chunk_sectors can
shrink the effective data range covered by each bit for the RAID10
reshape geometry. Reject that reshape while llbitmap is active.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/raid10.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 5bd7698e0a1b..c69ef76c89e1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4269,10 +4269,14 @@ static int raid10_check_reshape(struct mddev *mddev)
 	struct r10conf *conf = mddev->private;
 	struct geom geo;
 
 	if (conf->geo.far_copies != 1 && !conf->geo.far_offset)
 		return -EINVAL;
+	if (mddev->bitmap_id == ID_LLBITMAP &&
+	    mddev->new_chunk_sectors &&
+	    mddev->new_chunk_sectors < mddev->chunk_sectors)
+		return -EOPNOTSUPP;
 
 	if (setup_geo(&geo, mddev, geo_start) != conf->copies)
 		/* mustn't change number of copies */
 		return -EINVAL;
 	if (geo.far_copies > 1 && !geo.far_offset)
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/md-llbitmap: clamp state-machine walks to tracked bits
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

llbitmap_state_machine() can be called with an end bit beyond
llbitmap->chunks. In particular, llbitmap_cond_end_sync() passes
sector >> chunkshift, and sector can reach the tracked boundary
exactly.

Clamp the state-machine range to llbitmap->chunks so it cannot walk
past the tracked bitmap.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/md-llbitmap.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index 6783f1b3ddf0..65d2fd1979e5 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -980,11 +980,14 @@ static enum llbitmap_state llbitmap_state_machine(struct llbitmap *llbitmap,
 
 	if (action == BitmapActionInit) {
 		llbitmap_init_state(llbitmap);
 		return BitNone;
 	}
-
+	if (start >= llbitmap->chunks)
+		return BitNone;
+	if (end >= llbitmap->chunks)
+		end = llbitmap->chunks - 1;
 	while (start <= end) {
 		enum llbitmap_state c = llbitmap_read(llbitmap, start);
 
 		if (c < 0 || c >= BitStateCount) {
 			pr_err("%s: invalid bit %lu state %d action %d, forcing resync\n",
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/md-llbitmap: remap checkpointed bits as reshape progresses
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

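As reshape_position advances, merge the checkpointed llbitmap state of the
old geometry forward into the bits that track the same range in the new
geometry. The remap is done by llbitmap_reshape_mark(), which is wired up
as the reshape_mark() bitmap operation.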

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/md-llbitmap.c | 172 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)

diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index 67e63d81a824..6783f1b3ddf0 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -499,10 +499,18 @@ static void llbitmap_map_layout(struct llbitmap *llbitmap, sector_t *offset,
 		llbitmap->mddev->pers->bitmap_sector_map(llbitmap->mddev, offset,
 							 sectors, previous);
 	else if (!previous && llbitmap->mddev->pers->bitmap_sector)
 		llbitmap->mddev->pers->bitmap_sector(llbitmap->mddev, offset,
 							 sectors);
+
+	limit = llbitmap_personality_sync_size(llbitmap, previous);
+	start = *offset;
+	end = start + *sectors;
+	if (start >= limit)
+		*sectors = 0;
+	else if (end > limit)
+		*sectors = limit - start;
 }
 
 static void llbitmap_encode_range(struct llbitmap *llbitmap, sector_t *offset,
 				  unsigned long *sectors, bool previous)
 {
@@ -903,10 +911,37 @@ static int llbitmap_prepare_resize(struct llbitmap *llbitmap,
 		llbitmap_mark_range(llbitmap, old_chunks, new_chunks - 1,
 				    BitUnwritten);
 	return 0;
 }
 
+static enum llbitmap_state
+llbitmap_rmerge_state(struct llbitmap *llbitmap,
+		      enum llbitmap_state dst,
+		      enum llbitmap_state src)
+{
+	bool level_456 = raid_is_456(llbitmap->mddev);
+
+	if (dst == BitNeedSync || dst == BitSyncing ||
+	    src == BitNeedSync || src == BitSyncing)
+		return BitNeedSync;
+
+	if (dst == BitDirty || src == BitDirty)
+		return BitDirty;
+
+	/*
+	 * Reshape generates valid target parity/data for both already-written
+	 * and not-yet-written regions in the checkpointed range, so a mix of
+	 * clean and unwritten still results in a clean destination bit.
+	 */
+	if (level_456 && ((dst == BitClean && src == BitUnwritten) ||
+			  (src == BitClean && dst == BitUnwritten)))
+		return BitClean;
+	if (dst == BitClean || src == BitClean)
+		return BitClean;
+	return BitUnwritten;
+}
+
 static void llbitmap_init_state(struct llbitmap *llbitmap)
 {
 	struct mddev *mddev = llbitmap->mddev;
 	enum llbitmap_state state = BitUnwritten;
 	unsigned long i;
@@ -1796,10 +1831,124 @@ static int llbitmap_reshape_can_start(struct mddev *mddev)
 	mutex_unlock(&mddev->bitmap_info.mutex);
 
 	return ret;
 }
 
+struct llbitmap_reshape_range {
+	sector_t offset;
+	unsigned long sectors;
+	sector_t start;
+	sector_t end;
+};
+
+static enum llbitmap_state
+llbitmap_reshape_init_dst(struct llbitmap *llbitmap, unsigned long dst,
+			  const struct llbitmap_reshape_range *new)
+{
+	u64 bit_start = (u64)dst * llbitmap->reshape_chunksize;
+	u64 bit_end = bit_start + llbitmap->reshape_chunksize;
+
+	if (!llbitmap->mddev->reshape_backwards)
+		return bit_start < new->offset ? llbitmap_read(llbitmap, dst) :
+		       BitUnwritten;
+	return bit_end > new->end ? llbitmap_read(llbitmap, dst) : BitUnwritten;
+}
+
+static void llbitmap_reshape_dst_range(struct llbitmap *llbitmap,
+				       unsigned long dst,
+				       const struct llbitmap_reshape_range *new,
+				       struct llbitmap_reshape_range *dst_range)
+{
+	sector_t dst_bit_start = (sector_t)dst * llbitmap->reshape_chunksize;
+
+	dst_range->start = max(dst_bit_start, new->offset);
+	dst_range->end = min(dst_bit_start + llbitmap->reshape_chunksize,
+			     new->end);
+	dst_range->offset = dst_range->start;
+	dst_range->sectors = dst_range->end - dst_range->start;
+}
+
+static void llbitmap_reshape_map_range(struct llbitmap *llbitmap,
+				       sector_t lo, sector_t hi,
+				       bool previous,
+				       struct llbitmap_reshape_range *range)
+{
+	range->offset = lo;
+	range->sectors = hi - lo;
+	llbitmap_map_layout(llbitmap, &range->offset, &range->sectors, previous);
+	range->start = range->offset;
+	range->end = range->offset + range->sectors;
+}
+
+static bool llbitmap_reshape_src_range(const struct llbitmap_reshape_range *old,
+				       const struct llbitmap_reshape_range *new,
+				       const struct llbitmap_reshape_range *dst,
+				       struct llbitmap_reshape_range *src)
+{
+	if (!old->sectors)
+		return false;
+
+	src->start = old->offset +
+		mul_u64_u64_div_u64(dst->start - new->offset,
+				    old->sectors, new->sectors);
+	src->end = old->offset +
+		mul_u64_u64_div_u64_roundup(dst->end - new->offset,
+					    old->sectors, new->sectors);
+	if (src->end > old->end)
+		src->end = old->end;
+	src->offset = src->start;
+	src->sectors = src->end - src->start;
+
+	return src->sectors;
+}
+
+static enum llbitmap_state llbitmap_rmerge_src(struct llbitmap *llbitmap,
+					       enum llbitmap_state state,
+					       const struct llbitmap_reshape_range *src)
+{
+	unsigned long bit = div64_u64(src->start, llbitmap->chunksize);
+	unsigned long end = div64_u64(src->end - 1, llbitmap->chunksize);
+
+	while (bit <= end) {
+		enum llbitmap_state src_state = llbitmap_read(llbitmap, bit);
+
+		state = llbitmap_rmerge_state(llbitmap, state, src_state);
+		bit++;
+	}
+
+	return state;
+}
+
+static void llbitmap_reshape_merge(struct llbitmap *llbitmap,
+				   const struct llbitmap_reshape_range *old,
+				   const struct llbitmap_reshape_range *new)
+{
+	unsigned long dst_start;
+	unsigned long dst_end;
+	unsigned long dst;
+
+	if (!new->sectors)
+		return;
+
+	dst_start = div64_u64(new->offset, llbitmap->reshape_chunksize);
+	dst_end = div64_u64(new->end - 1, llbitmap->reshape_chunksize);
+
+	for (dst = dst_start; dst <= dst_end; dst++) {
+		struct llbitmap_reshape_range dst_range;
+		struct llbitmap_reshape_range src;
+		enum llbitmap_state state;
+
+		llbitmap_reshape_dst_range(llbitmap, dst, new, &dst_range);
+		state = llbitmap_reshape_init_dst(llbitmap, dst, new);
+		if (llbitmap_reshape_src_range(old, new, &dst_range, &src))
+			state = llbitmap_rmerge_src(llbitmap, state, &src);
+		else
+			state = llbitmap_rmerge_state(llbitmap, state, BitUnwritten);
+		llbitmap_write(llbitmap, state, dst);
+	}
+}
+
 static void llbitmap_reshape_finish(struct mddev *mddev)
 {
 	struct llbitmap *llbitmap = mddev->bitmap;
 
 	if (mddev->pers->quiesce)
@@ -1820,10 +1969,32 @@ static void llbitmap_reshape_finish(struct mddev *mddev)
 
 	if (mddev->pers->quiesce)
 		mddev->pers->quiesce(mddev, 0);
 }
 
+static void llbitmap_reshape_mark(struct mddev *mddev, sector_t old_pos,
+				  sector_t new_pos)
+{
+	struct llbitmap *llbitmap = mddev->bitmap;
+	sector_t lo;
+	sector_t hi;
+	struct llbitmap_reshape_range old;
+	struct llbitmap_reshape_range new;
+
+	if (!llbitmap || old_pos == new_pos)
+		return;
+
+	lo = min(old_pos, new_pos);
+	hi = max(old_pos, new_pos);
+	if (!hi)
+		return;
+
+	llbitmap_reshape_map_range(llbitmap, lo, hi, true, &old);
+	llbitmap_reshape_map_range(llbitmap, lo, hi, false, &new);
+	llbitmap_reshape_merge(llbitmap, &old, &new);
+}
+
 static void llbitmap_write_sb(struct llbitmap *llbitmap)
 {
 	int nr_blocks = DIV_ROUND_UP(BITMAP_DATA_OFFSET, llbitmap->io_size);
 
 	bitmap_fill(llbitmap->pctl[0]->dirty, nr_blocks);
@@ -2116,10 +2287,11 @@ static struct bitmap_operations llbitmap_ops = {
 	.get_stats		= llbitmap_get_stats,
 	.dirty_bits		= llbitmap_dirty_bits,
 	.prepare_range		= llbitmap_prepare_range,
 	.reshape_finish		= llbitmap_reshape_finish,
 	.reshape_can_start	= llbitmap_reshape_can_start,
+	.reshape_mark		= llbitmap_reshape_mark,
 	.write_all		= llbitmap_write_all,
 
 	.groups			= md_llbitmap_groups,
 };
 
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/md-llbitmap: don't skip reshape ranges from bitmap state
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Reshape progress is tracked by array metadata rather than llbitmap.
Do not let llbitmap skip_sync_blocks() suppress reshape ranges based on
stale bitmap state before the corresponding checkpoint is persisted.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/md-llbitmap.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index 76621a9fd645..67e63d81a824 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -1644,10 +1644,18 @@ static sector_t llbitmap_skip_sync_blocks(struct mddev *mddev, sector_t offset)
 
 	if (p >= llbitmap->chunks)
 		return 0;
 	c = llbitmap_read(llbitmap, p);
 
+	/*
+	 * Reshape progress is tracked by array metadata rather than llbitmap.
+	 * Skipping reshape ranges from stale bitmap state can lose data after a
+	 * restart before the corresponding bits are checkpointed to disk.
+	 */
+	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+		return 0;
+
 	/* always skip unwritten blocks */
 	if (c == BitUnwritten)
 		return blocks;
 
 	/* Skip CleanUnwritten - no user data, will be reset after recovery */
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/md-llbitmap: add reshape range mapping helpers
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Teach llbitmap to choose old versus new geometry during reshape and to
encode exact bitmap ranges for the active geometry.

This is the mapping groundwork for checkpoint remapping.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/md-llbitmap.c | 96 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 92 insertions(+), 4 deletions(-)

diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index f45daf3be4d5..76621a9fd645 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -7,10 +7,11 @@
 #include <linux/init.h>
 #include <linux/timer.h>
 #include <linux/sched.h>
 #include <linux/list.h>
 #include <linux/file.h>
+#include <linux/math64.h>
 #include <linux/seq_file.h>
 #include <trace/events/block.h>
 
 #include "md.h"
 #include "md-bitmap.h"
@@ -447,10 +448,20 @@ static sector_t llbitmap_personality_sync_size(struct llbitmap *llbitmap,
 	    !mddev->pers->bitmap_sync_size)
 		return llbitmap->sync_size;
 	return mddev->pers->bitmap_sync_size(mddev, previous);
 }
 
+static sector_t llbitmap_logical_size(struct llbitmap *llbitmap, bool previous)
+{
+	struct mddev *mddev = llbitmap->mddev;
+
+	if (!llbitmap_reshaping(llbitmap) || !mddev->private || !mddev->pers ||
+	    !mddev->pers->bitmap_array_sectors)
+		return llbitmap_personality_sync_size(llbitmap, previous);
+	return mddev->pers->bitmap_array_sectors(mddev, previous);
+}
+
 static void llbitmap_refresh_reshape(struct llbitmap *llbitmap)
 {
 	unsigned long old_chunks = DIV_ROUND_UP_SECTOR_T(llbitmap->sync_size,
 						 llbitmap->chunksize);
 	sector_t blocks = llbitmap_personality_sync_size(llbitmap, false);
@@ -463,10 +474,56 @@ static void llbitmap_refresh_reshape(struct llbitmap *llbitmap)
 	llbitmap_resize_chunks(llbitmap->mddev, blocks, &llbitmap->reshape_chunksize,
 			       &llbitmap->reshape_chunks);
 	llbitmap->chunks = max(old_chunks, llbitmap->reshape_chunks);
 }
 
+static void llbitmap_map_layout(struct llbitmap *llbitmap, sector_t *offset,
+				unsigned long *sectors, bool previous)
+{
+	sector_t limit = llbitmap_logical_size(llbitmap, previous);
+	sector_t start = *offset;
+	sector_t end = start + *sectors;
+
+	if (start >= limit) {
+		*sectors = 0;
+		return;
+	}
+	if (end > limit)
+		end = limit;
+
+	*offset = start;
+	*sectors = end - start;
+	if (!*sectors)
+		return;
+
+	if (llbitmap->mddev->pers->bitmap_sector_map)
+		llbitmap->mddev->pers->bitmap_sector_map(llbitmap->mddev, offset,
+							 sectors, previous);
+	else if (!previous && llbitmap->mddev->pers->bitmap_sector)
+		llbitmap->mddev->pers->bitmap_sector(llbitmap->mddev, offset,
+							 sectors);
+}
+
+static void llbitmap_encode_range(struct llbitmap *llbitmap, sector_t *offset,
+				  unsigned long *sectors, bool previous)
+{
+	unsigned long chunksize = previous ? llbitmap->chunksize :
+				      llbitmap->reshape_chunksize;
+	u64 start;
+	u64 end;
+
+	if (!*sectors) {
+		*offset = 0;
+		return;
+	}
+
+	start = div64_u64(*offset, chunksize);
+	end = div64_u64(*offset + *sectors - 1, chunksize);
+	*offset = (sector_t)start << llbitmap->chunkshift;
+	*sectors = (end - start + 1) << llbitmap->chunkshift;
+}
+
 static enum llbitmap_state llbitmap_read(struct llbitmap *llbitmap, loff_t pos)
 {
 	unsigned int idx;
 	unsigned int offset;
 
@@ -1373,15 +1430,36 @@ static void llbitmap_destroy(struct mddev *mddev)
 	llbitmap_free_pages(llbitmap);
 	kfree(llbitmap);
 	mutex_unlock(&mddev->bitmap_info.mutex);
 }
 
+static bool llbitmap_map_previous(struct llbitmap *llbitmap, sector_t offset,
+				  unsigned long sectors)
+{
+	struct mddev *mddev = llbitmap->mddev;
+	sector_t boundary = mddev->reshape_position;
+
+	if (!llbitmap_reshaping(llbitmap))
+		return false;
+
+	WARN_ON_ONCE(sectors && offset < boundary && offset + sectors > boundary);
+
+	return mddev->reshape_backwards ? offset < boundary : offset >= boundary;
+}
+
 static void llbitmap_prepare_range(struct mddev *mddev, sector_t *offset,
 				   unsigned long *sectors)
 {
-	if (mddev->pers->bitmap_sector)
-		mddev->pers->bitmap_sector(mddev, offset, sectors);
+	struct llbitmap *llbitmap = mddev->bitmap;
+	bool previous;
+
+	if (!llbitmap)
+		return;
+
+	previous = llbitmap_map_previous(llbitmap, *offset, *sectors);
+	llbitmap_map_layout(llbitmap, offset, sectors, previous);
+	llbitmap_encode_range(llbitmap, offset, sectors, previous);
 }
 
 static void llbitmap_start_write(struct mddev *mddev, sector_t offset,
 				 unsigned long sectors)
 {
@@ -1546,21 +1624,29 @@ static void llbitmap_flush(struct mddev *mddev)
 /* This is used for raid5 lazy initial recovery */
 static bool llbitmap_blocks_synced(struct mddev *mddev, sector_t offset)
 {
 	struct llbitmap *llbitmap = mddev->bitmap;
 	unsigned long p = offset >> llbitmap->chunkshift;
-	enum llbitmap_state c = llbitmap_read(llbitmap, p);
+	enum llbitmap_state c;
+
+	if (p >= llbitmap->chunks)
+		return false;
+	c = llbitmap_read(llbitmap, p);
 
 	return c == BitClean || c == BitDirty || c == BitCleanUnwritten;
 }
 
 static sector_t llbitmap_skip_sync_blocks(struct mddev *mddev, sector_t offset)
 {
 	struct llbitmap *llbitmap = mddev->bitmap;
 	unsigned long p = offset >> llbitmap->chunkshift;
 	int blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
-	enum llbitmap_state c = llbitmap_read(llbitmap, p);
+	enum llbitmap_state c;
+
+	if (p >= llbitmap->chunks)
+		return 0;
+	c = llbitmap_read(llbitmap, p);
 
 	/* always skip unwritten blocks */
 	if (c == BitUnwritten)
 		return blocks;
 
@@ -1601,10 +1687,12 @@ static bool llbitmap_start_sync(struct mddev *mddev, sector_t offset,
 	/*
 	 * Handle one bit at a time, this is much simpler. And it doesn't matter
 	 * if md_do_sync() loop more times.
 	 */
 	*blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
+	if (p >= llbitmap->chunks)
+		return false;
 	state = llbitmap_state_machine(llbitmap, p, p, BitmapActionStartsync);
 	return state == BitSyncing || state == BitSyncingUnwritten;
 }
 
 /* Something is wrong, sync_thread stop at @offset */
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/md-llbitmap: refuse reshape while llbitmap still needs sync
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Reject reshape when llbitmap still contains NeedSync or Syncing bits.

This keeps reshape from starting until the current llbitmap state has
been reconciled.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/md-llbitmap.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index 52ab45f2a018..f45daf3be4d5 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -1677,10 +1677,33 @@ static void llbitmap_dirty_bits(struct mddev *mddev, unsigned long s,
 				unsigned long e)
 {
 	llbitmap_state_machine(mddev->bitmap, s, e, BitmapActionStartwrite);
 }
 
+static int llbitmap_reshape_can_start(struct mddev *mddev)
+{
+	struct llbitmap *llbitmap = mddev->bitmap;
+	unsigned long chunk;
+	int ret = 0;
+
+	if (!llbitmap)
+		return 0;
+
+	mutex_lock(&mddev->bitmap_info.mutex);
+	for (chunk = 0; chunk < llbitmap->chunks; chunk++) {
+		enum llbitmap_state state = llbitmap_read(llbitmap, chunk);
+
+		if (state == BitNeedSync || state == BitSyncing) {
+			ret = -EBUSY;
+			break;
+		}
+	}
+	mutex_unlock(&mddev->bitmap_info.mutex);
+
+	return ret;
+}
+
 static void llbitmap_reshape_finish(struct mddev *mddev)
 {
 	struct llbitmap *llbitmap = mddev->bitmap;
 
 	if (mddev->pers->quiesce)
@@ -1996,10 +2019,11 @@ static struct bitmap_operations llbitmap_ops = {
 	.update_sb		= llbitmap_update_sb,
 	.get_stats		= llbitmap_get_stats,
 	.dirty_bits		= llbitmap_dirty_bits,
 	.prepare_range		= llbitmap_prepare_range,
 	.reshape_finish		= llbitmap_reshape_finish,
+	.reshape_can_start	= llbitmap_reshape_can_start,
 	.write_all		= llbitmap_write_all,
 
 	.groups			= md_llbitmap_groups,
 };
 
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/md-llbitmap: finish reshape geometry
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Commit the staged llbitmap geometry when reshape finishes.

The reshape staging itself is handled through llbitmap_resize(), so only
the finish step remains in this patch.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/md-llbitmap.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index f79538a4eb79..52ab45f2a018 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -1677,10 +1677,34 @@ static void llbitmap_dirty_bits(struct mddev *mddev, unsigned long s,
 				unsigned long e)
 {
 	llbitmap_state_machine(mddev->bitmap, s, e, BitmapActionStartwrite);
 }
 
+static void llbitmap_reshape_finish(struct mddev *mddev)
+{
+	struct llbitmap *llbitmap = mddev->bitmap;
+
+	if (mddev->pers->quiesce)
+		mddev->pers->quiesce(mddev, 1);
+
+	mutex_lock(&mddev->bitmap_info.mutex);
+	llbitmap_flush(mddev);
+
+	llbitmap->chunksize = llbitmap->reshape_chunksize;
+	llbitmap->chunkshift = ffz(~llbitmap->chunksize);
+	llbitmap->chunks = llbitmap->reshape_chunks;
+	llbitmap->sync_size = llbitmap->reshape_sync_size;
+	llbitmap_refresh_reshape(llbitmap);
+	mddev->bitmap_info.chunksize = llbitmap->chunksize;
+	llbitmap_update_sb(llbitmap);
+	__llbitmap_flush(mddev);
+	mutex_unlock(&mddev->bitmap_info.mutex);
+
+	if (mddev->pers->quiesce)
+		mddev->pers->quiesce(mddev, 0);
+}
+
 static void llbitmap_write_sb(struct llbitmap *llbitmap)
 {
 	int nr_blocks = DIV_ROUND_UP(BITMAP_DATA_OFFSET, llbitmap->io_size);
 
 	bitmap_fill(llbitmap->pctl[0]->dirty, nr_blocks);
@@ -1971,10 +1995,11 @@ static struct bitmap_operations llbitmap_ops = {
 
 	.update_sb		= llbitmap_update_sb,
 	.get_stats		= llbitmap_get_stats,
 	.dirty_bits		= llbitmap_dirty_bits,
 	.prepare_range		= llbitmap_prepare_range,
+	.reshape_finish		= llbitmap_reshape_finish,
 	.write_all		= llbitmap_write_all,
 
 	.groups			= md_llbitmap_groups,
 };
 
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/md-llbitmap: track target reshape geometry fields
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Track llbitmap bookkeeping for the target reshape geometry while keeping
a single live bitmap instance.

Add the reshape geometry fields and a refresh helper, and update the init,
load, and resize paths to keep the target geometry in sync.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/md-llbitmap.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index 91d3dec43d48..f79538a4eb79 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -287,10 +287,13 @@ struct llbitmap {
 	unsigned long chunksize;
 	/* total number of chunks */
 	unsigned long chunks;
 	/* total number of sectors tracked by current bitmap geometry */
 	sector_t sync_size;
+	unsigned long reshape_chunksize;
+	unsigned long reshape_chunks;
+	sector_t reshape_sync_size;
 	unsigned long last_end_sync;
 	/*
 	 * time in seconds that dirty bits will be cleared if the page is not
 	 * accessed.
 	 */
@@ -428,10 +431,42 @@ static void llbitmap_resize_chunks(struct mddev *mddev, sector_t blocks,
 		*chunksize = *chunksize << 1;
 		*chunks = DIV_ROUND_UP_SECTOR_T(blocks, *chunksize);
 	}
 }
 
+static bool llbitmap_reshaping(struct llbitmap *llbitmap)
+{
+	return llbitmap->mddev->reshape_position != MaxSector;
+}
+
+static sector_t llbitmap_personality_sync_size(struct llbitmap *llbitmap,
+					       bool previous)
+{
+	struct mddev *mddev = llbitmap->mddev;
+
+	if (!llbitmap_reshaping(llbitmap) || !mddev->private || !mddev->pers ||
+	    !mddev->pers->bitmap_sync_size)
+		return llbitmap->sync_size;
+	return mddev->pers->bitmap_sync_size(mddev, previous);
+}
+
+static void llbitmap_refresh_reshape(struct llbitmap *llbitmap)
+{
+	unsigned long old_chunks = DIV_ROUND_UP_SECTOR_T(llbitmap->sync_size,
+						 llbitmap->chunksize);
+	sector_t blocks = llbitmap_personality_sync_size(llbitmap, false);
+	unsigned long chunksize = llbitmap->chunksize;
+	unsigned long chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
+
+	llbitmap->reshape_sync_size = blocks;
+	llbitmap->reshape_chunksize = chunksize;
+	llbitmap->reshape_chunks = chunks;
+	llbitmap_resize_chunks(llbitmap->mddev, blocks, &llbitmap->reshape_chunksize,
+			       &llbitmap->reshape_chunks);
+	llbitmap->chunks = max(old_chunks, llbitmap->reshape_chunks);
+}
+
 static enum llbitmap_state llbitmap_read(struct llbitmap *llbitmap, loff_t pos)
 {
 	unsigned int idx;
 	unsigned int offset;
 
@@ -1027,10 +1062,11 @@ static int llbitmap_init(struct llbitmap *llbitmap)
 	llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE;
 	llbitmap->chunkshift = ffz(~chunksize);
 	llbitmap->chunksize = chunksize;
 	llbitmap->chunks = chunks;
 	llbitmap->sync_size = blocks;
+	llbitmap_refresh_reshape(llbitmap);
 	mddev->bitmap_info.daemon_sleep = DEFAULT_DAEMON_SLEEP;
 
 	ret = llbitmap_alloc_pages(llbitmap);
 	if (ret)
 		return ret;
@@ -1138,10 +1174,11 @@ static int llbitmap_read_sb(struct llbitmap *llbitmap)
 	llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE;
 	llbitmap->chunksize = chunksize;
 	llbitmap->chunks = DIV_ROUND_UP_SECTOR_T(sync_size, chunksize);
 	llbitmap->chunkshift = ffz(~chunksize);
 	llbitmap->sync_size = sync_size;
+	llbitmap_refresh_reshape(llbitmap);
 	ret = llbitmap_alloc_pages(llbitmap);
 
 out_put_page:
 	__free_page(sb_page);
 	kunmap_local(sb);
@@ -1293,10 +1330,11 @@ static int llbitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize)
 			llbitmap_mark_range(llbitmap, chunks, old_chunks - 1,
 					    BitUnwritten);
 		mddev->bitmap_info.chunksize = bitmap_chunksize;
 		llbitmap->chunks = chunks;
 		llbitmap->sync_size = blocks;
+		llbitmap_refresh_reshape(llbitmap);
 		llbitmap_update_sb(llbitmap);
 	}
 	__llbitmap_flush(mddev);
 	mutex_unlock(&mddev->bitmap_info.mutex);
 	return 0;
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/md-llbitmap: grow the page cache in place for reshape
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Use the page-control helpers to grow llbitmap's cached pages in place
for resize and later reshape preparation, instead of rebuilding the
whole cache.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/md-llbitmap.c | 139 +++++++++++++++++++++++++++++++++++----
 1 file changed, 127 insertions(+), 12 deletions(-)

diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index 2f2896fe4d6f..91d3dec43d48 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -414,10 +414,23 @@ static char state_machine[BitStateCount][BitmapActionCount] = {
 		[BitmapActionClearUnwritten]	= BitUnwritten,
 	},
 };
 
 static void __llbitmap_flush(struct mddev *mddev);
+static void llbitmap_flush(struct mddev *mddev);
+static void llbitmap_update_sb(void *data);
+
+static void llbitmap_resize_chunks(struct mddev *mddev, sector_t blocks,
+				   unsigned long *chunksize,
+				   unsigned long *chunks)
+{
+	*chunks = DIV_ROUND_UP_SECTOR_T(blocks, *chunksize);
+	while (*chunks > mddev->bitmap_info.space << SECTOR_SHIFT) {
+		*chunksize = *chunksize << 1;
+		*chunks = DIV_ROUND_UP_SECTOR_T(blocks, *chunksize);
+	}
+}
 
 static enum llbitmap_state llbitmap_read(struct llbitmap *llbitmap, loff_t pos)
 {
 	unsigned int idx;
 	unsigned int offset;
@@ -653,10 +666,52 @@ static unsigned int llbitmap_reserved_pages(struct llbitmap *llbitmap)
 {
 	return DIV_ROUND_UP(llbitmap->mddev->bitmap_info.space << SECTOR_SHIFT,
 			    PAGE_SIZE);
 }
 
+static int llbitmap_expand_pages(struct llbitmap *llbitmap,
+				 unsigned long chunks)
+{
+	struct llbitmap_page_ctl **pctl;
+	unsigned int old_nr_pages = llbitmap->nr_pages;
+	unsigned int nr_pages = llbitmap_used_pages(llbitmap, chunks);
+	int i;
+	int ret;
+
+	if (nr_pages <= old_nr_pages)
+		return 0;
+
+	pctl = kcalloc(nr_pages, sizeof(*pctl), GFP_KERNEL);
+	if (!pctl)
+		return -ENOMEM;
+
+	if (llbitmap->pctl)
+		memcpy(pctl, llbitmap->pctl,
+		       array_size(old_nr_pages, sizeof(*pctl)));
+
+	for (i = old_nr_pages; i < nr_pages; i++) {
+		pctl[i] = llbitmap_alloc_page_ctl(llbitmap, i);
+		if (IS_ERR(pctl[i]))
+			goto err_alloc_ptr;
+	}
+
+	kfree(llbitmap->pctl);
+	llbitmap->pctl = pctl;
+	llbitmap->nr_pages = nr_pages;
+	return 0;
+
+err_alloc_ptr:
+	ret = PTR_ERR(pctl[i]);
+	for (i--; i >= (int)old_nr_pages; i--) {
+		__free_page(pctl[i]->page);
+		percpu_ref_exit(&pctl[i]->active);
+		kfree(pctl[i]);
+	}
+	kfree(pctl);
+	return ret;
+}
+
 static int llbitmap_alloc_pages(struct llbitmap *llbitmap)
 {
 	unsigned int used_pages = llbitmap_used_pages(llbitmap, llbitmap->chunks);
 	unsigned int nr_pages = max(used_pages, llbitmap_reserved_pages(llbitmap));
 	int i;
@@ -728,10 +783,38 @@ static bool llbitmap_zero_all_disks(struct llbitmap *llbitmap)
 	}
 
 	return true;
 }
 
+static void llbitmap_mark_range(struct llbitmap *llbitmap,
+				unsigned long start,
+				unsigned long end,
+				enum llbitmap_state state)
+{
+	while (start <= end) {
+		llbitmap_write(llbitmap, state, start);
+		start++;
+	}
+}
+
+static int llbitmap_prepare_resize(struct llbitmap *llbitmap,
+				   unsigned long old_chunks,
+				   unsigned long new_chunks,
+				   unsigned long cache_chunks)
+{
+	int ret;
+
+	llbitmap_flush(llbitmap->mddev);
+	ret = llbitmap_expand_pages(llbitmap, cache_chunks);
+	if (ret)
+		return ret;
+	if (new_chunks > old_chunks)
+		llbitmap_mark_range(llbitmap, old_chunks, new_chunks - 1,
+				    BitUnwritten);
+	return 0;
+}
+
 static void llbitmap_init_state(struct llbitmap *llbitmap)
 {
 	struct mddev *mddev = llbitmap->mddev;
 	enum llbitmap_state state = BitUnwritten;
 	unsigned long i;
@@ -1024,14 +1107,14 @@ static int llbitmap_read_sb(struct llbitmap *llbitmap)
 		pr_err("md/llbitmap: %s: chunksize not a power of 2",
 		       mdname(mddev));
 		goto out_put_page;
 	}
 
-	if (chunksize < DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors,
+	if (chunksize < DIV_ROUND_UP_SECTOR_T(sync_size,
 					      mddev->bitmap_info.space << SECTOR_SHIFT)) {
 		pr_err("md/llbitmap: %s: chunksize too small %lu < %llu / %lu",
-		       mdname(mddev), chunksize, mddev->resync_max_sectors,
+		       mdname(mddev), chunksize, sync_size,
 		       mddev->bitmap_info.space);
 		goto out_put_page;
 	}
 
 	daemon_sleep = le32_to_cpu(sb->daemon_sleep);
@@ -1169,28 +1252,60 @@ static int llbitmap_create(struct mddev *mddev)
 }
 
 static int llbitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize)
 {
 	struct llbitmap *llbitmap = mddev->bitmap;
+	sector_t old_blocks = llbitmap->sync_size;
+	unsigned long old_chunks = llbitmap->chunks;
 	unsigned long chunks;
+	unsigned long cache_chunks;
+	int ret = 0;
+	unsigned long bitmap_chunksize;
+	bool reshape;
 
 	if (chunksize == 0)
 		chunksize = llbitmap->chunksize;
 
-	/* If there is enough space, leave the chunksize unchanged. */
-	chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
-	while (chunks > mddev->bitmap_info.space << SECTOR_SHIFT) {
-		chunksize = chunksize << 1;
-		chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
-	}
+	bitmap_chunksize = chunksize;
+	llbitmap_resize_chunks(mddev, blocks, &bitmap_chunksize, &chunks);
 
-	llbitmap->chunkshift = ffz(~chunksize);
-	llbitmap->chunksize = chunksize;
-	llbitmap->chunks = chunks;
-	llbitmap->sync_size = blocks;
+	reshape = mddev->delta_disks || mddev->new_level != mddev->level ||
+		mddev->new_layout != mddev->layout ||
+		mddev->new_chunk_sectors != mddev->chunk_sectors;
+	if (!reshape && bitmap_chunksize != llbitmap->chunksize)
+		return -EOPNOTSUPP;
+	if (blocks == old_blocks && chunks == llbitmap->chunks)
+		return 0;
+
+	mutex_lock(&mddev->bitmap_info.mutex);
 
+	cache_chunks = reshape ? max(old_chunks, chunks) : chunks;
+	ret = llbitmap_prepare_resize(llbitmap, old_chunks, chunks, cache_chunks);
+	if (ret)
+		goto out;
+
+	if (reshape) {
+		llbitmap->reshape_sync_size = blocks;
+		llbitmap->reshape_chunksize = bitmap_chunksize;
+		llbitmap->reshape_chunks = chunks;
+		llbitmap->chunks = max(old_chunks, chunks);
+	} else {
+		if (blocks < old_blocks && chunks < old_chunks)
+			llbitmap_mark_range(llbitmap, chunks, old_chunks - 1,
+					    BitUnwritten);
+		mddev->bitmap_info.chunksize = bitmap_chunksize;
+		llbitmap->chunks = chunks;
+		llbitmap->sync_size = blocks;
+		llbitmap_update_sb(llbitmap);
+	}
+	__llbitmap_flush(mddev);
+	mutex_unlock(&mddev->bitmap_info.mutex);
 	return 0;
+
+out:
+	mutex_unlock(&mddev->bitmap_info.mutex);
+	return ret;
 }
 
 static int llbitmap_load(struct mddev *mddev)
 {
 	enum llbitmap_action action = BitmapActionReload;
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/md-llbitmap: allocate page controls independently
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Allocate each llbitmap page-control object individually and free each
one the same way, instead of carving them all out of a single array
allocation.

Let llbitmap_read_page() return a zeroed page without reading from disk
when the page index is beyond the current bitmap size, so page-control
allocation no longer needs a separate read_existing flag.

This keeps the llbitmap page-control lifetime self-consistent and
prepares the page-cache code for later in-place growth.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/md-llbitmap.c | 99 +++++++++++++++++++++++++---------------
 1 file changed, 62 insertions(+), 37 deletions(-)

diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index ecf3ed712315..2f2896fe4d6f 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -510,24 +510,32 @@ static void llbitmap_write(struct llbitmap *llbitmap, enum llbitmap_state state,
 		llbitmap_set_page_dirty(llbitmap, idx, bit, true);
 	else if (state == BitNeedSyncUnwritten)
 		llbitmap_set_page_dirty(llbitmap, idx, bit, false);
 }
 
+static unsigned int llbitmap_used_pages(struct llbitmap *llbitmap,
+					unsigned long chunks)
+{
+	return DIV_ROUND_UP(chunks + BITMAP_DATA_OFFSET, PAGE_SIZE);
+}
+
 static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx)
 {
 	struct mddev *mddev = llbitmap->mddev;
 	struct page *page = NULL;
 	struct md_rdev *rdev;
 
-	if (llbitmap->pctl && llbitmap->pctl[idx])
+	if (llbitmap->pctl && idx < llbitmap->nr_pages && llbitmap->pctl[idx])
 		page = llbitmap->pctl[idx]->page;
 	if (page)
 		return page;
 
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page)
 		return ERR_PTR(-ENOMEM);
+	if (idx >= llbitmap_used_pages(llbitmap, llbitmap->chunks))
+		return page;
 
 	rdev_for_each(rdev, mddev) {
 		sector_t sector;
 
 		if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags) ||
@@ -594,65 +602,82 @@ static void llbitmap_free_pages(struct llbitmap *llbitmap)
 		return;
 
 	for (i = 0; i < llbitmap->nr_pages; i++) {
 		struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];
 
-		if (!pctl || !pctl->page)
-			break;
-
-		__free_page(pctl->page);
+		if (!pctl)
+			continue;
+		if (pctl->page)
+			__free_page(pctl->page);
 		percpu_ref_exit(&pctl->active);
+		kfree(pctl);
 	}
 
-	kfree(llbitmap->pctl[0]);
 	kfree(llbitmap->pctl);
 	llbitmap->pctl = NULL;
 }
 
-static int llbitmap_cache_pages(struct llbitmap *llbitmap)
+static struct llbitmap_page_ctl *
+llbitmap_alloc_page_ctl(struct llbitmap *llbitmap, int idx)
 {
 	struct llbitmap_page_ctl *pctl;
-	unsigned int nr_pages = DIV_ROUND_UP(llbitmap->chunks +
-					     BITMAP_DATA_OFFSET, PAGE_SIZE);
+	struct page *page;
 	unsigned int size = struct_size(pctl, dirty, BITS_TO_LONGS(
 						llbitmap->blocks_per_page));
-	int i;
-
-	llbitmap->pctl = kmalloc_array(nr_pages, sizeof(void *),
-				       GFP_KERNEL | __GFP_ZERO);
-	if (!llbitmap->pctl)
-		return -ENOMEM;
 
 	size = round_up(size, cache_line_size());
-	pctl = kmalloc_array(nr_pages, size, GFP_KERNEL | __GFP_ZERO);
-	if (!pctl) {
-		kfree(llbitmap->pctl);
-		return -ENOMEM;
+	pctl = kzalloc(size, GFP_KERNEL);
+	if (!pctl)
+		return ERR_PTR(-ENOMEM);
+
+	page = llbitmap_read_page(llbitmap, idx);
+
+	if (IS_ERR(page)) {
+		kfree(pctl);
+		return ERR_CAST(page);
 	}
 
-	llbitmap->nr_pages = nr_pages;
+	if (percpu_ref_init(&pctl->active, active_release,
+			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
+		__free_page(page);
+		kfree(pctl);
+		return ERR_PTR(-ENOMEM);
+	}
 
-	for (i = 0; i < nr_pages; i++, pctl = (void *)pctl + size) {
-		struct page *page = llbitmap_read_page(llbitmap, i);
+	pctl->page = page;
+	pctl->state = page_address(page);
+	init_waitqueue_head(&pctl->wait);
+	return pctl;
+}
 
-		llbitmap->pctl[i] = pctl;
+static unsigned int llbitmap_reserved_pages(struct llbitmap *llbitmap)
+{
+	return DIV_ROUND_UP(llbitmap->mddev->bitmap_info.space << SECTOR_SHIFT,
+			    PAGE_SIZE);
+}
 
-		if (IS_ERR(page)) {
-			llbitmap_free_pages(llbitmap);
-			return PTR_ERR(page);
-		}
+static int llbitmap_alloc_pages(struct llbitmap *llbitmap)
+{
+	unsigned int used_pages = llbitmap_used_pages(llbitmap, llbitmap->chunks);
+	unsigned int nr_pages = max(used_pages, llbitmap_reserved_pages(llbitmap));
+	int i;
+
+	llbitmap->pctl = kcalloc(nr_pages, sizeof(*llbitmap->pctl), GFP_KERNEL);
+	if (!llbitmap->pctl)
+		return -ENOMEM;
 
-		if (percpu_ref_init(&pctl->active, active_release,
-				    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
-			__free_page(page);
+	llbitmap->nr_pages = nr_pages;
+
+	for (i = 0; i < nr_pages; i++) {
+		llbitmap->pctl[i] = llbitmap_alloc_page_ctl(llbitmap, i);
+		if (IS_ERR(llbitmap->pctl[i])) {
+			int ret = PTR_ERR(llbitmap->pctl[i]);
+
+			llbitmap->pctl[i] = NULL;
 			llbitmap_free_pages(llbitmap);
-			return -ENOMEM;
+			return ret;
 		}
-
-		pctl->page = page;
-		pctl->state = page_address(page);
-		init_waitqueue_head(&pctl->wait);
 	}
 
 	return 0;
 }
 
@@ -921,11 +946,11 @@ static int llbitmap_init(struct llbitmap *llbitmap)
 	llbitmap->chunksize = chunksize;
 	llbitmap->chunks = chunks;
 	llbitmap->sync_size = blocks;
 	mddev->bitmap_info.daemon_sleep = DEFAULT_DAEMON_SLEEP;
 
-	ret = llbitmap_cache_pages(llbitmap);
+	ret = llbitmap_alloc_pages(llbitmap);
 	if (ret)
 		return ret;
 
 	llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
 			       BitmapActionInit);
@@ -1030,11 +1055,11 @@ static int llbitmap_read_sb(struct llbitmap *llbitmap)
 	llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE;
 	llbitmap->chunksize = chunksize;
 	llbitmap->chunks = DIV_ROUND_UP_SECTOR_T(sync_size, chunksize);
 	llbitmap->chunkshift = ffz(~chunksize);
 	llbitmap->sync_size = sync_size;
-	ret = llbitmap_cache_pages(llbitmap);
+	ret = llbitmap_alloc_pages(llbitmap);
 
 out_put_page:
 	__free_page(sb_page);
 	kunmap_local(sb);
 	return ret;
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md/md-llbitmap: track bitmap sync_size explicitly
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Track llbitmap's own sync_size instead of always using
mddev->resync_max_sectors directly.

This is the minimal bookkeeping needed before llbitmap can track old
and new reshape geometry independently.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/md-llbitmap.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index bcf34f0c9af6..ecf3ed712315 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -285,10 +285,12 @@ struct llbitmap {
 	unsigned long chunkshift;
 	/* size of one chunk in sector */
 	unsigned long chunksize;
 	/* total number of chunks */
 	unsigned long chunks;
+	/* total number of sectors tracked by current bitmap geometry */
+	sector_t sync_size;
 	unsigned long last_end_sync;
 	/*
 	 * time in seconds that dirty bits will be cleared if the page is not
 	 * accessed.
 	 */
@@ -916,10 +918,11 @@ static int llbitmap_init(struct llbitmap *llbitmap)
 
 	llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE;
 	llbitmap->chunkshift = ffz(~chunksize);
 	llbitmap->chunksize = chunksize;
 	llbitmap->chunks = chunks;
+	llbitmap->sync_size = blocks;
 	mddev->bitmap_info.daemon_sleep = DEFAULT_DAEMON_SLEEP;
 
 	ret = llbitmap_cache_pages(llbitmap);
 	if (ret)
 		return ret;
@@ -936,10 +939,11 @@ static int llbitmap_read_sb(struct llbitmap *llbitmap)
 {
 	struct mddev *mddev = llbitmap->mddev;
 	unsigned long daemon_sleep;
 	unsigned long chunksize;
 	unsigned long events;
+	sector_t sync_size;
 	struct page *sb_page;
 	bitmap_super_t *sb;
 	int ret = -EINVAL;
 
 	if (!mddev->bitmap_info.offset) {
@@ -985,10 +989,13 @@ static int llbitmap_read_sb(struct llbitmap *llbitmap)
 	if (test_and_clear_bit(BITMAP_FIRST_USE, &llbitmap->flags)) {
 		ret = llbitmap_init(llbitmap);
 		goto out_put_page;
 	}
 
+	sync_size = le64_to_cpu(sb->sync_size);
+	if (!sync_size)
+		sync_size = mddev->resync_max_sectors;
 	chunksize = le32_to_cpu(sb->chunksize);
 	if (!is_power_of_2(chunksize)) {
 		pr_err("md/llbitmap: %s: chunksize not a power of 2",
 		       mdname(mddev));
 		goto out_put_page;
@@ -1020,12 +1027,13 @@ static int llbitmap_read_sb(struct llbitmap *llbitmap)
 	mddev->bitmap_info.chunksize = chunksize;
 	mddev->bitmap_info.daemon_sleep = daemon_sleep;
 
 	llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE;
 	llbitmap->chunksize = chunksize;
-	llbitmap->chunks = DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors, chunksize);
+	llbitmap->chunks = DIV_ROUND_UP_SECTOR_T(sync_size, chunksize);
 	llbitmap->chunkshift = ffz(~chunksize);
+	llbitmap->sync_size = sync_size;
 	ret = llbitmap_cache_pages(llbitmap);
 
 out_put_page:
 	__free_page(sb_page);
 	kunmap_local(sb);
@@ -1151,10 +1159,11 @@ static int llbitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize)
 	}
 
 	llbitmap->chunkshift = ffz(~chunksize);
 	llbitmap->chunksize = chunksize;
 	llbitmap->chunks = chunks;
+	llbitmap->sync_size = blocks;
 
 	return 0;
 }
 
 static int llbitmap_load(struct mddev *mddev)
@@ -1524,11 +1533,11 @@ static void llbitmap_update_sb(void *data)
 
 	sb = kmap_local_page(sb_page);
 	sb->events = cpu_to_le64(mddev->events);
 	sb->state = cpu_to_le32(llbitmap->flags);
 	sb->chunksize = cpu_to_le32(llbitmap->chunksize);
-	sb->sync_size = cpu_to_le64(mddev->resync_max_sectors);
+	sb->sync_size = cpu_to_le64(llbitmap->sync_size);
 	sb->events_cleared = cpu_to_le64(llbitmap->events_cleared);
 	sb->sectors_reserved = cpu_to_le32(mddev->bitmap_info.space);
 	sb->daemon_sleep = cpu_to_le32(mddev->bitmap_info.daemon_sleep);
 
 	kunmap_local(sb);
@@ -1542,10 +1551,11 @@ static int llbitmap_get_stats(void *data, struct md_bitmap_stats *stats)
 	memset(stats, 0, sizeof(*stats));
 
 	stats->missing_pages = 0;
 	stats->pages = llbitmap->nr_pages;
 	stats->file_pages = llbitmap->nr_pages;
+	stats->sync_size = llbitmap->sync_size;
 
 	stats->behind_writes = atomic_read(&llbitmap->behind_writes);
 	stats->behind_wait = wq_has_sleeper(&llbitmap->behind_wait);
 	stats->events_cleared = llbitmap->events_cleared;
 
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md: add helper to split bios at reshape offset
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Add mddev_bio_split_at_reshape_offset() so personalities can share
reshape-offset bio splitting instead of open-coding the same helper in
multiple places.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/md.c | 39 +++++++++++++++++++++++++++++++++++++++
 drivers/md/md.h |  4 ++++
 2 files changed, 43 insertions(+)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index ccc4180d2c1d..6685e4c53fd9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9359,10 +9359,49 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 	mddev_trace_remap(mddev, discard_bio, bio->bi_iter.bi_sector);
 	submit_bio_noacct(discard_bio);
 }
 EXPORT_SYMBOL_GPL(md_submit_discard_bio);
 
+struct bio *mddev_bio_split_at_reshape_offset(struct mddev *mddev,
+					      struct bio *bio,
+					      unsigned int *max_sectors,
+					      struct bio_set *bs)
+{
+	sector_t boundary;
+	sector_t start;
+	sector_t end;
+	unsigned int split_sectors;
+
+	split_sectors = bio_sectors(bio);
+	if (max_sectors && *max_sectors && *max_sectors < split_sectors)
+		split_sectors = *max_sectors;
+
+	if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+		goto split;
+
+	boundary = mddev->reshape_position;
+	start = bio->bi_iter.bi_sector;
+	end = bio_end_sector(bio);
+	if (start >= boundary || end <= boundary)
+		goto split;
+
+	if (boundary - start < split_sectors)
+		split_sectors = boundary - start;
+
+split:
+	if (max_sectors)
+		*max_sectors = split_sectors;
+	if (split_sectors < bio_sectors(bio)) {
+		bio = bio_submit_split_bioset(bio, split_sectors, bs);
+		if (bio)
+			bio->bi_opf |= REQ_NOMERGE;
+	}
+
+	return bio;
+}
+EXPORT_SYMBOL_GPL(mddev_bio_split_at_reshape_offset);
+
 static void md_bitmap_prepare_range(struct mddev *mddev, sector_t *offset,
 				    unsigned long *sectors)
 {
 	mddev->bitmap_ops->prepare_range(mddev, offset, sectors);
 }
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 110cf0f8b107..ebfc6da83161 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -923,10 +923,14 @@ extern void md_done_sync(struct mddev *mddev, int blocks);
 extern void md_sync_error(struct mddev *mddev);
 extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
 extern void md_finish_reshape(struct mddev *mddev);
 void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 			struct bio *bio, sector_t start, sector_t size);
+struct bio *mddev_bio_split_at_reshape_offset(struct mddev *mddev,
+					      struct bio *bio,
+					      unsigned int *max_sectors,
+					      struct bio_set *bs);
 void md_account_bio(struct mddev *mddev, struct bio **bio);
 
 extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
 void md_write_metadata(struct mddev *mddev, struct md_rdev *rdev,
 		       sector_t sector, int size, struct page *page,
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md: skip bitmap accounting for empty write ranges
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

mkfs.ext4 can submit zero-sector flush/FUA bios. These bios are WRITE
bios for md_write_start() purposes, but they do not cover any data sector
and must not dirty bitmap bits.

md bitmap accounting currently passes such bios to bitmap start_write().
For llbitmap this reaches llbitmap_start_write() with sectors == 0,
which underflows the end chunk calculation.

The new bitmap prepare_range() hook can also turn a non-empty bio into an
empty bitmap range when the requested sectors are outside the active
bitmap geometry. Treat both cases as not started, so the completion path
will not call end_write() for an empty range.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/md.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 08eabc7e5a71..ccc4180d2c1d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9374,10 +9374,12 @@ static void md_bitmap_start(struct mddev *mddev,
 			   mddev->bitmap_ops->start_discard :
 			   mddev->bitmap_ops->start_write;
 
 	md_bitmap_prepare_range(mddev, &md_io_clone->offset,
 				&md_io_clone->sectors);
+	if (!md_io_clone->sectors)
+		return;
 	fn(mddev, md_io_clone->offset, md_io_clone->sectors);
 }
 
 static void md_bitmap_end(struct mddev *mddev, struct md_io_clone *md_io_clone)
 {
@@ -9394,11 +9396,12 @@ static void md_end_clone_io(struct bio *bio)
 						       bio_clone);
 	struct bio *orig_bio = md_io_clone->orig_bio;
 	struct mddev *mddev = md_io_clone->mddev;
 	struct completion *reshape_completion = bio->bi_private;
 
-	if (bio_data_dir(orig_bio) == WRITE && md_bitmap_enabled(mddev, false))
+	if (bio_data_dir(orig_bio) == WRITE && md_io_clone->sectors &&
+	    md_bitmap_enabled(mddev, false))
 		md_bitmap_end(mddev, md_io_clone);
 
 	if (bio->bi_status && !orig_bio->bi_status)
 		orig_bio->bi_status = bio->bi_status;
 
@@ -9421,14 +9424,16 @@ static void md_clone_bio(struct mddev *mddev, struct bio **bio)
 		bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_clone_set);
 
 	md_io_clone = container_of(clone, struct md_io_clone, bio_clone);
 	md_io_clone->orig_bio = *bio;
 	md_io_clone->mddev = mddev;
+	md_io_clone->sectors = 0;
 	if (blk_queue_io_stat(bdev->bd_disk->queue))
 		md_io_clone->start_time = bio_start_io_acct(*bio);
 
-	if (bio_data_dir(*bio) == WRITE && md_bitmap_enabled(mddev, false)) {
+	if (bio_data_dir(*bio) == WRITE && bio_sectors(*bio) &&
+	    md_bitmap_enabled(mddev, false)) {
 		md_io_clone->offset = (*bio)->bi_iter.bi_sector;
 		md_io_clone->sectors = bio_sectors(*bio);
 		md_io_clone->rw = op_stat_group(bio_op(*bio));
 		md_bitmap_start(mddev, md_io_clone);
 	}
-- 
2.51.0


^ permalink raw reply related

* [PATCH] md: add exact bitmap mapping and reshape hooks
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605091527.2463539-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Add bitmap mapping and reshape hooks needed by llbitmap reshape
support without teaching md core to account a single bio against
multiple bitmap ranges.

This also adds the md_personality callbacks (bitmap_sector_map,
bitmap_sync_size and bitmap_array_sectors) through which personalities
describe the old and new bitmap geometry of a reshape to llbitmap.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/md-bitmap.c   |  8 ++++++++
 drivers/md/md-bitmap.h   |  8 ++++++++
 drivers/md/md-llbitmap.c |  8 ++++++++
 drivers/md/md.c          | 12 ++++++++----
 drivers/md/md.h          |  4 ++++
 5 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 028b9ca8ce52..e10296788cdd 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -1727,10 +1727,17 @@ static void bitmap_start_write(struct mddev *mddev, sector_t offset,
 		else
 			sectors = 0;
 	}
 }
 
+static void bitmap_prepare_range(struct mddev *mddev, sector_t *offset,
+				 unsigned long *sectors)
+{
+	if (mddev->pers->bitmap_sector)
+		mddev->pers->bitmap_sector(mddev, offset, sectors);
+}
+
 static void bitmap_end_write(struct mddev *mddev, sector_t offset,
 			     unsigned long sectors)
 {
 	struct bitmap *bitmap = mddev->bitmap;
 
@@ -3075,10 +3082,11 @@ static struct bitmap_operations bitmap_ops = {
 	.load			= bitmap_load,
 	.destroy		= bitmap_destroy,
 	.flush			= bitmap_flush,
 	.write_all		= bitmap_write_all,
 	.dirty_bits		= bitmap_dirty_bits,
+	.prepare_range		= bitmap_prepare_range,
 	.unplug			= bitmap_unplug,
 	.daemon_work		= bitmap_daemon_work,
 
 	.start_behind_write	= bitmap_start_behind_write,
 	.end_behind_write	= bitmap_end_behind_write,
diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h
index 214f623c7e79..f0136fc02feb 100644
--- a/drivers/md/md-bitmap.h
+++ b/drivers/md/md-bitmap.h
@@ -91,10 +91,18 @@ struct bitmap_operations {
 	void (*destroy)(struct mddev *mddev);
 	void (*flush)(struct mddev *mddev);
 	void (*write_all)(struct mddev *mddev);
 	void (*dirty_bits)(struct mddev *mddev, unsigned long s,
 			   unsigned long e);
+	/* Prepare a range for this bitmap implementation. */
+	void (*prepare_range)(struct mddev *mddev,
+			      sector_t *offset,
+			      unsigned long *sectors);
+	void (*reshape_finish)(struct mddev *mddev);
+	int (*reshape_can_start)(struct mddev *mddev);
+	void (*reshape_mark)(struct mddev *mddev, sector_t old_pos,
+			     sector_t new_pos);
 	void (*unplug)(struct mddev *mddev, bool sync);
 	void (*daemon_work)(struct mddev *mddev);
 
 	void (*start_behind_write)(struct mddev *mddev);
 	void (*end_behind_write)(struct mddev *mddev);
diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index 1adc5b117821..bcf34f0c9af6 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -1186,10 +1186,17 @@ static void llbitmap_destroy(struct mddev *mddev)
 	llbitmap_free_pages(llbitmap);
 	kfree(llbitmap);
 	mutex_unlock(&mddev->bitmap_info.mutex);
 }
 
+static void llbitmap_prepare_range(struct mddev *mddev, sector_t *offset,
+				   unsigned long *sectors)
+{
+	if (mddev->pers->bitmap_sector)
+		mddev->pers->bitmap_sector(mddev, offset, sectors);
+}
+
 static void llbitmap_start_write(struct mddev *mddev, sector_t offset,
 				 unsigned long sectors)
 {
 	struct llbitmap *llbitmap = mddev->bitmap;
 	unsigned long start = offset >> llbitmap->chunkshift;
@@ -1775,10 +1782,11 @@ static struct bitmap_operations llbitmap_ops = {
 	.cond_end_sync		= llbitmap_cond_end_sync,
 
 	.update_sb		= llbitmap_update_sb,
 	.get_stats		= llbitmap_get_stats,
 	.dirty_bits		= llbitmap_dirty_bits,
+	.prepare_range		= llbitmap_prepare_range,
 	.write_all		= llbitmap_write_all,
 
 	.groups			= md_llbitmap_groups,
 };
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 096bb64e87bd..08eabc7e5a71 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9359,21 +9359,25 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 	mddev_trace_remap(mddev, discard_bio, bio->bi_iter.bi_sector);
 	submit_bio_noacct(discard_bio);
 }
 EXPORT_SYMBOL_GPL(md_submit_discard_bio);
 
+static void md_bitmap_prepare_range(struct mddev *mddev, sector_t *offset,
+				    unsigned long *sectors)
+{
+	mddev->bitmap_ops->prepare_range(mddev, offset, sectors);
+}
+
 static void md_bitmap_start(struct mddev *mddev,
 			    struct md_io_clone *md_io_clone)
 {
 	md_bitmap_fn *fn = unlikely(md_io_clone->rw == STAT_DISCARD) ?
 			   mddev->bitmap_ops->start_discard :
 			   mddev->bitmap_ops->start_write;
 
-	if (mddev->pers->bitmap_sector)
-		mddev->pers->bitmap_sector(mddev, &md_io_clone->offset,
-					   &md_io_clone->sectors);
-
+	md_bitmap_prepare_range(mddev, &md_io_clone->offset,
+				&md_io_clone->sectors);
 	fn(mddev, md_io_clone->offset, md_io_clone->sectors);
 }
 
 static void md_bitmap_end(struct mddev *mddev, struct md_io_clone *md_io_clone)
 {
diff --git a/drivers/md/md.h b/drivers/md/md.h
index d8daf0f75cbb..110cf0f8b107 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -796,10 +796,14 @@ struct md_personality
 	/* Changes the consistency policy of an active array. */
 	int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
 	/* convert io ranges from array to bitmap */
 	void (*bitmap_sector)(struct mddev *mddev, sector_t *offset,
 			      unsigned long *sectors);
+	void (*bitmap_sector_map)(struct mddev *mddev, sector_t *offset,
+				  unsigned long *sectors, bool previous);
+	sector_t (*bitmap_sync_size)(struct mddev *mddev, bool previous);
+	sector_t (*bitmap_array_sectors)(struct mddev *mddev, bool previous);
 };
 
 struct md_sysfs_entry {
 	struct attribute attr;
 	ssize_t (*show)(struct mddev *, char *);
-- 
2.51.0


^ permalink raw reply related

* [PATCH 00/20] md/md-llbitmap: support reshape for RAID10 and RAID5
From: Yu Kuai @ 2026-06-05  9:15 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel

From: Yu Kuai <yukuai@fygo.io>

Hi,

This series adds llbitmap support for online reshape in RAID10 and RAID5.

llbitmap has a different set of constraints from the existing bitmap code:
there is one live bitmap instance, each bit state has richer semantics, and
reshape can change the mapping from logical array ranges to bitmap ranges.
The series therefore adds exact bitmap range mapping hooks, tracks old and
new llbitmap geometry during reshape, remaps checkpointed bits as reshape
progresses, and wires the reshape lifecycle into RAID10 and RAID5.

The main rules are:

1. split bios at the reshape position before bitmap accounting, so one bio
   is never accounted with mixed old/new geometry;
2. do not skip reshape ranges from stale llbitmap state, because reshape
   progress is checkpointed by array metadata;
3. remap llbitmap bits when reshape progress is checkpointed;
4. reject llbitmap reshape if mddev->chunk_sectors shrinks, because the
   effective data range represented by existing bitmap bits can shrink.

The first group of patches prepares generic bitmap and llbitmap
infrastructure.  The second group wires RAID10.  The last group wires
RAID5, including exact old/new stripe mapping.

Validation:
* RAID5 llbitmap test:
  - created 3-disk RAID5 with --bitmap=lockless
  - wrote 96 MiB of random data
  - reshaped to 4 disks
  - llbitmap bits changed from clean=1024 dirty=1024 to
    unwritten=448 clean=1600 dirty=0
  - all sync-related llbitmap counters were zero after reshape
  - data hash was unchanged after reshape
  - replaced one disk, waited for recovery, hash was unchanged
  - failed another old disk and verified degraded reads still matched
* RAID10 llbitmap test:
  - created 4-disk RAID10 n2 with --bitmap=lockless
  - wrote 128 MiB of random data
  - reshaped to 6 disks
  - llbitmap bits changed from clean=2048 dirty=2048 to
    unwritten=2048 clean=4096 dirty=0
  - all sync-related llbitmap counters were zero after reshape
  - data hash was unchanged after reshape
  - replaced one disk, waited for recovery, hash was unchanged
  - failed the rebuilt disk's mirror mate and verified degraded reads still
    matched

Yu Kuai (20):
  md: add exact bitmap mapping and reshape hooks
  md: skip bitmap accounting for empty write ranges
  md: add helper to split bios at reshape offset
  md/md-llbitmap: track bitmap sync_size explicitly
  md/md-llbitmap: allocate page controls independently
  md/md-llbitmap: grow the page cache in place for reshape
  md/md-llbitmap: track target reshape geometry fields
  md/md-llbitmap: finish reshape geometry
  md/md-llbitmap: refuse reshape while llbitmap still needs sync
  md/md-llbitmap: add reshape range mapping helpers
  md/md-llbitmap: don't skip reshape ranges from bitmap state
  md/md-llbitmap: remap checkpointed bits as reshape progresses
  md/md-llbitmap: clamp state-machine walks to tracked bits
  md/raid10: reject llbitmap reshape when md chunk shrinks
  md/raid10: wire llbitmap reshape lifecycle
  md/raid10: split reshape bios before bitmap accounting
  md/raid5: add exact old and new llbitmap mapping helpers
  md/raid5: reject llbitmap reshape when md chunk shrinks
  md/raid5: wire llbitmap reshape lifecycle
  md/raid5: split reshape bios before bitmap accounting

 drivers/md/md-bitmap.c   |   8 +
 drivers/md/md-bitmap.h   |   8 +
 drivers/md/md-llbitmap.c | 616 +++++++++++++++++++++++++++++++++++----
 drivers/md/md.c          |  60 +++-
 drivers/md/md.h          |   8 +
 drivers/md/raid10.c      |  50 +++-
 drivers/md/raid5.c       | 118 ++++++--
 7 files changed, 793 insertions(+), 75 deletions(-)

-- 
2.51.0

^ permalink raw reply

* [PATCH v2 3/3] md/raid5: always convert llbitmap bits for discard
From: Yu Kuai @ 2026-06-05  7:26 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605072639.2434847-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

llbitmap discard is useful even when no underlying member device supports
it. The discard still converts the llbitmap range to unwritten, so later
reads and recovery do not rely on stale parity for that range.

Let llbitmap discard bypass the raid5 check for member-device discard
support. If member-device discard is not safe or not supported, complete
the accounted clone after md_account_bio() so the llbitmap conversion
callbacks run without issuing member discard bios.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/raid5.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 76e736ee48d3..180ff0660b6a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1136,10 +1136,13 @@ static void defer_issue_bios(struct r5conf *conf, sector_t sector,
 
 static bool raid5_discard_limits(struct mddev *mddev, struct bio *bi)
 {
 	struct r5conf *conf = mddev->private;
 
+	if (mddev->bitmap_id == ID_LLBITMAP)
+		return true;
+
 	if (!conf->raid5_discard_unsupported)
 		return true;
 
 	bi->bi_status = BLK_STS_NOTSUPP;
 	bio_endio(bi);
@@ -5738,10 +5741,16 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 	md_account_bio(mddev, &bi);
 	orig_bi->bi_iter = bi_iter;
 	bi->bi_iter = bi_iter;
 	bi->bi_next = NULL;
 
+	if (mddev->bitmap_id == ID_LLBITMAP &&
+	    conf->raid5_discard_unsupported) {
+		bio_endio(bi);
+		return;
+	}
+
 	logical_sector = first_stripe * conf->chunk_sectors;
 	last_sector = last_stripe * conf->chunk_sectors;
 
 	for (; logical_sector < last_sector;
 	     logical_sector += RAID5_STRIPE_SECTORS(conf)) {
-- 
2.51.0


^ permalink raw reply related

* [PATCH v2 2/3] md/raid5: validate discard support at request time
From: Yu Kuai @ 2026-06-05  7:26 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, Xiao Ni, linux-raid, linux-kernel
In-Reply-To: <20260605072639.2434847-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Raid5 used to disable discard limits when devices_handle_discard_safely
was not set or when stacked member limits could not support a full-stripe
discard. That hides discard from userspace before raid5 can decide whether
a request can be handled safely.

Follow other virtual drivers and advertise a UINT_MAX discard limit for the
md device. When setting queue limits, cache in r5conf whether discard can
be passed down to the member devices safely, and fail discard bios with
BLK_STS_NOTSUPP before queuing stripe work when it cannot.

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 drivers/md/raid5.c | 34 +++++++++++++++++++---------------
 drivers/md/raid5.h |  1 +
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index debf35342ae0..76e736ee48d3 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1132,10 +1132,22 @@ static void defer_issue_bios(struct r5conf *conf, sector_t sector,
 	spin_unlock(&conf->pending_bios_lock);
 
 	dispatch_bio_list(&tmp);
 }
 
+static bool raid5_discard_limits(struct mddev *mddev, struct bio *bi)
+{
+	struct r5conf *conf = mddev->private;
+
+	if (!conf->raid5_discard_unsupported)
+		return true;
+
+	bi->bi_status = BLK_STS_NOTSUPP;
+	bio_endio(bi);
+	return false;
+}
+
 static void
 raid5_end_read_request(struct bio *bi);
 static void
 raid5_end_write_request(struct bio *bi);
 
@@ -5702,10 +5714,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 
 	if (mddev->reshape_position != MaxSector)
 		/* Skip discard while reshape is happening */
 		return;
 
+	if (!raid5_discard_limits(mddev, bi))
+		return;
+
 	stripe_sectors = conf->chunk_sectors *
 		(conf->raid_disks - conf->max_degraded);
 	first_stripe = DIV_ROUND_UP_SECTOR_T(bi->bi_iter.bi_sector,
 					     stripe_sectors);
 	last_stripe = bio_end_sector(bi);
@@ -7815,36 +7830,25 @@ static int raid5_set_limits(struct mddev *mddev)
 	mddev_stack_rdev_limits(mddev, &lim, 0);
 	rdev_for_each(rdev, mddev)
 		queue_limits_stack_bdev(&lim, rdev->bdev, rdev->new_data_offset,
 				mddev->gendisk->disk_name);
 
-	/*
-	 * Zeroing is required for discard, otherwise data could be lost.
-	 *
-	 * Consider a scenario: discard a stripe (the stripe could be
-	 * inconsistent if discard_zeroes_data is 0); write one disk of the
-	 * stripe (the stripe could be inconsistent again depending on which
-	 * disks are used to calculate parity); the disk is broken; The stripe
-	 * data of this disk is lost.
-	 *
-	 * We only allow DISCARD if the sysadmin has confirmed that only safe
-	 * devices are in use by setting a module parameter.  A better idea
-	 * might be to turn DISCARD into WRITE_ZEROES requests, as that is
-	 * required to be safe.
-	 */
 	if (!devices_handle_discard_safely ||
 	    lim.max_discard_sectors < (stripe >> 9) ||
 	    lim.discard_granularity < stripe)
-		lim.max_hw_discard_sectors = 0;
+		conf->raid5_discard_unsupported = true;
+	else
+		conf->raid5_discard_unsupported = false;
 
 	/*
 	 * Requests require having a bitmap for each stripe.
 	 * Limit the max sectors based on this.
 	 */
 	lim.max_hw_sectors = RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf);
 	if ((lim.max_hw_sectors << 9) < lim.io_opt)
 		lim.max_hw_sectors = lim.io_opt >> 9;
+	lim.max_hw_discard_sectors = UINT_MAX;
 
 	/* No restrictions on the number of segments in the request */
 	lim.max_segments = USHRT_MAX;
 
 	return queue_limits_set(mddev->gendisk->queue, &lim);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 1c7b710fc9c1..ba06cf88aa24 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -687,10 +687,11 @@ struct r5conf {
 	struct r5pending_data	*pending_data;
 	struct list_head	free_list;
 	struct list_head	pending_list;
 	int			pending_data_cnt;
 	struct r5pending_data	*next_pending_data;
+	bool			raid5_discard_unsupported;
 
 	mempool_t		*ctx_pool;
 	int			ctx_size;
 };
 
-- 
2.51.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox