[PATCH 0/2] btrfs: prepare raid56 and scrub to support bs > ps cases

Linux Btrfs filesystem development
 help / color / mirror / Atom feed

* [PATCH 0/2] btrfs: prepare raid56 and scrub to support bs > ps cases
@ 2025-09-12  3:59 Qu Wenruo
  2025-09-12  3:59 ` [PATCH 1/2] btrfs: prepare raid56 " Qu Wenruo
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Qu Wenruo @ 2025-09-12  3:59 UTC (permalink / raw)
  To: linux-btrfs

After the previous compression bs > ps preparation, this series focuses on
raid56 and scrub.

Both raid56 and scrub are using a page array storing their stripes.

Thankfully both are already using physical memory addresses for checksum
calculation, thus there is no internal code to do the checksum
handling.

Just converting the involved arrays (and some RAID56 internal page-related
members) to folio arrays will handle most things properly.

Now the remaining unconverted code is mostly encoded write, which shares
some infrastructure with send, making that conversion more complex than I
thought.

Qu Wenruo (2):
  btrfs: prepare raid56 to support bs > ps cases
  btrfs: prepare scrub to support bs > ps cases

 fs/btrfs/misc.h   |   5 ++
 fs/btrfs/raid56.c | 180 +++++++++++++++++++++++-----------------------
 fs/btrfs/raid56.h |  26 ++++---
 fs/btrfs/scrub.c  |  51 +++++++------
 4 files changed, 142 insertions(+), 120 deletions(-)

-- 
2.50.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] btrfs: prepare raid56 to support bs > ps cases
  2025-09-12  3:59 [PATCH 0/2] btrfs: prepare raid56 and scrub to support bs > ps cases Qu Wenruo
@ 2025-09-12  3:59 ` Qu Wenruo
  2025-09-12  3:59 ` [PATCH 2/2] btrfs: prepare scrub " Qu Wenruo
  2025-09-17  7:31 ` [PATCH 0/2] btrfs: prepare raid56 and " Qu Wenruo
  2 siblings, 0 replies; 4+ messages in thread
From: Qu Wenruo @ 2025-09-12  3:59 UTC (permalink / raw)
  To: linux-btrfs

This involves the following conversions:

- btrfs_raid_bio::nr_pages -> nr_folios
- btrfs_raid_bio::stripe_npages -> stripe_nfolios
- btrfs_raid_bio::stripe_pages[] -> stripe_folios[]
- Involved comments using "page"
- Remove the PAGE_SIZE alignment check against sectorsize

There is one exception: function raid56_parity_cache_data_pages() is
used by scrub, which doesn't support bs > ps yet.

So add an ASSERT() inside that function to make sure it's only called
when bs <= ps.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/misc.h   |   5 ++
 fs/btrfs/raid56.c | 181 ++++++++++++++++++++++++----------------------
 fs/btrfs/raid56.h |  22 ++++--
 3 files changed, 115 insertions(+), 93 deletions(-)

diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 60f9b000d644..3bedffbd51ba 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -214,4 +214,9 @@ static inline u64 folio_end(struct folio *folio)
 	return folio_pos(folio) + folio_size(folio);
 }
 
+static inline phys_addr_t folio_to_phys(const struct folio *folio)
+{
+	return page_to_phys(folio_page(folio, 0));
+}
+
 #endif
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 2b4f577dcf39..ac94335dd262 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -158,7 +158,7 @@ static void scrub_rbio_work_locked(struct work_struct *work);
 static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
 {
 	bitmap_free(rbio->error_bitmap);
-	kfree(rbio->stripe_pages);
+	kfree(rbio->stripe_folios);
 	kfree(rbio->bio_sectors);
 	kfree(rbio->stripe_sectors);
 	kfree(rbio->finish_pointers);
@@ -166,8 +166,6 @@ static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
 
 static void free_raid_bio(struct btrfs_raid_bio *rbio)
 {
-	int i;
-
 	if (!refcount_dec_and_test(&rbio->refs))
 		return;
 
@@ -175,10 +173,10 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
 	WARN_ON(!list_empty(&rbio->hash_list));
 	WARN_ON(!bio_list_empty(&rbio->bio_list));
 
-	for (i = 0; i < rbio->nr_pages; i++) {
-		if (rbio->stripe_pages[i]) {
-			__free_page(rbio->stripe_pages[i]);
-			rbio->stripe_pages[i] = NULL;
+	for (int i = 0; i < rbio->nr_folios; i++) {
+		if (rbio->stripe_folios[i]) {
+			folio_put(rbio->stripe_folios[i]);
+			rbio->stripe_folios[i] = NULL;
 		}
 	}
 
@@ -299,17 +297,16 @@ static int rbio_bucket(struct btrfs_raid_bio *rbio)
 	return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
 }
 
-static bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio,
-				       unsigned int page_nr)
+static bool full_folio_sectors_uptodate(struct btrfs_raid_bio *rbio,
+					unsigned int folio_nr)
 {
-	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
-	const u32 sectors_per_page = PAGE_SIZE / sectorsize;
+	struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+	const u32 sectors_per_folio = btrfs_min_folio_size(fs_info) >> fs_info->sectorsize_bits;
 	int i;
 
-	ASSERT(page_nr < rbio->nr_pages);
+	ASSERT(folio_nr < rbio->nr_folios);
 
-	for (i = sectors_per_page * page_nr;
-	     i < sectors_per_page * page_nr + sectors_per_page;
+	for (i = sectors_per_folio * folio_nr; i < sectors_per_folio * (folio_nr + 1);
 	     i++) {
 		if (!rbio->stripe_sectors[i].uptodate)
 			return false;
@@ -324,53 +321,54 @@ static bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio,
  */
 static void index_stripe_sectors(struct btrfs_raid_bio *rbio)
 {
-	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
-	u32 offset;
-	int i;
+	struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+	const u32 blocksize = fs_info->sectorsize;
+	const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
 
-	for (i = 0, offset = 0; i < rbio->nr_sectors; i++, offset += sectorsize) {
-		int page_index = offset >> PAGE_SHIFT;
+	for (u32 offset = 0; offset < rbio->nr_folios << min_folio_shift; offset += blocksize) {
+		const unsigned int findex = offset >> min_folio_shift;
+		const unsigned int sindex = offset >> fs_info->sectorsize_bits;
+		struct folio *folio = rbio->stripe_folios[findex];
 
-		ASSERT(page_index < rbio->nr_pages);
-		if (!rbio->stripe_pages[page_index])
+		ASSERT(findex < rbio->nr_folios);
+		if (!folio)
 			continue;
-
-		rbio->stripe_sectors[i].has_paddr = true;
-		rbio->stripe_sectors[i].paddr =
-			page_to_phys(rbio->stripe_pages[page_index]) +
-			offset_in_page(offset);
+		rbio->stripe_sectors[sindex].has_paddr = true;
+		rbio->stripe_sectors[sindex].paddr = folio_to_phys(folio) +
+			offset_in_folio(folio, offset);
 	}
 }
 
-static void steal_rbio_page(struct btrfs_raid_bio *src,
-			    struct btrfs_raid_bio *dest, int page_nr)
+static void steal_rbio_folio(struct btrfs_raid_bio *src,
+			     struct btrfs_raid_bio *dest, int folio_nr)
 {
-	const u32 sectorsize = src->bioc->fs_info->sectorsize;
-	const u32 sectors_per_page = PAGE_SIZE / sectorsize;
-	int i;
+	struct btrfs_fs_info *fs_info = src->bioc->fs_info;
+	const u32 sectors_per_folio = btrfs_min_folio_size(fs_info) >> fs_info->sectorsize_bits;
 
-	if (dest->stripe_pages[page_nr])
-		__free_page(dest->stripe_pages[page_nr]);
-	dest->stripe_pages[page_nr] = src->stripe_pages[page_nr];
-	src->stripe_pages[page_nr] = NULL;
+	if (dest->stripe_folios[folio_nr])
+		folio_put(dest->stripe_folios[folio_nr]);
+	dest->stripe_folios[folio_nr] = src->stripe_folios[folio_nr];
+	src->stripe_folios[folio_nr] = NULL;
 
 	/* Also update the sector->uptodate bits. */
-	for (i = sectors_per_page * page_nr;
-	     i < sectors_per_page * page_nr + sectors_per_page; i++)
+	for (int i = sectors_per_folio * folio_nr;
+	     i < sectors_per_folio * (folio_nr + 1); i++)
 		dest->stripe_sectors[i].uptodate = true;
 }
 
-static bool is_data_stripe_page(struct btrfs_raid_bio *rbio, int page_nr)
+static bool is_data_stripe_folio(struct btrfs_raid_bio *rbio, int folio_nr)
 {
-	const int sector_nr = (page_nr << PAGE_SHIFT) >>
+	struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+	const unsigned int min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
+	const int sector_nr = (folio_nr << min_folio_shift) >>
 			      rbio->bioc->fs_info->sectorsize_bits;
 
 	/*
-	 * We have ensured PAGE_SIZE is aligned with sectorsize, thus
-	 * we won't have a page which is half data half parity.
+	 * We have ensured folio size is aligned with sectorsize, thus
+	 * we won't have a folio which is half data half parity.
 	 *
-	 * Thus if the first sector of the page belongs to data stripes, then
-	 * the full page belongs to data stripes.
+	 * Thus if the first sector of the folio belongs to data stripes, then
+	 * the full folio belongs to data stripes.
 	 */
 	return (sector_nr < rbio->nr_data * rbio->stripe_nsectors);
 }
@@ -384,28 +382,26 @@ static bool is_data_stripe_page(struct btrfs_raid_bio *rbio, int page_nr)
  */
 static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
 {
-	int i;
-
 	if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
 		return;
 
-	for (i = 0; i < dest->nr_pages; i++) {
-		struct page *p = src->stripe_pages[i];
+	for (int i = 0; i < dest->nr_folios; i++) {
+		struct folio *folio = src->stripe_folios[i];
 
 		/*
-		 * We don't need to steal P/Q pages as they will always be
+		 * We don't need to steal P/Q folio as they will always be
 		 * regenerated for RMW or full write anyway.
 		 */
-		if (!is_data_stripe_page(src, i))
+		if (!is_data_stripe_folio(src, i))
 			continue;
 
 		/*
 		 * If @src already has RBIO_CACHE_READY_BIT, it should have
 		 * all data stripe pages present and uptodate.
 		 */
-		ASSERT(p);
-		ASSERT(full_page_sectors_uptodate(src, i));
-		steal_rbio_page(src, dest, i);
+		ASSERT(folio);
+		ASSERT(full_folio_sectors_uptodate(src, i));
+		steal_rbio_folio(src, dest, i);
 	}
 	index_stripe_sectors(dest);
 	index_stripe_sectors(src);
@@ -991,16 +987,15 @@ static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
 static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
 					 struct btrfs_io_context *bioc)
 {
+	const unsigned int min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
 	const unsigned int real_stripes = bioc->num_stripes - bioc->replace_nr_stripes;
-	const unsigned int stripe_npages = BTRFS_STRIPE_LEN >> PAGE_SHIFT;
-	const unsigned int num_pages = stripe_npages * real_stripes;
+	const unsigned int stripe_nfolios = BTRFS_STRIPE_LEN >> min_folio_shift;
+	const unsigned int num_folios = stripe_nfolios * real_stripes;
 	const unsigned int stripe_nsectors =
 		BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits;
 	const unsigned int num_sectors = stripe_nsectors * real_stripes;
 	struct btrfs_raid_bio *rbio;
 
-	/* PAGE_SIZE must also be aligned to sectorsize for subpage support */
-	ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize));
 	/*
 	 * Our current stripe len should be fixed to 64k thus stripe_nsectors
 	 * (at most 16) should be no larger than BITS_PER_LONG.
@@ -1017,8 +1012,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
 	rbio = kzalloc(sizeof(*rbio), GFP_NOFS);
 	if (!rbio)
 		return ERR_PTR(-ENOMEM);
-	rbio->stripe_pages = kcalloc(num_pages, sizeof(struct page *),
-				     GFP_NOFS);
+	rbio->stripe_folios = kcalloc(num_folios, sizeof(struct folio *), GFP_NOFS);
 	rbio->bio_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr),
 				    GFP_NOFS);
 	rbio->stripe_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr),
@@ -1026,7 +1020,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
 	rbio->finish_pointers = kcalloc(real_stripes, sizeof(void *), GFP_NOFS);
 	rbio->error_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS);
 
-	if (!rbio->stripe_pages || !rbio->bio_sectors || !rbio->stripe_sectors ||
+	if (!rbio->stripe_folios || !rbio->bio_sectors || !rbio->stripe_sectors ||
 	    !rbio->finish_pointers || !rbio->error_bitmap) {
 		free_raid_bio_pointers(rbio);
 		kfree(rbio);
@@ -1041,10 +1035,10 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
 	INIT_LIST_HEAD(&rbio->hash_list);
 	btrfs_get_bioc(bioc);
 	rbio->bioc = bioc;
-	rbio->nr_pages = num_pages;
+	rbio->nr_folios = num_folios;
 	rbio->nr_sectors = num_sectors;
 	rbio->real_stripes = real_stripes;
-	rbio->stripe_npages = stripe_npages;
+	rbio->stripe_nfolios = stripe_nfolios;
 	rbio->stripe_nsectors = stripe_nsectors;
 	refcount_set(&rbio->refs, 1);
 	atomic_set(&rbio->stripes_pending, 0);
@@ -1061,7 +1055,8 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
 {
 	int ret;
 
-	ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages, false);
+	ret = btrfs_alloc_folio_array(rbio->nr_folios, rbio->bioc->fs_info->block_min_order,
+				      rbio->stripe_folios);
 	if (ret < 0)
 		return ret;
 	/* Mapping all sectors */
@@ -1072,14 +1067,14 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
 /* only allocate pages for p/q stripes */
 static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
 {
-	const int data_pages = rbio->nr_data * rbio->stripe_npages;
+	const unsigned int data_folios = rbio->nr_data * rbio->stripe_nfolios;
 	int ret;
 
-	ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages,
-				     rbio->stripe_pages + data_pages, false);
+	ret = btrfs_alloc_folio_array(rbio->nr_folios - data_folios,
+				      rbio->bioc->fs_info->block_min_order,
+				      rbio->stripe_folios + data_folios);
 	if (ret < 0)
 		return ret;
-
 	index_stripe_sectors(rbio);
 	return 0;
 }
@@ -1488,7 +1483,7 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
 
 /*
  * For subpage case, we can no longer set page Up-to-date directly for
- * stripe_pages[], thus we need to locate the sector.
+ * stripe_folios[], thus we need to locate the sector.
  */
 static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
 					     phys_addr_t paddr)
@@ -1633,10 +1628,11 @@ static void submit_read_wait_bio_list(struct btrfs_raid_bio *rbio,
 
 static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
 {
-	const int data_pages = rbio->nr_data * rbio->stripe_npages;
+	const unsigned int data_folios = rbio->nr_data * rbio->stripe_nfolios;
 	int ret;
 
-	ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages, false);
+	ret = btrfs_alloc_folio_array(data_folios, rbio->bioc->fs_info->block_min_order,
+				      rbio->stripe_folios);
 	if (ret < 0)
 		return ret;
 
@@ -2475,23 +2471,25 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
  */
 static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
 {
-	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
+	struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+	const u32 sectorsize = fs_info->sectorsize;
+	const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
 	int total_sector_nr;
 
 	for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
 	     total_sector_nr++) {
-		struct page *page;
+		struct folio *folio;
 		int sectornr = total_sector_nr % rbio->stripe_nsectors;
-		int index = (total_sector_nr * sectorsize) >> PAGE_SHIFT;
+		unsigned int findex = (total_sector_nr * sectorsize) >> min_folio_shift;
 
 		if (!test_bit(sectornr, &rbio->dbitmap))
 			continue;
-		if (rbio->stripe_pages[index])
+		if (rbio->stripe_folios[findex])
 			continue;
-		page = alloc_page(GFP_NOFS);
-		if (!page)
+		folio = folio_alloc(GFP_NOFS, fs_info->block_min_order);
+		if (!folio)
 			return -ENOMEM;
-		rbio->stripe_pages[index] = page;
+		rbio->stripe_folios[findex] = folio;
 	}
 	index_stripe_sectors(rbio);
 	return 0;
@@ -2850,13 +2848,19 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
 void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
 				    struct page **data_pages, u64 data_logical)
 {
+	struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
 	const u64 offset_in_full_stripe = data_logical -
 					  rbio->bioc->full_stripe_logical;
-	const int page_index = offset_in_full_stripe >> PAGE_SHIFT;
-	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
-	const u32 sectors_per_page = PAGE_SIZE / sectorsize;
+	const u32 min_folio_size = btrfs_min_folio_size(fs_info);
+	const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
 	int ret;
 
+	/*
+	 * The caller is not yet converted to follow min_folio_shift. So our
+	 * minimal folio order must be 0 for now.
+	 */
+	ASSERT(fs_info->block_min_order == 0);
+
 	/*
 	 * If we hit ENOMEM temporarily, but later at
 	 * raid56_parity_submit_scrub_rbio() time it succeeded, we just do
@@ -2873,13 +2877,20 @@ void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
 	ASSERT(IS_ALIGNED(offset_in_full_stripe, BTRFS_STRIPE_LEN));
 	ASSERT(offset_in_full_stripe < (rbio->nr_data << BTRFS_STRIPE_LEN_SHIFT));
 
-	for (int page_nr = 0; page_nr < (BTRFS_STRIPE_LEN >> PAGE_SHIFT); page_nr++) {
-		struct page *dst = rbio->stripe_pages[page_nr + page_index];
-		struct page *src = data_pages[page_nr];
+	for (int cur = offset_in_full_stripe; cur < offset_in_full_stripe + BTRFS_STRIPE_LEN;
+	     cur += min_folio_size) {
+		struct folio *dest = rbio->stripe_folios[cur >> min_folio_shift];
+		struct folio *src = page_folio(data_pages[(cur - offset_in_full_stripe) >>
+							  min_folio_shift]);
 
-		memcpy_page(dst, 0, src, 0, PAGE_SIZE);
-		for (int sector_nr = sectors_per_page * page_index;
-		     sector_nr < sectors_per_page * (page_index + 1);
+		ASSERT(dest);
+		ASSERT(src);
+		/* Folios from source and destination should have the same order. */
+		ASSERT(folio_order(dest) == folio_order(src));
+		folio_copy(dest, src);
+
+		for (int sector_nr = cur >> fs_info->sectorsize_bits;
+		     sector_nr < (cur + BTRFS_STRIPE_LEN) >> fs_info->sectorsize_bits;
 		     sector_nr++)
 			rbio->stripe_sectors[sector_nr].uptodate = true;
 	}
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 0d7b4c2fb6ae..ddf6a7687eb6 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -64,8 +64,11 @@ struct btrfs_raid_bio {
 	 */
 	enum btrfs_rbio_ops operation;
 
-	/* How many pages there are for the full stripe including P/Q */
-	u16 nr_pages;
+	/*
+	 * How many folios there are for the full stripe including P/Q.
+	 * The folio size should be based on the fs_info::block_min_order.
+	 */
+	u16 nr_folios;
 
 	/* How many sectors there are for the full stripe including P/Q */
 	u16 nr_sectors;
@@ -76,8 +79,8 @@ struct btrfs_raid_bio {
 	/* Number of all stripes (including P/Q) */
 	u8 real_stripes;
 
-	/* How many pages there are for each stripe */
-	u8 stripe_npages;
+	/* How many folios there are for each stripe. */
+	u8 stripe_nfolios;
 
 	/* How many sectors there are for each stripe */
 	u8 stripe_nsectors;
@@ -110,17 +113,20 @@ struct btrfs_raid_bio {
 	 */
 
 	/*
-	 * Pointers to pages that we allocated for reading/writing stripes
+	 * Pointers to folios that we allocated for reading/writing stripes
 	 * directly from the disk (including P/Q).
+	 *
+	 * All folios are following fs_info::block_min_order, so that no block
+	 * will cross folio boundary.
 	 */
-	struct page **stripe_pages;
+	struct folio **stripe_folios;
 
 	/* Pointers to the sectors in the bio_list, for faster lookup */
 	struct sector_ptr *bio_sectors;
 
 	/*
-	 * For subpage support, we need to map each sector to above
-	 * stripe_pages.
+	 * For bs < ps support, we need to map each sector to above
+	 * stripe_folios.
 	 */
 	struct sector_ptr *stripe_sectors;
 
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] btrfs: prepare scrub to support bs > ps cases
  2025-09-12  3:59 [PATCH 0/2] btrfs: prepare raid56 and scrub to support bs > ps cases Qu Wenruo
  2025-09-12  3:59 ` [PATCH 1/2] btrfs: prepare raid56 " Qu Wenruo
@ 2025-09-12  3:59 ` Qu Wenruo
  2025-09-17  7:31 ` [PATCH 0/2] btrfs: prepare raid56 and " Qu Wenruo
  2 siblings, 0 replies; 4+ messages in thread
From: Qu Wenruo @ 2025-09-12  3:59 UTC (permalink / raw)
  To: linux-btrfs

This involves:

- Migrate scrub_stripe::pages[] to folios[]

- Use btrfs_alloc_folio_array() and folio_put() to allocate and free the
  above array.

- Migrate scrub_stripe_get_kaddr() and scrub_stripe_get_paddr() to use
  folio interfaces

- Migrate raid56_parity_cache_data_pages() to
  raid56_parity_cache_data_folios(),
  since scrub is the only caller still using pages.

Since most scrub code is based on kaddr/paddr, the migration itself is
pretty straightforward.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/raid56.c | 15 ++++----------
 fs/btrfs/raid56.h |  4 ++--
 fs/btrfs/scrub.c  | 51 +++++++++++++++++++++++++++--------------------
 3 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index ac94335dd262..4074ff4ddb1d 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -2842,11 +2842,11 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
  * This is for scrub call sites where we already have correct data contents.
  * This allows us to avoid reading data stripes again.
  *
- * Unfortunately here we have to do page copy, other than reusing the pages.
+ * Unfortunately here we have to do folio copy, other than reusing the pages.
  * This is due to the fact rbio has its own page management for its cache.
  */
-void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
-				    struct page **data_pages, u64 data_logical)
+void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio,
+				     struct folio **data_folios, u64 data_logical)
 {
 	struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
 	const u64 offset_in_full_stripe = data_logical -
@@ -2855,12 +2855,6 @@ void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
 	const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
 	int ret;
 
-	/*
-	 * The caller is not yet converted to follow min_folio_shift. So our
-	 * minimal folio order must be 0 for now.
-	 */
-	ASSERT(fs_info->block_min_order == 0);
-
 	/*
 	 * If we hit ENOMEM temporarily, but later at
 	 * raid56_parity_submit_scrub_rbio() time it succeeded, we just do
@@ -2880,8 +2874,7 @@ void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
 	for (int cur = offset_in_full_stripe; cur < offset_in_full_stripe + BTRFS_STRIPE_LEN;
 	     cur += min_folio_size) {
 		struct folio *dest = rbio->stripe_folios[cur >> min_folio_shift];
-		struct folio *src = page_folio(data_pages[(cur - offset_in_full_stripe) >>
-							  min_folio_shift]);
+		struct folio *src = data_folios[(cur - offset_in_full_stripe) >> min_folio_shift];
 
 		ASSERT(dest);
 		ASSERT(src);
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index ddf6a7687eb6..5662a3a2f04b 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -207,8 +207,8 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
 				unsigned long *dbitmap, int stripe_nsectors);
 void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
 
-void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
-				    struct page **data_pages, u64 data_logical);
+void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio,
+				     struct folio **data_folios, u64 data_logical);
 
 int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
 void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 979d33d8c193..cddf798c32e8 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -130,7 +130,7 @@ enum {
 	scrub_bitmap_nr_last,
 };
 
-#define SCRUB_STRIPE_PAGES		(BTRFS_STRIPE_LEN / PAGE_SIZE)
+#define SCRUB_STRIPE_MAX_FOLIOS		(BTRFS_STRIPE_LEN / PAGE_SIZE)
 
 /*
  * Represent one contiguous range with a length of BTRFS_STRIPE_LEN.
@@ -139,7 +139,7 @@ struct scrub_stripe {
 	struct scrub_ctx *sctx;
 	struct btrfs_block_group *bg;
 
-	struct page *pages[SCRUB_STRIPE_PAGES];
+	struct folio *folios[SCRUB_STRIPE_MAX_FOLIOS];
 	struct scrub_sector_verification *sectors;
 
 	struct btrfs_device *dev;
@@ -339,10 +339,10 @@ static void release_scrub_stripe(struct scrub_stripe *stripe)
 	if (!stripe)
 		return;
 
-	for (int i = 0; i < SCRUB_STRIPE_PAGES; i++) {
-		if (stripe->pages[i])
-			__free_page(stripe->pages[i]);
-		stripe->pages[i] = NULL;
+	for (int i = 0; i < SCRUB_STRIPE_MAX_FOLIOS; i++) {
+		if (stripe->folios[i])
+			folio_put(stripe->folios[i]);
+		stripe->folios[i] = NULL;
 	}
 	kfree(stripe->sectors);
 	kfree(stripe->csums);
@@ -355,6 +355,7 @@ static void release_scrub_stripe(struct scrub_stripe *stripe)
 static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
 			     struct scrub_stripe *stripe)
 {
+	const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
 	int ret;
 
 	memset(stripe, 0, sizeof(*stripe));
@@ -367,7 +368,9 @@ static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
 	atomic_set(&stripe->pending_io, 0);
 	spin_lock_init(&stripe->write_error_lock);
 
-	ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages, false);
+	ASSERT(BTRFS_STRIPE_LEN >> min_folio_shift <= SCRUB_STRIPE_MAX_FOLIOS);
+	ret = btrfs_alloc_folio_array(BTRFS_STRIPE_LEN >> min_folio_shift,
+				      fs_info->block_min_order, stripe->folios);
 	if (ret < 0)
 		goto error;
 
@@ -687,27 +690,30 @@ static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
 
 static void *scrub_stripe_get_kaddr(struct scrub_stripe *stripe, int sector_nr)
 {
-	u32 offset = (sector_nr << stripe->bg->fs_info->sectorsize_bits);
-	const struct page *page = stripe->pages[offset >> PAGE_SHIFT];
+	struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+	const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
+	u32 offset = (sector_nr << fs_info->sectorsize_bits);
+	const struct folio *folio = stripe->folios[offset >> min_folio_shift];
 
-	/* stripe->pages[] is allocated by us and no highmem is allowed. */
-	ASSERT(page);
-	ASSERT(!PageHighMem(page));
-	return page_address(page) + offset_in_page(offset);
+	/* stripe->folios[] is allocated by us and no highmem is allowed. */
+	ASSERT(folio);
+	ASSERT(!folio_test_partial_kmap(folio));
+	return folio_address(folio) + offset_in_folio(folio, offset);
 }
 
 static phys_addr_t scrub_stripe_get_paddr(struct scrub_stripe *stripe, int sector_nr)
 {
 	struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+	const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
 	u32 offset = (sector_nr << fs_info->sectorsize_bits);
-	const struct page *page = stripe->pages[offset >> PAGE_SHIFT];
+	const struct folio *folio = stripe->folios[offset >> min_folio_shift];
 
-	/* stripe->pages[] is allocated by us and no highmem is allowed. */
-	ASSERT(page);
-	ASSERT(!PageHighMem(page));
-	/* And the range must be contained inside the page. */
-	ASSERT(offset_in_page(offset) + fs_info->sectorsize <= PAGE_SIZE);
-	return page_to_phys(page) + offset_in_page(offset);
+	/* stripe->folios[] is allocated by us and no highmem is allowed. */
+	ASSERT(folio);
+	ASSERT(!folio_test_partial_kmap(folio));
+	/* And the range must be contained inside the folio. */
+	ASSERT(offset_in_folio(folio, offset) + fs_info->sectorsize <= folio_size(folio));
+	return folio_to_phys(folio) + offset_in_folio(folio, offset);
 }
 
 static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr)
@@ -1872,6 +1878,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
 {
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
 	struct btrfs_bio *bbio;
+	const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
 	unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits;
 	int mirror = stripe->mirror_num;
 
@@ -1884,7 +1891,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
 		return;
 	}
 
-	bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info,
+	bbio = btrfs_bio_alloc(BTRFS_STRIPE_LEN >> min_folio_shift, REQ_OP_READ, fs_info,
 			       scrub_read_endio, stripe);
 
 	bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT;
@@ -2215,7 +2222,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
 	for (int i = 0; i < data_stripes; i++) {
 		stripe = &sctx->raid56_data_stripes[i];
 
-		raid56_parity_cache_data_pages(rbio, stripe->pages,
+		raid56_parity_cache_data_folios(rbio, stripe->folios,
 				full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT));
 	}
 	raid56_parity_submit_scrub_rbio(rbio);
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 0/2] btrfs: prepare raid56 and scrub to support bs > ps cases
  2025-09-12  3:59 [PATCH 0/2] btrfs: prepare raid56 and scrub to support bs > ps cases Qu Wenruo
  2025-09-12  3:59 ` [PATCH 1/2] btrfs: prepare raid56 " Qu Wenruo
  2025-09-12  3:59 ` [PATCH 2/2] btrfs: prepare scrub " Qu Wenruo
@ 2025-09-17  7:31 ` Qu Wenruo
  2 siblings, 0 replies; 4+ messages in thread
From: Qu Wenruo @ 2025-09-17  7:31 UTC (permalink / raw)
  To: linux-btrfs



在 2025/9/12 13:29, Qu Wenruo 写道:
> After the previous compression bs > ps preparation, this series focus on
> raid56 and scrub.
> 
> Both raid56 and scrub are using a page array storing their stripes.
> 
> Thankfully both are already using physical memory addresses for checksum
> calculation, thus there are no internal code to do the checksum
> handling.
> 
> Just convert the involved arraies (and some RAID56 internal page related
> members) to folio arraies will handle most thing properly.
> 
> Now the remaining code is mostly encoded write, which shares some
> infrastructure with send, which makes the conversion more complex than I
> thought.

Please drop the series.

The scrub part is fine, but RAID56 shouldn't migrate to
mandatory large folios.

Currently we're relying on the minimal folio size for filemap, but in the
future direct IO will pass folios that do not meet our minimal order.

If we force RAID56 to use large folios, we will need to revert to
the current page-based solution to support direct IO again.

For now, I'll just disable RAID56 for bs > ps support.

Thanks,
Qu

> 
> Qu Wenruo (2):
>    btrfs: prepare raid56 to support bs > ps cases
>    btrfs: prepare scrub to support bs > ps cases
> 
>   fs/btrfs/misc.h   |   5 ++
>   fs/btrfs/raid56.c | 180 +++++++++++++++++++++++-----------------------
>   fs/btrfs/raid56.h |  26 ++++---
>   fs/btrfs/scrub.c  |  51 +++++++------
>   4 files changed, 142 insertions(+), 120 deletions(-)
> 


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-09-17  7:31 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-09-12  3:59 [PATCH 0/2] btrfs: prepare raid56 and scrub to support bs > ps cases Qu Wenruo
2025-09-12  3:59 ` [PATCH 1/2] btrfs: prepare raid56 " Qu Wenruo
2025-09-12  3:59 ` [PATCH 2/2] btrfs: prepare scrub " Qu Wenruo
2025-09-17  7:31 ` [PATCH 0/2] btrfs: prepare raid56 and " Qu Wenruo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox