From: linan666@huaweicloud.com
To: song@kernel.org, yukuai@fnnas.com
Cc: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org,
linan666@huaweicloud.com, yangerkun@huawei.com,
yi.zhang@huawei.com
Subject: [PATCH v3 6/8] md/raid1,raid10: use folio for sync path IO
Date: Thu, 16 Apr 2026 11:37:59 +0800
Message-ID: <20260416033801.786415-7-linan666@huaweicloud.com>
In-Reply-To: <20260416033801.786415-1-linan666@huaweicloud.com>
From: Li Nan <linan122@huawei.com>
Convert all IO on the sync path to use folios, and rename page-related
identifiers to match the folio naming.
Since a RESYNC_BLOCK_SIZE (64K) allocation is more likely to fail than a
4K one, retry with lower orders to improve allocation reliability. The
rf->folio entries of one r1/10_bio may therefore end up with different
orders, so derive r1/10_bio->sectors from the minimum order to avoid
exceeding a folio's size when the folio is later added to an IO.
Clean up:
1. Remove resync_get_all_folio() and invoke folio_get() directly instead.
2. Remove the redundant while(0) loop in md_bio_reset_resync_folio().
3. Drop the local 'bio' variable in r1buf_pool_alloc() and
r10buf_pool_alloc() and reference r1_bio->bios[j] / r10_bio->devs[j].bio
directly.
4. Remove RESYNC_PAGES.
5. Remove resync_fetch_folio() and access 'rf->folio' directly.
6. Remove resync_free_folio() and call folio_put() directly.
7. Clean up the sync IO size calculation in raid1/10_sync_request (see
the sketch below).
Signed-off-by: Li Nan <linan122@huawei.com>
---
drivers/md/md.c | 2 +-
drivers/md/raid1-10.c | 80 ++++---------
drivers/md/raid1.c | 209 +++++++++++++++-------------------
drivers/md/raid10.c | 254 +++++++++++++++++++++---------------------
4 files changed, 240 insertions(+), 305 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5e83914d5c14..6554b849ac74 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9440,7 +9440,7 @@ static bool sync_io_within_limit(struct mddev *mddev)
{
/*
* For raid456, sync IO is stripe(4k) per IO, for other levels, it's
- * RESYNC_PAGES(64k) per IO.
+ * RESYNC_BLOCK_SIZE(64k) per IO.
*/
return atomic_read(&mddev->recovery_active) <
(raid_is_456(mddev) ? 8 : 128) * sync_io_depth(mddev);
diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
index cda531d0720b..10200b0a3fd2 100644
--- a/drivers/md/raid1-10.c
+++ b/drivers/md/raid1-10.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Maximum size of each resync request */
#define RESYNC_BLOCK_SIZE (64*1024)
-#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
/* when we get a read error on a read-only array, we redirect to another
@@ -20,9 +19,9 @@
#define MAX_PLUG_BIO 32
/* for managing resync I/O pages */
-struct resync_pages {
+struct resync_folio {
void *raid_bio;
- struct page *pages[RESYNC_PAGES];
+ struct folio *folio;
};
struct raid1_plug_cb {
@@ -36,77 +35,44 @@ static void rbio_pool_free(void *rbio, void *data)
kfree(rbio);
}
-static inline int resync_alloc_pages(struct resync_pages *rp,
- gfp_t gfp_flags)
+static inline int resync_alloc_folio(struct resync_folio *rf,
+ gfp_t gfp_flags, int *order)
{
- int i;
+ struct folio *folio;
- for (i = 0; i < RESYNC_PAGES; i++) {
- rp->pages[i] = alloc_page(gfp_flags);
- if (!rp->pages[i])
- goto out_free;
- }
+ do {
+ folio = folio_alloc(gfp_flags, *order);
+ if (folio)
+ break;
+ } while (--(*order) > 0);
+ if (!folio)
+ return -ENOMEM;
+
+ rf->folio = folio;
return 0;
-
-out_free:
- while (--i >= 0)
- put_page(rp->pages[i]);
- return -ENOMEM;
-}
-
-static inline void resync_free_pages(struct resync_pages *rp)
-{
- int i;
-
- for (i = 0; i < RESYNC_PAGES; i++)
- put_page(rp->pages[i]);
-}
-
-static inline void resync_get_all_pages(struct resync_pages *rp)
-{
- int i;
-
- for (i = 0; i < RESYNC_PAGES; i++)
- get_page(rp->pages[i]);
-}
-
-static inline struct page *resync_fetch_page(struct resync_pages *rp,
- unsigned idx)
-{
- if (WARN_ON_ONCE(idx >= RESYNC_PAGES))
- return NULL;
- return rp->pages[idx];
}
/*
- * 'strct resync_pages' stores actual pages used for doing the resync
+ * 'struct resync_folio' stores the actual folio used for doing the resync
* IO, and it is per-bio, so make .bi_private points to it.
*/
-static inline struct resync_pages *get_resync_pages(struct bio *bio)
+static inline struct resync_folio *get_resync_folio(struct bio *bio)
{
return bio->bi_private;
}
/* generally called after bio_reset() for reseting bvec */
-static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp,
+static void md_bio_reset_resync_folio(struct bio *bio, struct resync_folio *rf,
int size)
{
- int idx = 0;
-
/* initialize bvec table again */
- do {
- struct page *page = resync_fetch_page(rp, idx);
- int len = min_t(int, size, PAGE_SIZE);
-
- if (WARN_ON(!bio_add_page(bio, page, len, 0))) {
- bio->bi_status = BLK_STS_RESOURCE;
- bio_endio(bio);
- return;
- }
-
- size -= len;
- } while (idx++ < RESYNC_PAGES && size > 0);
+ if (WARN_ON(!bio_add_folio(bio, rf->folio,
+ min_t(int, size, RESYNC_BLOCK_SIZE),
+ 0))) {
+ bio->bi_status = BLK_STS_RESOURCE;
+ bio_endio(bio);
+ }
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a72abdc37a2d..724fd4f2cc3a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -120,11 +120,11 @@ static void remove_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
/*
* for resync bio, r1bio pointer can be retrieved from the per-bio
- * 'struct resync_pages'.
+ * 'struct resync_folio'.
*/
static inline struct r1bio *get_resync_r1bio(struct bio *bio)
{
- return get_resync_pages(bio)->raid_bio;
+ return get_resync_folio(bio)->raid_bio;
}
static void *r1bio_pool_alloc(gfp_t gfp_flags, struct r1conf *conf)
@@ -146,70 +146,69 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
struct r1conf *conf = data;
struct r1bio *r1_bio;
struct bio *bio;
- int need_pages;
+ int need_folio;
int j;
- struct resync_pages *rps;
+ struct resync_folio *rfs;
+ int order = get_order(RESYNC_BLOCK_SIZE);
r1_bio = r1bio_pool_alloc(gfp_flags, conf);
if (!r1_bio)
return NULL;
- rps = kmalloc_array(conf->raid_disks * 2, sizeof(struct resync_pages),
+ rfs = kmalloc_array(conf->raid_disks * 2, sizeof(struct resync_folio),
gfp_flags);
- if (!rps)
+ if (!rfs)
goto out_free_r1bio;
/*
* Allocate bios : 1 for reading, n-1 for writing
*/
for (j = conf->raid_disks * 2; j-- ; ) {
- bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
+ bio = bio_kmalloc(1, gfp_flags);
if (!bio)
goto out_free_bio;
- bio_init_inline(bio, NULL, RESYNC_PAGES, 0);
+ bio_init_inline(bio, NULL, 1, 0);
r1_bio->bios[j] = bio;
}
/*
- * Allocate RESYNC_PAGES data pages and attach them to
- * the first bio.
+ * Allocate data folio and attach it to the first bio.
* If this is a user-requested check/repair, allocate
- * RESYNC_PAGES for each bio.
+ * folio for each bio.
*/
if (test_bit(MD_RECOVERY_REQUESTED, &conf->mddev->recovery))
- need_pages = conf->raid_disks * 2;
+ need_folio = conf->raid_disks * 2;
else
- need_pages = 1;
+ need_folio = 1;
for (j = 0; j < conf->raid_disks * 2; j++) {
- struct resync_pages *rp = &rps[j];
+ struct resync_folio *rf = &rfs[j];
- bio = r1_bio->bios[j];
-
- if (j < need_pages) {
- if (resync_alloc_pages(rp, gfp_flags))
- goto out_free_pages;
+ if (j < need_folio) {
+ if (resync_alloc_folio(rf, gfp_flags, &order))
+ goto out_free_folio;
} else {
- memcpy(rp, &rps[0], sizeof(*rp));
- resync_get_all_pages(rp);
+ memcpy(rf, &rfs[0], sizeof(*rf));
+ folio_get(rf->folio);
}
- rp->raid_bio = r1_bio;
- bio->bi_private = rp;
+ rf->raid_bio = r1_bio;
+ r1_bio->bios[j]->bi_private = rf;
}
+ r1_bio->sectors = 1 << (order + PAGE_SECTORS_SHIFT);
r1_bio->master_bio = NULL;
return r1_bio;
-out_free_pages:
+out_free_folio:
while (--j >= 0)
- resync_free_pages(&rps[j]);
+ folio_put(rfs[j].folio);
out_free_bio:
while (++j < conf->raid_disks * 2) {
bio_uninit(r1_bio->bios[j]);
kfree(r1_bio->bios[j]);
}
- kfree(rps);
+ kfree(rfs);
out_free_r1bio:
rbio_pool_free(r1_bio, data);
@@ -221,17 +220,17 @@ static void r1buf_pool_free(void *__r1_bio, void *data)
struct r1conf *conf = data;
int i;
struct r1bio *r1bio = __r1_bio;
- struct resync_pages *rp = NULL;
+ struct resync_folio *rf = NULL;
for (i = conf->raid_disks * 2; i--; ) {
- rp = get_resync_pages(r1bio->bios[i]);
- resync_free_pages(rp);
+ rf = get_resync_folio(r1bio->bios[i]);
+ folio_put(rf->folio);
bio_uninit(r1bio->bios[i]);
kfree(r1bio->bios[i]);
}
- /* resync pages array stored in the 1st bio's .bi_private */
- kfree(rp);
+ /* resync folio stored in the 1st bio's .bi_private */
+ kfree(rf);
rbio_pool_free(r1bio, data);
}
@@ -2095,10 +2094,10 @@ static void end_sync_write(struct bio *bio)
put_sync_write_buf(r1_bio);
}
-static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
- int sectors, struct page *page, blk_opf_t rw)
+static int r1_sync_folio_io(struct md_rdev *rdev, sector_t sector, int sectors,
+ int off, struct folio *folio, blk_opf_t rw)
{
- if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+ if (sync_folio_io(rdev, sector, sectors << 9, off, folio, rw, false))
/* success */
return 1;
if (rw == REQ_OP_WRITE) {
@@ -2129,10 +2128,10 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
struct mddev *mddev = r1_bio->mddev;
struct r1conf *conf = mddev->private;
struct bio *bio = r1_bio->bios[r1_bio->read_disk];
- struct page **pages = get_resync_pages(bio)->pages;
+ struct folio *folio = get_resync_folio(bio)->folio;
sector_t sect = r1_bio->sector;
int sectors = r1_bio->sectors;
- int idx = 0;
+ int off = 0;
struct md_rdev *rdev;
rdev = conf->mirrors[r1_bio->read_disk].rdev;
@@ -2162,9 +2161,8 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
* active, and resync is currently active
*/
rdev = conf->mirrors[d].rdev;
- if (sync_page_io(rdev, sect, s<<9,
- pages[idx],
- REQ_OP_READ, false)) {
+ if (sync_folio_io(rdev, sect, s<<9, off, folio,
+ REQ_OP_READ, false)) {
success = 1;
break;
}
@@ -2197,7 +2195,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
/* Try next page */
sectors -= s;
sect += s;
- idx++;
+ off += s << 9;
continue;
}
@@ -2210,8 +2208,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue;
rdev = conf->mirrors[d].rdev;
- if (r1_sync_page_io(rdev, sect, s,
- pages[idx],
+ if (r1_sync_folio_io(rdev, sect, s, off, folio,
REQ_OP_WRITE) == 0) {
r1_bio->bios[d]->bi_end_io = NULL;
rdev_dec_pending(rdev, mddev);
@@ -2225,14 +2222,13 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue;
rdev = conf->mirrors[d].rdev;
- if (r1_sync_page_io(rdev, sect, s,
- pages[idx],
+ if (r1_sync_folio_io(rdev, sect, s, off, folio,
REQ_OP_READ) != 0)
atomic_add(s, &rdev->corrected_errors);
}
sectors -= s;
sect += s;
- idx ++;
+ off += s << 9;
}
set_bit(R1BIO_Uptodate, &r1_bio->state);
bio->bi_status = 0;
@@ -2252,14 +2248,12 @@ static void process_checks(struct r1bio *r1_bio)
struct r1conf *conf = mddev->private;
int primary;
int i;
- int vcnt;
/* Fix variable parts of all bios */
- vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9);
for (i = 0; i < conf->raid_disks * 2; i++) {
blk_status_t status;
struct bio *b = r1_bio->bios[i];
- struct resync_pages *rp = get_resync_pages(b);
+ struct resync_folio *rf = get_resync_folio(b);
if (b->bi_end_io != end_sync_read)
continue;
/* fixup the bio for reuse, but preserve errno */
@@ -2269,11 +2263,11 @@ static void process_checks(struct r1bio *r1_bio)
b->bi_iter.bi_sector = r1_bio->sector +
conf->mirrors[i].rdev->data_offset;
b->bi_end_io = end_sync_read;
- rp->raid_bio = r1_bio;
- b->bi_private = rp;
+ rf->raid_bio = r1_bio;
+ b->bi_private = rf;
/* initialize bvec table again */
- md_bio_reset_resync_pages(b, rp, r1_bio->sectors << 9);
+ md_bio_reset_resync_folio(b, rf, r1_bio->sectors << 9);
}
for (primary = 0; primary < conf->raid_disks * 2; primary++)
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
@@ -2284,44 +2278,39 @@ static void process_checks(struct r1bio *r1_bio)
}
r1_bio->read_disk = primary;
for (i = 0; i < conf->raid_disks * 2; i++) {
- int j = 0;
struct bio *pbio = r1_bio->bios[primary];
struct bio *sbio = r1_bio->bios[i];
blk_status_t status = sbio->bi_status;
- struct page **ppages = get_resync_pages(pbio)->pages;
- struct page **spages = get_resync_pages(sbio)->pages;
- struct bio_vec *bi;
- int page_len[RESYNC_PAGES] = { 0 };
- struct bvec_iter_all iter_all;
+ struct folio *pfolio = get_resync_folio(pbio)->folio;
+ struct folio *sfolio = get_resync_folio(sbio)->folio;
if (sbio->bi_end_io != end_sync_read)
continue;
/* Now we can 'fixup' the error value */
sbio->bi_status = 0;
- bio_for_each_segment_all(bi, sbio, iter_all)
- page_len[j++] = bi->bv_len;
-
- if (!status) {
- for (j = vcnt; j-- ; ) {
- if (memcmp(page_address(ppages[j]),
- page_address(spages[j]),
- page_len[j]))
- break;
- }
- } else
- j = 0;
- if (j >= 0)
+ /*
+ * Copy data and submit write in two cases:
+ * - IO error (non-zero status)
+ * - Data inconsistency and not a CHECK operation.
+ */
+ if (status) {
atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
- if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
- && !status)) {
- /* No need to write to this device. */
- sbio->bi_end_io = NULL;
- rdev_dec_pending(conf->mirrors[i].rdev, mddev);
+ bio_copy_data(sbio, pbio);
continue;
+ } else if (memcmp(folio_address(pfolio),
+ folio_address(sfolio),
+ r1_bio->sectors << 9)) {
+ atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
+ if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
+ bio_copy_data(sbio, pbio);
+ continue;
+ }
}
- bio_copy_data(sbio, pbio);
+ /* No need to write to this device. */
+ sbio->bi_end_io = NULL;
+ rdev_dec_pending(conf->mirrors[i].rdev, mddev);
}
}
@@ -2446,9 +2435,8 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
if (rdev &&
!test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
- r1_sync_page_io(rdev, sect, s,
- folio_page(conf->tmpfolio, 0),
- REQ_OP_WRITE);
+ r1_sync_folio_io(rdev, sect, s, 0,
+ conf->tmpfolio, REQ_OP_WRITE);
rdev_dec_pending(rdev, mddev);
}
}
@@ -2461,9 +2449,8 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
if (rdev &&
!test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
- if (r1_sync_page_io(rdev, sect, s,
- folio_page(conf->tmpfolio, 0),
- REQ_OP_READ)) {
+ if (r1_sync_folio_io(rdev, sect, s, 0,
+ conf->tmpfolio, REQ_OP_READ)) {
atomic_add(s, &rdev->corrected_errors);
pr_info("md/raid1:%s: read error corrected (%d sectors at %llu on %pg)\n",
mdname(mddev), s,
@@ -2738,15 +2725,15 @@ static int init_resync(struct r1conf *conf)
static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf)
{
struct r1bio *r1bio = mempool_alloc(&conf->r1buf_pool, GFP_NOIO);
- struct resync_pages *rps;
+ struct resync_folio *rfs;
struct bio *bio;
int i;
for (i = conf->raid_disks * 2; i--; ) {
bio = r1bio->bios[i];
- rps = bio->bi_private;
+ rfs = bio->bi_private;
bio_reset(bio, NULL, 0);
- bio->bi_private = rps;
+ bio->bi_private = rfs;
}
r1bio->master_bio = NULL;
return r1bio;
@@ -2775,10 +2762,9 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
int write_targets = 0, read_targets = 0;
sector_t sync_blocks;
bool still_degraded = false;
- int good_sectors = RESYNC_SECTORS;
+ int good_sectors;
int min_bad = 0; /* number of sectors that are bad in all devices */
int idx = sector_to_idx(sector_nr);
- int page_idx = 0;
if (!mempool_initialized(&conf->r1buf_pool))
if (init_resync(conf))
@@ -2858,8 +2844,11 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
r1_bio->sector = sector_nr;
r1_bio->state = 0;
set_bit(R1BIO_IsSync, &r1_bio->state);
- /* make sure good_sectors won't go across barrier unit boundary */
- good_sectors = align_to_barrier_unit_end(sector_nr, good_sectors);
+ /*
+ * make sure good_sectors won't go across barrier unit boundary.
+ * r1_bio->sectors <= RESYNC_SECTORS.
+ */
+ good_sectors = align_to_barrier_unit_end(sector_nr, r1_bio->sectors);
for (i = 0; i < conf->raid_disks * 2; i++) {
struct md_rdev *rdev;
@@ -2979,44 +2968,28 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
max_sector = mddev->resync_max; /* Don't do IO beyond here */
if (max_sector > sector_nr + good_sectors)
max_sector = sector_nr + good_sectors;
- nr_sectors = 0;
- sync_blocks = 0;
do {
- struct page *page;
- int len = PAGE_SIZE;
- if (sector_nr + (len>>9) > max_sector)
- len = (max_sector - sector_nr) << 9;
- if (len == 0)
+ nr_sectors = max_sector - sector_nr;
+ if (nr_sectors == 0)
break;
- if (sync_blocks == 0) {
- if (!md_bitmap_start_sync(mddev, sector_nr,
- &sync_blocks, still_degraded) &&
- !conf->fullsync &&
- !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
- break;
- if ((len >> 9) > sync_blocks)
- len = sync_blocks<<9;
- }
+ if (!md_bitmap_start_sync(mddev, sector_nr,
+ &sync_blocks, still_degraded) &&
+ !conf->fullsync &&
+ !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+ nr_sectors = 0;
+ break;
+ }
+ if (nr_sectors > sync_blocks)
+ nr_sectors = sync_blocks;
for (i = 0 ; i < conf->raid_disks * 2; i++) {
- struct resync_pages *rp;
-
bio = r1_bio->bios[i];
- rp = get_resync_pages(bio);
if (bio->bi_end_io) {
- page = resync_fetch_page(rp, page_idx);
+ struct resync_folio *rf = get_resync_folio(bio);
- /*
- * won't fail because the vec table is big
- * enough to hold all these pages
- */
- __bio_add_page(bio, page, len, 0);
+ bio_add_folio_nofail(bio, rf->folio, nr_sectors << 9, 0);
}
}
- nr_sectors += len>>9;
- sector_nr += len>>9;
- sync_blocks -= (len>>9);
- } while (++page_idx < RESYNC_PAGES);
+ sector_nr += nr_sectors;
+ } while (0);
r1_bio->sectors = nr_sectors;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 26f93040cd13..3638e00fe420 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -96,11 +96,11 @@ static void end_reshape(struct r10conf *conf);
/*
* for resync bio, r10bio pointer can be retrieved from the per-bio
- * 'struct resync_pages'.
+ * 'struct resync_folio'.
*/
static inline struct r10bio *get_resync_r10bio(struct bio *bio)
{
- return get_resync_pages(bio)->raid_bio;
+ return get_resync_folio(bio)->raid_bio;
}
static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -133,8 +133,9 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
struct r10bio *r10_bio;
struct bio *bio;
int j;
- int nalloc, nalloc_rp;
- struct resync_pages *rps;
+ int nalloc, nalloc_rf;
+ struct resync_folio *rfs;
+ int order = get_order(RESYNC_BLOCK_SIZE);
r10_bio = r10bio_pool_alloc(gfp_flags, conf);
if (!r10_bio)
@@ -148,66 +149,64 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
/* allocate once for all bios */
if (!conf->have_replacement)
- nalloc_rp = nalloc;
+ nalloc_rf = nalloc;
else
- nalloc_rp = nalloc * 2;
- rps = kmalloc_array(nalloc_rp, sizeof(struct resync_pages), gfp_flags);
- if (!rps)
+ nalloc_rf = nalloc * 2;
+ rfs = kmalloc_array(nalloc_rf, sizeof(struct resync_folio), gfp_flags);
+ if (!rfs)
goto out_free_r10bio;
/*
* Allocate bios.
*/
for (j = nalloc ; j-- ; ) {
- bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
+ bio = bio_kmalloc(1, gfp_flags);
if (!bio)
goto out_free_bio;
- bio_init_inline(bio, NULL, RESYNC_PAGES, 0);
+ bio_init_inline(bio, NULL, 1, 0);
r10_bio->devs[j].bio = bio;
if (!conf->have_replacement)
continue;
- bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
+ bio = bio_kmalloc(1, gfp_flags);
if (!bio)
goto out_free_bio;
- bio_init_inline(bio, NULL, RESYNC_PAGES, 0);
+ bio_init_inline(bio, NULL, 1, 0);
r10_bio->devs[j].repl_bio = bio;
}
/*
- * Allocate RESYNC_PAGES data pages and attach them
- * where needed.
+ * Allocate data folio and attach it where needed.
*/
for (j = 0; j < nalloc; j++) {
struct bio *rbio = r10_bio->devs[j].repl_bio;
- struct resync_pages *rp, *rp_repl;
+ struct resync_folio *rf, *rf_repl;
- rp = &rps[j];
+ rf = &rfs[j];
if (rbio)
- rp_repl = &rps[nalloc + j];
-
- bio = r10_bio->devs[j].bio;
+ rf_repl = &rfs[nalloc + j];
if (!j || test_bit(MD_RECOVERY_SYNC,
&conf->mddev->recovery)) {
- if (resync_alloc_pages(rp, gfp_flags))
- goto out_free_pages;
+ if (resync_alloc_folio(rf, gfp_flags, &order))
+ goto out_free_folio;
} else {
- memcpy(rp, &rps[0], sizeof(*rp));
- resync_get_all_pages(rp);
+ memcpy(rf, &rfs[0], sizeof(*rf));
+ folio_get(rf->folio);
}
- rp->raid_bio = r10_bio;
- bio->bi_private = rp;
+ rf->raid_bio = r10_bio;
+ r10_bio->devs[j].bio->bi_private = rf;
if (rbio) {
- memcpy(rp_repl, rp, sizeof(*rp));
- rbio->bi_private = rp_repl;
+ memcpy(rf_repl, rf, sizeof(*rf));
+ rbio->bi_private = rf_repl;
}
}
+ r10_bio->sectors = 1 << (order + PAGE_SECTORS_SHIFT);
return r10_bio;
-out_free_pages:
+out_free_folio:
while (--j >= 0)
- resync_free_pages(&rps[j]);
+ folio_put(rfs[j].folio);
j = 0;
out_free_bio:
@@ -219,7 +218,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
bio_uninit(r10_bio->devs[j].repl_bio);
kfree(r10_bio->devs[j].repl_bio);
}
- kfree(rps);
+ kfree(rfs);
out_free_r10bio:
rbio_pool_free(r10_bio, conf);
return NULL;
@@ -230,14 +229,14 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
struct r10conf *conf = data;
struct r10bio *r10bio = __r10_bio;
int j;
- struct resync_pages *rp = NULL;
+ struct resync_folio *rf = NULL;
for (j = conf->copies; j--; ) {
struct bio *bio = r10bio->devs[j].bio;
if (bio) {
- rp = get_resync_pages(bio);
- resync_free_pages(rp);
+ rf = get_resync_folio(bio);
+ folio_put(rf->folio);
bio_uninit(bio);
kfree(bio);
}
@@ -250,7 +249,7 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
}
/* resync pages array stored in the 1st bio's .bi_private */
- kfree(rp);
+ kfree(rf);
rbio_pool_free(r10bio, conf);
}
@@ -2342,8 +2341,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
struct r10conf *conf = mddev->private;
int i, first;
struct bio *tbio, *fbio;
- int vcnt;
- struct page **tpages, **fpages;
+ struct folio *tfolio, *ffolio;
atomic_set(&r10_bio->remaining, 1);
@@ -2359,14 +2357,13 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
fbio = r10_bio->devs[i].bio;
fbio->bi_iter.bi_size = r10_bio->sectors << 9;
fbio->bi_iter.bi_idx = 0;
- fpages = get_resync_pages(fbio)->pages;
+ ffolio = get_resync_folio(fbio)->folio;
- vcnt = (r10_bio->sectors + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9);
/* now find blocks with errors */
for (i=0 ; i < conf->copies ; i++) {
- int j, d;
+ int d;
struct md_rdev *rdev;
- struct resync_pages *rp;
+ struct resync_folio *rf;
tbio = r10_bio->devs[i].bio;
@@ -2375,31 +2372,23 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
if (i == first)
continue;
- tpages = get_resync_pages(tbio)->pages;
+ tfolio = get_resync_folio(tbio)->folio;
d = r10_bio->devs[i].devnum;
rdev = conf->mirrors[d].rdev;
if (!r10_bio->devs[i].bio->bi_status) {
/* We know that the bi_io_vec layout is the same for
* both 'first' and 'i', so we just compare them.
- * All vec entries are PAGE_SIZE;
*/
- int sectors = r10_bio->sectors;
- for (j = 0; j < vcnt; j++) {
- int len = PAGE_SIZE;
- if (sectors < (len / 512))
- len = sectors * 512;
- if (memcmp(page_address(fpages[j]),
- page_address(tpages[j]),
- len))
- break;
- sectors -= len/512;
+ if (memcmp(folio_address(ffolio),
+ folio_address(tfolio),
+ r10_bio->sectors << 9)) {
+ atomic64_add(r10_bio->sectors,
+ &mddev->resync_mismatches);
+ if (test_bit(MD_RECOVERY_CHECK,
+ &mddev->recovery))
+ /* Don't fix anything. */
+ continue;
}
- if (j == vcnt)
- continue;
- atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
- if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
- /* Don't fix anything. */
- continue;
} else if (test_bit(FailFast, &rdev->flags)) {
/* Just give up on this device */
md_error(rdev->mddev, rdev);
@@ -2410,13 +2399,13 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
* First we need to fixup bv_offset, bv_len and
* bi_vecs, as the read request might have corrupted these
*/
- rp = get_resync_pages(tbio);
+ rf = get_resync_folio(tbio);
bio_reset(tbio, conf->mirrors[d].rdev->bdev, REQ_OP_WRITE);
- md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size);
+ md_bio_reset_resync_folio(tbio, rf, fbio->bi_iter.bi_size);
- rp->raid_bio = r10_bio;
- tbio->bi_private = rp;
+ rf->raid_bio = r10_bio;
+ tbio->bi_private = rf;
tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
tbio->bi_end_io = end_sync_write;
@@ -2476,10 +2465,9 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
struct bio *bio = r10_bio->devs[0].bio;
sector_t sect = 0;
int sectors = r10_bio->sectors;
- int idx = 0;
int dr = r10_bio->devs[0].devnum;
int dw = r10_bio->devs[1].devnum;
- struct page **pages = get_resync_pages(bio)->pages;
+ struct folio *folio = get_resync_folio(bio)->folio;
while (sectors) {
int s = sectors;
@@ -2492,19 +2480,21 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
rdev = conf->mirrors[dr].rdev;
addr = r10_bio->devs[0].addr + sect;
- ok = sync_page_io(rdev,
- addr,
- s << 9,
- pages[idx],
- REQ_OP_READ, false);
+ ok = sync_folio_io(rdev,
+ addr,
+ s << 9,
+ sect << 9,
+ folio,
+ REQ_OP_READ, false);
if (ok) {
rdev = conf->mirrors[dw].rdev;
addr = r10_bio->devs[1].addr + sect;
- ok = sync_page_io(rdev,
- addr,
- s << 9,
- pages[idx],
- REQ_OP_WRITE, false);
+ ok = sync_folio_io(rdev,
+ addr,
+ s << 9,
+ sect << 9,
+ folio,
+ REQ_OP_WRITE, false);
if (!ok) {
set_bit(WriteErrorSeen, &rdev->flags);
if (!test_and_set_bit(WantReplacement,
@@ -2539,7 +2529,6 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
sectors -= s;
sect += s;
- idx++;
}
}
@@ -3050,7 +3039,7 @@ static int init_resync(struct r10conf *conf)
static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
{
struct r10bio *r10bio = mempool_alloc(&conf->r10buf_pool, GFP_NOIO);
- struct rsync_pages *rp;
+ struct resync_folio *rf;
struct bio *bio;
int nalloc;
int i;
@@ -3063,14 +3052,14 @@ static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
for (i = 0; i < nalloc; i++) {
bio = r10bio->devs[i].bio;
- rp = bio->bi_private;
+ rf = bio->bi_private;
bio_reset(bio, NULL, 0);
- bio->bi_private = rp;
+ bio->bi_private = rf;
bio = r10bio->devs[i].repl_bio;
if (bio) {
- rp = bio->bi_private;
+ rf = bio->bi_private;
bio_reset(bio, NULL, 0);
- bio->bi_private = rp;
+ bio->bi_private = rf;
}
}
return r10bio;
@@ -3156,7 +3145,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
int max_sync = RESYNC_SECTORS;
sector_t sync_blocks;
sector_t chunk_mask = conf->geo.chunk_mask;
- int page_idx = 0;
/*
* Allow skipping a full rebuild for incremental assembly
@@ -3376,6 +3364,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
continue;
}
}
+
+ /*
+ * The RESYNC_BLOCK_SIZE folio allocation might have
+ * failed in resync_alloc_folio(). Fall back to a
+ * smaller sync size if needed.
+ */
+ if (max_sync > r10_bio->sectors)
+ max_sync = r10_bio->sectors;
+
any_working = 1;
bio = r10_bio->devs[0].bio;
bio->bi_next = biolist;
@@ -3527,7 +3524,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
}
if (sync_blocks < max_sync)
max_sync = sync_blocks;
+
r10_bio = raid10_alloc_init_r10buf(conf);
+ /*
+ * The RESYNC_BLOCK_SIZE folio allocation might have failed in
+ * resync_alloc_folio(). Fall back to a smaller sync size if needed.
+ */
+ if (max_sync > r10_bio->sectors)
+ max_sync = r10_bio->sectors;
+
r10_bio->state = 0;
r10_bio->mddev = mddev;
@@ -3620,29 +3625,25 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
}
}
- nr_sectors = 0;
if (sector_nr + max_sync < max_sector)
max_sector = sector_nr + max_sync;
do {
- struct page *page;
- int len = PAGE_SIZE;
- if (sector_nr + (len>>9) > max_sector)
- len = (max_sector - sector_nr) << 9;
- if (len == 0)
+ nr_sectors = max_sector - sector_nr;
+
+ if (nr_sectors == 0)
break;
for (bio= biolist ; bio ; bio=bio->bi_next) {
- struct resync_pages *rp = get_resync_pages(bio);
- page = resync_fetch_page(rp, page_idx);
- if (WARN_ON(!bio_add_page(bio, page, len, 0))) {
+ struct resync_folio *rf = get_resync_folio(bio);
+
+ if (WARN_ON(!bio_add_folio(bio, rf->folio, nr_sectors << 9, 0))) {
bio->bi_status = BLK_STS_RESOURCE;
bio_endio(bio);
*skipped = 1;
- return max_sync;
+ return nr_sectors;
}
}
- nr_sectors += len>>9;
- sector_nr += len>>9;
- } while (++page_idx < RESYNC_PAGES);
+ sector_nr += nr_sectors;
+ } while (0);
r10_bio->sectors = nr_sectors;
if (mddev_is_clustered(mddev) &&
@@ -4560,7 +4561,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
int *skipped)
{
/* We simply copy at most one chunk (smallest of old and new)
- * at a time, possibly less if that exceeds RESYNC_PAGES,
+ * at a time, possibly less if that exceeds RESYNC_BLOCK_SIZE,
* or we hit a bad block or something.
* This might mean we pause for normal IO in the middle of
* a chunk, but that is not a problem as mddev->reshape_position
@@ -4600,14 +4601,13 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
struct r10bio *r10_bio;
sector_t next, safe, last;
int max_sectors;
- int nr_sectors;
int s;
struct md_rdev *rdev;
int need_flush = 0;
struct bio *blist;
struct bio *bio, *read_bio;
int sectors_done = 0;
- struct page **pages;
+ struct folio *folio;
if (sector_nr == 0) {
/* If restarting in the middle, skip the initial sectors */
@@ -4709,7 +4709,12 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
r10_bio->mddev = mddev;
r10_bio->sector = sector_nr;
set_bit(R10BIO_IsReshape, &r10_bio->state);
- r10_bio->sectors = last - sector_nr + 1;
+ /*
+ * The RESYNC_BLOCK_SIZE folio allocation might have
+ * failed in resync_alloc_folio(). Fall back to a
+ * smaller sync size if needed.
+ */
+ r10_bio->sectors = min_t(int, r10_bio->sectors, last - sector_nr + 1);
rdev = read_balance(conf, r10_bio, &max_sectors);
BUG_ON(!test_bit(R10BIO_Previous, &r10_bio->state));
@@ -4723,7 +4728,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
return sectors_done;
}
- read_bio = bio_alloc_bioset(rdev->bdev, RESYNC_PAGES, REQ_OP_READ,
+ read_bio = bio_alloc_bioset(rdev->bdev, 1, REQ_OP_READ,
GFP_KERNEL, &mddev->bio_set);
read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
+ rdev->data_offset);
@@ -4787,32 +4792,23 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
blist = b;
}
- /* Now add as many pages as possible to all of these bios. */
+ /* Now add the folio to all of these bios. */
- nr_sectors = 0;
- pages = get_resync_pages(r10_bio->devs[0].bio)->pages;
- for (s = 0 ; s < max_sectors; s += PAGE_SIZE >> 9) {
- struct page *page = pages[s / (PAGE_SIZE >> 9)];
- int len = (max_sectors - s) << 9;
- if (len > PAGE_SIZE)
- len = PAGE_SIZE;
- for (bio = blist; bio ; bio = bio->bi_next) {
- if (WARN_ON(!bio_add_page(bio, page, len, 0))) {
- bio->bi_status = BLK_STS_RESOURCE;
- bio_endio(bio);
- return sectors_done;
- }
+ folio = get_resync_folio(r10_bio->devs[0].bio)->folio;
+ for (bio = blist; bio ; bio = bio->bi_next) {
+ if (WARN_ON(!bio_add_folio(bio, folio, max_sectors << 9, 0))) {
+ bio->bi_status = BLK_STS_RESOURCE;
+ bio_endio(bio);
+ return sectors_done;
}
- sector_nr += len >> 9;
- nr_sectors += len >> 9;
}
- r10_bio->sectors = nr_sectors;
+ sector_nr += max_sectors;
+ r10_bio->sectors = max_sectors;
/* Now submit the read */
atomic_inc(&r10_bio->remaining);
read_bio->bi_next = NULL;
submit_bio_noacct(read_bio);
- sectors_done += nr_sectors;
+ sectors_done += max_sectors;
if (sector_nr <= last)
goto read_more;
@@ -4914,8 +4910,8 @@ static int handle_reshape_read_error(struct mddev *mddev,
struct r10conf *conf = mddev->private;
struct r10bio *r10b;
int slot = 0;
- int idx = 0;
- struct page **pages;
+ int sect = 0;
+ struct folio *folio;
r10b = kmalloc(struct_size(r10b, devs, conf->copies), GFP_NOIO);
if (!r10b) {
@@ -4923,8 +4919,8 @@ static int handle_reshape_read_error(struct mddev *mddev,
return -ENOMEM;
}
- /* reshape IOs share pages from .devs[0].bio */
- pages = get_resync_pages(r10_bio->devs[0].bio)->pages;
+ /* reshape IOs share folio from .devs[0].bio */
+ folio = get_resync_folio(r10_bio->devs[0].bio)->folio;
r10b->sector = r10_bio->sector;
__raid10_find_phys(&conf->prev, r10b);
@@ -4940,19 +4936,19 @@ static int handle_reshape_read_error(struct mddev *mddev,
while (!success) {
int d = r10b->devs[slot].devnum;
struct md_rdev *rdev = conf->mirrors[d].rdev;
- sector_t addr;
if (rdev == NULL ||
test_bit(Faulty, &rdev->flags) ||
!test_bit(In_sync, &rdev->flags))
goto failed;
- addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
atomic_inc(&rdev->nr_pending);
- success = sync_page_io(rdev,
- addr,
- s << 9,
- pages[idx],
- REQ_OP_READ, false);
+ success = sync_folio_io(rdev,
+ r10b->devs[slot].addr +
+ sect,
+ s << 9,
+ sect << 9,
+ folio,
+ REQ_OP_READ, false);
rdev_dec_pending(rdev, mddev);
if (success)
break;
@@ -4971,7 +4967,7 @@ static int handle_reshape_read_error(struct mddev *mddev,
return -EIO;
}
sectors -= s;
- idx++;
+ sect += s;
}
kfree(r10b);
return 0;
--
2.39.2