From: Qu Wenruo <wqu@suse.com>
To: Christoph Hellwig <hch@lst.de>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 6/8] btrfs: store a kernel virtual address in struct sector_ptr
Date: Thu, 10 Apr 2025 08:04:25 +0930 [thread overview]
Message-ID: <0b3a6b18-ab19-4997-86dc-fd269b7b61da@suse.com> (raw)
In-Reply-To: <20250409111055.3640328-7-hch@lst.de>
在 2025/4/9 20:40, Christoph Hellwig 写道:
> All data pointed to by struct sector_ptr is non-highmem kernel memory.
> Simplify the code by using a void pointer instead of a page + offset
> pair and dropping all the kmap calls.
But the higher level bio from btrfs filemaps can have highmem pages.
That's why we keep the kmap/kunmap.
Or is there a way to set the filemap to no use any highmem pages?
Thanks,
Qu
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/btrfs/raid56.c | 167 +++++++++++++++++-----------------------------
> 1 file changed, 62 insertions(+), 105 deletions(-)
>
> diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
> index 703e713bac03..3ccbf133b455 100644
> --- a/fs/btrfs/raid56.c
> +++ b/fs/btrfs/raid56.c
> @@ -134,14 +134,12 @@ struct btrfs_stripe_hash_table {
> };
>
> /*
> - * A bvec like structure to present a sector inside a page.
> - *
> - * Unlike bvec we don't need bvlen, as it's fixed to sectorsize.
> + * A structure to present a sector inside a page, the length is fixed to
> + * sectorsize;
> */
> struct sector_ptr {
> - struct page *page;
> - unsigned int pgoff:24;
> - unsigned int uptodate:8;
> + void *kaddr;
> + u8 uptodate;
> };
>
> static void rmw_rbio_work(struct work_struct *work);
> @@ -253,7 +251,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
>
> for (i = 0; i < rbio->nr_sectors; i++) {
> /* Some range not covered by bio (partial write), skip it */
> - if (!rbio->bio_sectors[i].page) {
> + if (!rbio->bio_sectors[i].kaddr) {
> /*
> * Even if the sector is not covered by bio, if it is
> * a data sector it should still be uptodate as it is
> @@ -264,11 +262,9 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
> continue;
> }
>
> - ASSERT(rbio->stripe_sectors[i].page);
> - memcpy_page(rbio->stripe_sectors[i].page,
> - rbio->stripe_sectors[i].pgoff,
> - rbio->bio_sectors[i].page,
> - rbio->bio_sectors[i].pgoff,
> + ASSERT(rbio->stripe_sectors[i].kaddr);
> + memcpy(rbio->stripe_sectors[i].kaddr,
> + rbio->bio_sectors[i].kaddr,
> rbio->bioc->fs_info->sectorsize);
> rbio->stripe_sectors[i].uptodate = 1;
> }
> @@ -326,8 +322,9 @@ static void index_stripe_sectors(struct btrfs_raid_bio *rbio)
> int page_index = offset >> PAGE_SHIFT;
>
> ASSERT(page_index < rbio->nr_pages);
> - rbio->stripe_sectors[i].page = rbio->stripe_pages[page_index];
> - rbio->stripe_sectors[i].pgoff = offset_in_page(offset);
> + rbio->stripe_sectors[i].kaddr =
> + page_address(rbio->stripe_pages[page_index]) +
> + offset_in_page(offset);
> }
> }
>
> @@ -962,9 +959,9 @@ static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
>
> spin_lock(&rbio->bio_list_lock);
> sector = &rbio->bio_sectors[index];
> - if (sector->page || bio_list_only) {
> + if (sector->kaddr || bio_list_only) {
> /* Don't return sector without a valid page pointer */
> - if (!sector->page)
> + if (!sector->kaddr)
> sector = NULL;
> spin_unlock(&rbio->bio_list_lock);
> return sector;
> @@ -1142,7 +1139,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
> rbio, stripe_nr);
> ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors,
> rbio, sector_nr);
> - ASSERT(sector->page);
> + ASSERT(sector->kaddr);
>
> stripe = &rbio->bioc->stripes[stripe_nr];
> disk_start = stripe->physical + sector_nr * sectorsize;
> @@ -1173,8 +1170,9 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
> */
> if (last_end == disk_start && !last->bi_status &&
> last->bi_bdev == stripe->dev->bdev) {
> - ret = bio_add_page(last, sector->page, sectorsize,
> - sector->pgoff);
> + ret = bio_add_page(last, virt_to_page(sector->kaddr),
> + sectorsize,
> + offset_in_page(sector->kaddr));
> if (ret == sectorsize)
> return 0;
> }
> @@ -1187,7 +1185,8 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
> bio->bi_iter.bi_sector = disk_start >> SECTOR_SHIFT;
> bio->bi_private = rbio;
>
> - __bio_add_page(bio, sector->page, sectorsize, sector->pgoff);
> + __bio_add_page(bio, virt_to_page(sector->kaddr), sectorsize,
> + offset_in_page(sector->kaddr));
> bio_list_add(bio_list, bio);
> return 0;
> }
> @@ -1204,10 +1203,7 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
> struct sector_ptr *sector = &rbio->bio_sectors[index];
> struct bio_vec bv = bio_iter_iovec(bio, iter);
>
> - sector->page = bv.bv_page;
> - sector->pgoff = bv.bv_offset;
> - ASSERT(sector->pgoff < PAGE_SIZE);
> -
> + sector->kaddr = bvec_virt(&bv);
> bio_advance_iter_single(bio, &iter, sectorsize);
> }
> }
> @@ -1298,14 +1294,13 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
> /* First collect one sector from each data stripe */
> for (stripe = 0; stripe < rbio->nr_data; stripe++) {
> sector = sector_in_rbio(rbio, stripe, sectornr, 0);
> - pointers[stripe] = kmap_local_page(sector->page) +
> - sector->pgoff;
> + pointers[stripe] = sector->kaddr;
> }
>
> /* Then add the parity stripe */
> sector = rbio_pstripe_sector(rbio, sectornr);
> sector->uptodate = 1;
> - pointers[stripe++] = kmap_local_page(sector->page) + sector->pgoff;
> + pointers[stripe++] = sector->kaddr;
>
> if (has_qstripe) {
> /*
> @@ -1314,8 +1309,7 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
> */
> sector = rbio_qstripe_sector(rbio, sectornr);
> sector->uptodate = 1;
> - pointers[stripe++] = kmap_local_page(sector->page) +
> - sector->pgoff;
> + pointers[stripe++] = sector->kaddr;
>
> assert_rbio(rbio);
> raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
> @@ -1325,8 +1319,6 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
> memcpy(pointers[rbio->nr_data], pointers[0], sectorsize);
> run_xor(pointers + 1, rbio->nr_data - 1, sectorsize);
> }
> - for (stripe = stripe - 1; stripe >= 0; stripe--)
> - kunmap_local(pointers[stripe]);
> }
>
> static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
> @@ -1474,15 +1466,14 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
> * stripe_pages[], thus we need to locate the sector.
> */
> static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
> - struct page *page,
> - unsigned int pgoff)
> + void *kaddr)
> {
> int i;
>
> for (i = 0; i < rbio->nr_sectors; i++) {
> struct sector_ptr *sector = &rbio->stripe_sectors[i];
>
> - if (sector->page == page && sector->pgoff == pgoff)
> + if (sector->kaddr == kaddr)
> return sector;
> }
> return NULL;
> @@ -1502,11 +1493,11 @@ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
>
> bio_for_each_segment_all(bvec, bio, iter_all) {
> struct sector_ptr *sector;
> - int pgoff;
> + void *kaddr = bvec_virt(bvec);
> + int off;
>
> - for (pgoff = bvec->bv_offset; pgoff - bvec->bv_offset < bvec->bv_len;
> - pgoff += sectorsize) {
> - sector = find_stripe_sector(rbio, bvec->bv_page, pgoff);
> + for (off = 0; off < bvec->bv_len; off += sectorsize) {
> + sector = find_stripe_sector(rbio, kaddr + off);
> ASSERT(sector);
> if (sector)
> sector->uptodate = 1;
> @@ -1516,17 +1507,13 @@ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
>
> static int get_bio_sector_nr(struct btrfs_raid_bio *rbio, struct bio *bio)
> {
> - struct bio_vec *bv = bio_first_bvec_all(bio);
> + void *bvec_kaddr = bvec_virt(bio_first_bvec_all(bio));
> int i;
>
> for (i = 0; i < rbio->nr_sectors; i++) {
> - struct sector_ptr *sector;
> -
> - sector = &rbio->stripe_sectors[i];
> - if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
> + if (rbio->stripe_sectors[i].kaddr == bvec_kaddr)
> break;
> - sector = &rbio->bio_sectors[i];
> - if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
> + if (rbio->bio_sectors[i].kaddr == bvec_kaddr)
> break;
> }
> ASSERT(i < rbio->nr_sectors);
> @@ -1789,8 +1776,6 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
> struct sector_ptr *sector;
> u8 csum_buf[BTRFS_CSUM_SIZE];
> u8 *csum_expected;
> - void *kaddr;
> - int ret;
>
> if (!rbio->csum_bitmap || !rbio->csum_buf)
> return 0;
> @@ -1808,15 +1793,13 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
> sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
> }
>
> - ASSERT(sector->page);
> + ASSERT(sector->kaddr);
>
> - kaddr = kmap_local_page(sector->page) + sector->pgoff;
> csum_expected = rbio->csum_buf +
> (stripe_nr * rbio->stripe_nsectors + sector_nr) *
> fs_info->csum_size;
> - ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, csum_expected);
> - kunmap_local(kaddr);
> - return ret;
> + return btrfs_check_sector_csum(fs_info, sector->kaddr, csum_buf,
> + csum_expected);
> }
>
> /*
> @@ -1825,7 +1808,7 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
> * need to allocate/free the pointers again and again.
> */
> static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
> - void **pointers, void **unmap_array)
> + void **pointers)
> {
> struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
> struct sector_ptr *sector;
> @@ -1872,10 +1855,8 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
> } else {
> sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
> }
> - ASSERT(sector->page);
> - pointers[stripe_nr] = kmap_local_page(sector->page) +
> - sector->pgoff;
> - unmap_array[stripe_nr] = pointers[stripe_nr];
> + ASSERT(sector->kaddr);
> + pointers[stripe_nr] = sector->kaddr;
> }
>
> /* All raid6 handling here */
> @@ -1889,7 +1870,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
> * We have nothing to do, just skip the
> * recovery for this stripe.
> */
> - goto cleanup;
> + return ret;
> /*
> * a single failure in raid6 is rebuilt
> * in the pstripe code below
> @@ -1911,7 +1892,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
> * We only care about data stripes recovery,
> * can skip this vertical stripe.
> */
> - goto cleanup;
> + return ret;
> /*
> * Otherwise we have one bad data stripe and
> * a good P stripe. raid5!
> @@ -1960,7 +1941,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
> if (faila >= 0) {
> ret = verify_one_sector(rbio, faila, sector_nr);
> if (ret < 0)
> - goto cleanup;
> + return ret;
>
> sector = rbio_stripe_sector(rbio, faila, sector_nr);
> sector->uptodate = 1;
> @@ -1968,34 +1949,26 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
> if (failb >= 0) {
> ret = verify_one_sector(rbio, failb, sector_nr);
> if (ret < 0)
> - goto cleanup;
> + return ret;
>
> sector = rbio_stripe_sector(rbio, failb, sector_nr);
> sector->uptodate = 1;
> }
>
> -cleanup:
> - for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--)
> - kunmap_local(unmap_array[stripe_nr]);
> return ret;
> }
>
> static int recover_sectors(struct btrfs_raid_bio *rbio)
> {
> void **pointers = NULL;
> - void **unmap_array = NULL;
> int sectornr;
> int ret = 0;
>
> /*
> * @pointers array stores the pointer for each sector.
> - *
> - * @unmap_array stores copy of pointers that does not get reordered
> - * during reconstruction so that kunmap_local works.
> */
> pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
> - unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
> - if (!pointers || !unmap_array) {
> + if (!pointers) {
> ret = -ENOMEM;
> goto out;
> }
> @@ -2009,14 +1982,13 @@ static int recover_sectors(struct btrfs_raid_bio *rbio)
> index_rbio_pages(rbio);
>
> for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) {
> - ret = recover_vertical(rbio, sectornr, pointers, unmap_array);
> + ret = recover_vertical(rbio, sectornr, pointers);
> if (ret < 0)
> break;
> }
>
> out:
> kfree(pointers);
> - kfree(unmap_array);
> return ret;
> }
>
> @@ -2326,7 +2298,7 @@ static bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio)
> * thus this rbio can not be cached one, as cached one must
> * have all its data sectors present and uptodate.
> */
> - if (!sector->page || !sector->uptodate)
> + if (!sector->kaddr || !sector->uptodate)
> return true;
> }
> return false;
> @@ -2547,29 +2519,27 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
> */
> clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
>
> - p_sector.page = alloc_page(GFP_NOFS);
> - if (!p_sector.page)
> + p_sector.kaddr = (void *)__get_free_page(GFP_NOFS);
> + if (!p_sector.kaddr)
> return -ENOMEM;
> - p_sector.pgoff = 0;
> p_sector.uptodate = 1;
>
> if (has_qstripe) {
> /* RAID6, allocate and map temp space for the Q stripe */
> - q_sector.page = alloc_page(GFP_NOFS);
> - if (!q_sector.page) {
> - __free_page(p_sector.page);
> - p_sector.page = NULL;
> + q_sector.kaddr = (void *)__get_free_page(GFP_NOFS);
> + if (!q_sector.kaddr) {
> + free_page((unsigned long)p_sector.kaddr);
> + p_sector.kaddr = NULL;
> return -ENOMEM;
> }
> - q_sector.pgoff = 0;
> q_sector.uptodate = 1;
> - pointers[rbio->real_stripes - 1] = kmap_local_page(q_sector.page);
> + pointers[rbio->real_stripes - 1] = q_sector.kaddr;
> }
>
> bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
>
> /* Map the parity stripe just once */
> - pointers[nr_data] = kmap_local_page(p_sector.page);
> + pointers[nr_data] = p_sector.kaddr;
>
> for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
> struct sector_ptr *sector;
> @@ -2578,8 +2548,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
> /* first collect one page from each data stripe */
> for (stripe = 0; stripe < nr_data; stripe++) {
> sector = sector_in_rbio(rbio, stripe, sectornr, 0);
> - pointers[stripe] = kmap_local_page(sector->page) +
> - sector->pgoff;
> + pointers[stripe] = sector->kaddr;
> }
>
> if (has_qstripe) {
> @@ -2595,25 +2564,19 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
>
> /* Check scrubbing parity and repair it */
> sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
> - parity = kmap_local_page(sector->page) + sector->pgoff;
> + parity = sector->kaddr;
> if (memcmp(parity, pointers[rbio->scrubp], sectorsize) != 0)
> memcpy(parity, pointers[rbio->scrubp], sectorsize);
> else
> /* Parity is right, needn't writeback */
> bitmap_clear(&rbio->dbitmap, sectornr, 1);
> - kunmap_local(parity);
> -
> - for (stripe = nr_data - 1; stripe >= 0; stripe--)
> - kunmap_local(pointers[stripe]);
> }
>
> - kunmap_local(pointers[nr_data]);
> - __free_page(p_sector.page);
> - p_sector.page = NULL;
> - if (q_sector.page) {
> - kunmap_local(pointers[rbio->real_stripes - 1]);
> - __free_page(q_sector.page);
> - q_sector.page = NULL;
> + free_page((unsigned long)p_sector.kaddr);
> + p_sector.kaddr = NULL;
> + if (q_sector.kaddr) {
> + free_page((unsigned long)q_sector.kaddr);
> + q_sector.kaddr = NULL;
> }
>
> /*
> @@ -2669,19 +2632,14 @@ static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
> static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
> {
> void **pointers = NULL;
> - void **unmap_array = NULL;
> int sector_nr;
> int ret = 0;
>
> /*
> * @pointers array stores the pointer for each sector.
> - *
> - * @unmap_array stores copy of pointers that does not get reordered
> - * during reconstruction so that kunmap_local works.
> */
> pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
> - unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
> - if (!pointers || !unmap_array) {
> + if (!pointers) {
> ret = -ENOMEM;
> goto out;
> }
> @@ -2740,13 +2698,12 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
> goto out;
> }
>
> - ret = recover_vertical(rbio, sector_nr, pointers, unmap_array);
> + ret = recover_vertical(rbio, sector_nr, pointers);
> if (ret < 0)
> goto out;
> }
> out:
> kfree(pointers);
> - kfree(unmap_array);
> return ret;
> }
>
next prev parent reply other threads:[~2025-04-09 22:34 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-09 11:10 RFC: (almost) stop poking into bvec internals in btrfs Christoph Hellwig
2025-04-09 11:10 ` [PATCH 1/8] btrfs: remove the alignment checks in end_bbio_data_read Christoph Hellwig
2025-04-09 22:13 ` Qu Wenruo
2025-04-10 5:30 ` Christoph Hellwig
2025-04-10 5:39 ` Qu Wenruo
2025-04-09 11:10 ` [PATCH 2/8] btrfs: track the next file offset in struct btrfs_bio_ctrl Christoph Hellwig
2025-04-09 22:15 ` Qu Wenruo
2025-04-09 11:10 ` [PATCH 3/8] btrfs: pass a physical address to btrfs_repair_io_failure Christoph Hellwig
2025-04-09 22:19 ` Qu Wenruo
2025-04-10 5:31 ` Christoph Hellwig
2025-04-10 6:06 ` Johannes Thumshirn
2025-04-10 6:17 ` hch
2025-04-09 11:10 ` [PATCH 4/8] btrfs: move kmapping out of btrfs_check_sector_csum Christoph Hellwig
2025-04-10 6:16 ` Johannes Thumshirn
2025-04-16 4:51 ` Qu Wenruo
2025-04-09 11:10 ` [PATCH 5/8] btrfs: simplify bvec iteration in index_one_bio Christoph Hellwig
2025-04-18 2:09 ` Qu Wenruo
2025-04-09 11:10 ` [PATCH 6/8] btrfs: store a kernel virtual address in struct sector_ptr Christoph Hellwig
2025-04-09 22:34 ` Qu Wenruo [this message]
2025-04-10 5:34 ` Christoph Hellwig
2025-04-14 3:04 ` Qu Wenruo
2025-04-17 23:41 ` Qu Wenruo
2025-04-09 11:10 ` [PATCH 7/8] btrfs: refactor getting the address of a stripe sector Christoph Hellwig
2025-04-09 22:38 ` Qu Wenruo
2025-04-19 1:01 ` Qu Wenruo
2025-04-09 11:10 ` [PATCH 8/8] btrfs: use bvec_kmap_local in btrfs_decompress_buf2page Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=0b3a6b18-ab19-4997-86dc-fd269b7b61da@suse.com \
--to=wqu@suse.com \
--cc=hch@lst.de \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox