public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Boris Burkov <boris@bur.io>
To: David Sterba <dsterba@suse.com>
Cc: linux-btrfs@vger.kernel.org, willy@infradead.org
Subject: Re: [PATCH] btrfs: use preallocated page for super block write
Date: Tue, 7 Jun 2022 10:06:20 -0700	[thread overview]
Message-ID: <Yp+FjFF3kKvN07dw@zen> (raw)
In-Reply-To: <20220607154229.9164-1-dsterba@suse.com>

On Tue, Jun 07, 2022 at 05:42:29PM +0200, David Sterba wrote:
> Currently the super block page is from the mapping of the block device,
> this is result of direct conversion from the previous buffer_head to bio
> conversion.  We don't use the page cache or the mapping anywhere else,
> the page is a temporary space for the associated bio.
> 
> Allocate the page at device allocation time, also to avoid any later
> allocation problems when writing the super block. This simplifies the
> page reference tracking, but the page lock is still used as waiting
> mechanism for the write and write error is tracked in the page.
> 
> This was inspired by Matthew's question
> https://lore.kernel.org/all/Yn%2FtxWbij5voeGOB@casper.infradead.org/
> 
> Signed-off-by: David Sterba <dsterba@suse.com>
Reviewed-by: Boris Burkov <boris@bur.io>
> ---
>  fs/btrfs/disk-io.c | 42 +++++++++++-------------------------------
>  fs/btrfs/volumes.c |  6 ++++++
>  fs/btrfs/volumes.h |  2 ++
>  3 files changed, 19 insertions(+), 31 deletions(-)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 0f926d18e6ca..d10ad62ba54d 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3873,7 +3873,6 @@ static void btrfs_end_super_write(struct bio *bio)
>  			SetPageUptodate(page);
>  		}
>  
> -		put_page(page);
>  		unlock_page(page);
>  	}
>  
> @@ -3960,7 +3959,6 @@ static int write_dev_supers(struct btrfs_device *device,
>  			    struct btrfs_super_block *sb, int max_mirrors)
>  {
>  	struct btrfs_fs_info *fs_info = device->fs_info;
> -	struct address_space *mapping = device->bdev->bd_inode->i_mapping;
>  	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
>  	int i;
>  	int errors = 0;
> @@ -3975,7 +3973,6 @@ static int write_dev_supers(struct btrfs_device *device,
>  	for (i = 0; i < max_mirrors; i++) {
>  		struct page *page;
>  		struct bio *bio;
> -		struct btrfs_super_block *disk_super;
>  
>  		bytenr_orig = btrfs_sb_offset(i);
>  		ret = btrfs_sb_log_location(device, i, WRITE, &bytenr);
> @@ -3998,21 +3995,17 @@ static int write_dev_supers(struct btrfs_device *device,
>  				    BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
>  				    sb->csum);
>  
> -		page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
> -					   GFP_NOFS);
> -		if (!page) {
> -			btrfs_err(device->fs_info,
> -			    "couldn't get super block page for bytenr %llu",
> -			    bytenr);
> -			errors++;
> -			continue;
> -		}
> -
> -		/* Bump the refcount for wait_dev_supers() */
> -		get_page(page);
> +		/*
> +		 * Super block is copied to a temporary page, which is locked
> +		 * and submitted for write. Page is unlocked after IO finishes.
> +		 * No page references are needed, write error is returned as
> +		 * page Error bit.
> +		 */
> +		page = device->sb_write_page;
> +		ClearPageError(page);
> +		lock_page(page);
>  
> -		disk_super = page_address(page);
> -		memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
> +		memcpy(page_address(page), sb, BTRFS_SUPER_INFO_SIZE);
>  
>  		/*
>  		 * Directly use bios here instead of relying on the page cache
> @@ -4079,14 +4072,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
>  		    device->commit_total_bytes)
>  			break;
>  
> -		page = find_get_page(device->bdev->bd_inode->i_mapping,
> -				     bytenr >> PAGE_SHIFT);
> -		if (!page) {
> -			errors++;
> -			if (i == 0)
> -				primary_failed = true;
> -			continue;
> -		}
> +		page = device->sb_write_page;
>  		/* Page is submitted locked and unlocked once the IO completes */
>  		wait_on_page_locked(page);
>  		if (PageError(page)) {
> @@ -4094,12 +4080,6 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
>  			if (i == 0)
>  				primary_failed = true;
>  		}
> -
> -		/* Drop our reference */
> -		put_page(page);
> -
> -		/* Drop the reference from the writing run */
> -		put_page(page);
>  	}
>  
>  	/* log error, force error return */
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 7513e45c0c42..a9588c52c1f3 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -394,6 +394,7 @@ void btrfs_free_device(struct btrfs_device *device)
>  	rcu_string_free(device->name);
>  	extent_io_tree_release(&device->alloc_state);
>  	btrfs_destroy_dev_zone_info(device);
> +	__free_page(device->sb_write_page);
>  	kfree(device);
>  }
>  
> @@ -6910,6 +6911,11 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
>  	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
>  	if (!dev)
>  		return ERR_PTR(-ENOMEM);
> +	dev->sb_write_page = alloc_page(GFP_KERNEL);
> +	if (!dev->sb_write_page) {
> +		kfree(dev);
> +		return ERR_PTR(-ENOMEM);
> +	}
>  
>  	INIT_LIST_HEAD(&dev->dev_list);
>  	INIT_LIST_HEAD(&dev->dev_alloc_list);
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index a3c3a0d716bd..4a6c4a5f6fe6 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -158,6 +158,8 @@ struct btrfs_device {
>  	/* Bio used for flushing device barriers */
>  	struct bio flush_bio;
>  	struct completion flush_wait;
> +	/* Temporary page for writing the superblock */
> +	struct page *sb_write_page;
>  
>  	/* per-device scrub information */
>  	struct scrub_ctx *scrub_ctx;
> -- 
> 2.36.1
> 

  reply	other threads:[~2022-06-07 17:06 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-07 15:42 [PATCH] btrfs: use preallocated page for super block write David Sterba
2022-06-07 17:06 ` Boris Burkov [this message]
2022-06-08 17:49 ` Johannes Thumshirn
2022-06-09  7:30 ` Nikolay Borisov
2022-06-09 12:26   ` David Sterba
2022-06-09 12:36   ` Matthew Wilcox
2022-06-09 12:37     ` Nikolay Borisov
2022-06-16 14:37 ` [btrfs] 66a7a2412f: xfstests.btrfs.131.fail kernel test robot
2022-06-16 21:17   ` Matthew Wilcox
2022-06-16 22:00     ` David Sterba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Yp+FjFF3kKvN07dw@zen \
    --to=boris@bur.io \
    --cc=dsterba@suse.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox