public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Boris Burkov <boris@bur.io>
To: Qu Wenruo <wqu@suse.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 1/5] btrfs: qgroup: iterate qgroups without memory allocation for qgroup_reserve()
Date: Wed, 30 Aug 2023 14:58:48 -0700	[thread overview]
Message-ID: <20230830215848.GA834639@zen> (raw)
In-Reply-To: <af338549020e57415b5e4079f37e05b5655991f8.1693391268.git.wqu@suse.com>

On Wed, Aug 30, 2023 at 06:37:23PM +0800, Qu Wenruo wrote:
> Qgroup heavily relies on ulist to go through all the involved
> qgroups, but since we're using ulist inside fs_info->qgroup_lock
> spinlock, this means we're doing a lot of GFP_ATOMIC allocation.
> 
> This patch would reduce the GFP_ATOMIC usage for qgroup_reserve() by
> eliminating the memory allocation completely.
> 
> This is done by moving the needed memory to btrfs_qgroup::iterator
> list_head, so that we can put all the involved qgroups into an on-stack list,
> thus eliminating the need to allocate memory while holding the spinlock.
> 
> The only cost is the slightly higher memory usage, but considering the
> reduced GFP_ATOMIC usage during a hot path, it should still be acceptable.
> 
> Function qgroup_reserve() is the perfect start point for this
> conversion.

This looks great, thanks! I like it more than my array/ulist hybrid
since it never allocates :)

Reviewed-by: Boris Burkov <boris@bur.io>

> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>  fs/btrfs/qgroup.c | 68 +++++++++++++++++++++++------------------------
>  fs/btrfs/qgroup.h |  9 +++++++
>  2 files changed, 42 insertions(+), 35 deletions(-)
> 
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index 74244b4bb0e9..de34e2aef710 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -216,6 +216,7 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
>  	INIT_LIST_HEAD(&qgroup->groups);
>  	INIT_LIST_HEAD(&qgroup->members);
>  	INIT_LIST_HEAD(&qgroup->dirty);
> +	INIT_LIST_HEAD(&qgroup->iterator);
>  
>  	rb_link_node(&qgroup->node, parent, p);
>  	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
> @@ -1367,6 +1368,25 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
>  		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
>  }
>  
> +static void qgroup_iterator_add(struct list_head *head, struct btrfs_qgroup *qgroup)
> +{
> +	if (!list_empty(&qgroup->iterator))
> +		return;
> +
> +	list_add_tail(&qgroup->iterator, head);
> +}
> +
> +static void qgroup_iterator_clean(struct list_head *head)
> +{
> +
> +	while (!list_empty(head)) {
> +		struct btrfs_qgroup *qgroup;
> +
> +		qgroup = list_first_entry(head, struct btrfs_qgroup, iterator);
> +		list_del_init(&qgroup->iterator);
> +	}
> +}
> +
>  /*
>   * The easy accounting, we're updating qgroup relationship whose child qgroup
>   * only has exclusive extents.
> @@ -3154,12 +3174,11 @@ static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
>  static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
>  			  enum btrfs_qgroup_rsv_type type)
>  {
> -	struct btrfs_qgroup *qgroup;
> +	struct btrfs_qgroup *cur;
>  	struct btrfs_fs_info *fs_info = root->fs_info;
>  	u64 ref_root = root->root_key.objectid;
>  	int ret = 0;
> -	struct ulist_node *unode;
> -	struct ulist_iterator uiter;
> +	LIST_HEAD(qgroup_list);
>  
>  	if (!is_fstree(ref_root))
>  		return 0;
> @@ -3175,53 +3194,32 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
>  	if (!fs_info->quota_root)
>  		goto out;
>  
> -	qgroup = find_qgroup_rb(fs_info, ref_root);
> -	if (!qgroup)
> +	cur = find_qgroup_rb(fs_info, ref_root);
> +	if (!cur)
>  		goto out;
>  
> -	/*
> -	 * in a first step, we check all affected qgroups if any limits would
> -	 * be exceeded
> -	 */
> -	ulist_reinit(fs_info->qgroup_ulist);
> -	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
> -			qgroup_to_aux(qgroup), GFP_ATOMIC);
> -	if (ret < 0)
> -		goto out;
> -	ULIST_ITER_INIT(&uiter);
> -	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
> -		struct btrfs_qgroup *qg;
> +	qgroup_iterator_add(&qgroup_list, cur);
> +	list_for_each_entry(cur, &qgroup_list, iterator) {
>  		struct btrfs_qgroup_list *glist;
>  
> -		qg = unode_aux_to_qgroup(unode);
> -
> -		if (enforce && !qgroup_check_limits(qg, num_bytes)) {
> +		if (enforce && !qgroup_check_limits(cur, num_bytes)) {
>  			ret = -EDQUOT;
>  			goto out;
>  		}
>  
> -		list_for_each_entry(glist, &qg->groups, next_group) {
> -			ret = ulist_add(fs_info->qgroup_ulist,
> -					glist->group->qgroupid,
> -					qgroup_to_aux(glist->group), GFP_ATOMIC);
> -			if (ret < 0)
> -				goto out;
> -		}
> +		list_for_each_entry(glist, &cur->groups, next_group)
> +			qgroup_iterator_add(&qgroup_list, glist->group);
>  	}
> +
>  	ret = 0;
>  	/*
>  	 * no limits exceeded, now record the reservation into all qgroups
>  	 */
> -	ULIST_ITER_INIT(&uiter);
> -	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
> -		struct btrfs_qgroup *qg;
> -
> -		qg = unode_aux_to_qgroup(unode);
> -
> -		qgroup_rsv_add(fs_info, qg, num_bytes, type);
> -	}
> +	list_for_each_entry(cur, &qgroup_list, iterator)
> +		qgroup_rsv_add(fs_info, cur, num_bytes, type);
>  
>  out:
> +	qgroup_iterator_clean(&qgroup_list);
>  	spin_unlock(&fs_info->qgroup_lock);
>  	return ret;
>  }
> diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
> index 7bffa10589d6..5dc0583622c3 100644
> --- a/fs/btrfs/qgroup.h
> +++ b/fs/btrfs/qgroup.h
> @@ -220,6 +220,15 @@ struct btrfs_qgroup {
>  	struct list_head groups;  /* groups this group is member of */
>  	struct list_head members; /* groups that are members of this group */
>  	struct list_head dirty;   /* dirty groups */
> +
> +	/*
> +	 * For qgroup iteration usage.
> +	 *
> +	 * The iteration list should always be empty until
> +	 * qgroup_iterator_add() is called.
> +	 * And should be reset to empty after the iteration is finished.

Can you also add a note that this relies on global exclusion under the
qgroup spin lock?

> +	 */
> +	struct list_head iterator;
>  	struct rb_node node;	  /* tree of qgroups */
>  
>  	/*
> -- 
> 2.41.0
> 

  reply	other threads:[~2023-08-30 22:03 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-30 10:37 [PATCH 0/5] btrfs: qgroup: reduce GFP_ATOMIC usage for ulist Qu Wenruo
2023-08-30 10:37 ` [PATCH 1/5] btrfs: qgroup: iterate qgroups without memory allocation for qgroup_reserve() Qu Wenruo
2023-08-30 21:58   ` Boris Burkov [this message]
2023-08-30 10:37 ` [PATCH 2/5] btrfs: qgroup: use qgroup_iterator facility for btrfs_qgroup_free_refroot() Qu Wenruo
2023-08-30 10:37 ` [PATCH 3/5] btrfs: qgroup: use qgroup_iterator facility for qgroup_convert_meta() Qu Wenruo
2023-08-30 10:37 ` [PATCH 4/5] btrfs: qgroup: use qgroup_iterator facility for __qgroup_excl_accounting() Qu Wenruo
2023-08-30 10:37 ` [PATCH 5/5] btrfs: qgroup: use qgroup_iterator facility to replace @tmp ulist of qgroup_update_refcnt() Qu Wenruo
2023-08-30 22:09   ` Boris Burkov
2023-08-30 22:55     ` Qu Wenruo
2023-09-05 13:07   ` David Sterba
2023-08-30 22:06 ` [PATCH 0/5] btrfs: qgroup: reduce GFP_ATOMIC usage for ulist Boris Burkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230830215848.GA834639@zen \
    --to=boris@bur.io \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=wqu@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox