public inbox for linux-ext4@vger.kernel.org
 help / color / mirror / Atom feed
From: Ritesh Harjani <ritesh.list@gmail.com>
To: Jan Kara <jack@suse.cz>
Cc: Ted Tso <tytso@mit.edu>, linux-ext4@vger.kernel.org
Subject: Re: [PATCH 10/10] mbcache: Automatically delete entries from cache on freeing
Date: Thu, 14 Jul 2022 18:39:51 +0530	[thread overview]
Message-ID: <20220714130951.2fc4tjrc53b6vzla@riteshh-domain> (raw)
In-Reply-To: <20220712105436.32204-10-jack@suse.cz>

On 22/07/12 12:54PM, Jan Kara wrote:
> Use the fact that entries with elevated refcount are not removed from

The elevated refcnt means >= 2?

> the hash and just move removal of the entry from the hash to the entry
> freeing time. When doing this we also change the generic code to hold
> one reference to the cache entry, not two of them, which makes code
> somewhat more obvious.
>
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  fs/mbcache.c            | 108 +++++++++++++++-------------------------
>  include/linux/mbcache.h |  24 ++++++---
>  2 files changed, 55 insertions(+), 77 deletions(-)
>
> diff --git a/fs/mbcache.c b/fs/mbcache.c
> index d1ebb5df2856..96f1d49d30a5 100644
> --- a/fs/mbcache.c
> +++ b/fs/mbcache.c
> @@ -90,7 +90,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
>  		return -ENOMEM;
>
>  	INIT_LIST_HEAD(&entry->e_list);
> -	/* One ref for hash, one ref returned */
> +	/* Initial hash reference */
>  	atomic_set(&entry->e_refcnt, 1);
>  	entry->e_key = key;
>  	entry->e_value = value;
> @@ -106,21 +106,28 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
>  		}
>  	}
>  	hlist_bl_add_head(&entry->e_hash_list, head);
> -	hlist_bl_unlock(head);
> -
> +	/*
> +	 * Add entry to LRU list before it can be found by
> +	 * mb_cache_entry_delete() to avoid races
> +	 */

No reference to mb_cache_entry_delete() now. It is
mb_cache_entry_delete_or_get()

>  	spin_lock(&cache->c_list_lock);
>  	list_add_tail(&entry->e_list, &cache->c_list);
> -	/* Grab ref for LRU list */
> -	atomic_inc(&entry->e_refcnt);
>  	cache->c_entry_count++;
>  	spin_unlock(&cache->c_list_lock);
> +	hlist_bl_unlock(head);
>
>  	return 0;
>  }
>  EXPORT_SYMBOL(mb_cache_entry_create);
>
> -void __mb_cache_entry_free(struct mb_cache_entry *entry)
> +void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
>  {
> +	struct hlist_bl_head *head;
> +
> +	head = mb_cache_entry_head(cache, entry->e_key);
> +	hlist_bl_lock(head);
> +	hlist_bl_del(&entry->e_hash_list);
> +	hlist_bl_unlock(head);
>  	kmem_cache_free(mb_entry_cache, entry);
>  }
>  EXPORT_SYMBOL(__mb_cache_entry_free);
> @@ -134,7 +141,7 @@ EXPORT_SYMBOL(__mb_cache_entry_free);
>   */
>  void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
>  {
> -	wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3);
> +	wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
>  }
>  EXPORT_SYMBOL(mb_cache_entry_wait_unused);
>
> @@ -155,10 +162,9 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
>  	while (node) {
>  		entry = hlist_bl_entry(node, struct mb_cache_entry,
>  				       e_hash_list);
> -		if (entry->e_key == key && entry->e_reusable) {
> -			atomic_inc(&entry->e_refcnt);
> +		if (entry->e_key == key && entry->e_reusable &&
> +		    atomic_inc_not_zero(&entry->e_refcnt))
>  			goto out;
> -		}
>  		node = node->next;
>  	}
>  	entry = NULL;
> @@ -218,10 +224,9 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
>  	head = mb_cache_entry_head(cache, key);
>  	hlist_bl_lock(head);
>  	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
> -		if (entry->e_key == key && entry->e_value == value) {
> -			atomic_inc(&entry->e_refcnt);
> +		if (entry->e_key == key && entry->e_value == value &&
> +		    atomic_inc_not_zero(&entry->e_refcnt))
>  			goto out;
> -		}
>  	}
>  	entry = NULL;
>  out:
> @@ -244,37 +249,25 @@ EXPORT_SYMBOL(mb_cache_entry_get);
>  struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
>  						    u32 key, u64 value)
>  {
> -	struct hlist_bl_node *node;
> -	struct hlist_bl_head *head;
>  	struct mb_cache_entry *entry;
>
> -	head = mb_cache_entry_head(cache, key);
> -	hlist_bl_lock(head);
> -	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
> -		if (entry->e_key == key && entry->e_value == value) {
> -			if (atomic_read(&entry->e_refcnt) > 2) {
> -				atomic_inc(&entry->e_refcnt);
> -				hlist_bl_unlock(head);
> -				return entry;
> -			}
> -			/* We keep hash list reference to keep entry alive */
> -			hlist_bl_del_init(&entry->e_hash_list);
> -			hlist_bl_unlock(head);
> -			spin_lock(&cache->c_list_lock);
> -			if (!list_empty(&entry->e_list)) {
> -				list_del_init(&entry->e_list);
> -				if (!WARN_ONCE(cache->c_entry_count == 0,
> -		"mbcache: attempt to decrement c_entry_count past zero"))
> -					cache->c_entry_count--;
> -				atomic_dec(&entry->e_refcnt);
> -			}
> -			spin_unlock(&cache->c_list_lock);
> -			mb_cache_entry_put(cache, entry);
> -			return NULL;
> -		}
> -	}
> -	hlist_bl_unlock(head);
> +	entry = mb_cache_entry_get(cache, key, value);
> +	if (!entry)
> +		return NULL;
> +
> +	/*
> +	 * Drop the ref we got from mb_cache_entry_get() and the initial hash
> +	 * ref if we are the last user
> +	 */
> +	if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
> +		return entry;
>
> +	spin_lock(&cache->c_list_lock);
> +	if (!list_empty(&entry->e_list))
> +		list_del_init(&entry->e_list);
> +	cache->c_entry_count--;
> +	spin_unlock(&cache->c_list_lock);
> +	__mb_cache_entry_free(cache, entry);
>  	return NULL;
>  }
>  EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
> @@ -306,42 +299,24 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
>  				     unsigned long nr_to_scan)
>  {
>  	struct mb_cache_entry *entry;
> -	struct hlist_bl_head *head;
>  	unsigned long shrunk = 0;
>
>  	spin_lock(&cache->c_list_lock);
>  	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
>  		entry = list_first_entry(&cache->c_list,
>  					 struct mb_cache_entry, e_list);
> -		if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) {
> +		/* Drop initial hash reference if there is no user */
> +		if (entry->e_referenced ||
> +		    atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {

So here if the refcnt of an entry is 1. That means it is still in use right.
So the shrinker will do the atomic_cmpxchg and make it to 0. And then
delete the entry from the cache?
This will only happen for entry with just 1 reference count.

Is that correct understanding?

-ritesh


>  			entry->e_referenced = 0;
>  			list_move_tail(&entry->e_list, &cache->c_list);
>  			continue;
>  		}
>  		list_del_init(&entry->e_list);
>  		cache->c_entry_count--;
> -		/*
> -		 * We keep LRU list reference so that entry doesn't go away
> -		 * from under us.
> -		 */
>  		spin_unlock(&cache->c_list_lock);
> -		head = mb_cache_entry_head(cache, entry->e_key);
> -		hlist_bl_lock(head);
> -		/* Now a reliable check if the entry didn't get used... */
> -		if (atomic_read(&entry->e_refcnt) > 2) {
> -			hlist_bl_unlock(head);
> -			spin_lock(&cache->c_list_lock);
> -			list_add_tail(&entry->e_list, &cache->c_list);
> -			cache->c_entry_count++;
> -			continue;
> -		}
> -		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
> -			hlist_bl_del_init(&entry->e_hash_list);
> -			atomic_dec(&entry->e_refcnt);
> -		}
> -		hlist_bl_unlock(head);
> -		if (mb_cache_entry_put(cache, entry))
> -			shrunk++;
> +		__mb_cache_entry_free(cache, entry);
> +		shrunk++;
>  		cond_resched();
>  		spin_lock(&cache->c_list_lock);
>  	}
> @@ -433,11 +408,6 @@ void mb_cache_destroy(struct mb_cache *cache)
>  	 * point.
>  	 */
>  	list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
> -		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
> -			hlist_bl_del_init(&entry->e_hash_list);
> -			atomic_dec(&entry->e_refcnt);
> -		} else
> -			WARN_ON(1);
>  		list_del(&entry->e_list);
>  		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
>  		mb_cache_entry_put(cache, entry);
> diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
> index 452b579856d4..2da63fd7b98f 100644
> --- a/include/linux/mbcache.h
> +++ b/include/linux/mbcache.h
> @@ -13,8 +13,16 @@ struct mb_cache;
>  struct mb_cache_entry {
>  	/* List of entries in cache - protected by cache->c_list_lock */
>  	struct list_head	e_list;
> -	/* Hash table list - protected by hash chain bitlock */
> +	/*
> +	 * Hash table list - protected by hash chain bitlock. The entry is
> +	 * guaranteed to be hashed while e_refcnt > 0.
> +	 */
>  	struct hlist_bl_node	e_hash_list;
> +	/*
> +	 * Entry refcount. Once it reaches zero, entry is unhashed and freed.
> +	 * While refcount > 0, the entry is guaranteed to stay in the hash and
> +	 * e.g. mb_cache_entry_try_delete() will fail.
> +	 */
>  	atomic_t		e_refcnt;
>  	/* Key in hash - stable during lifetime of the entry */
>  	u32			e_key;
> @@ -29,20 +37,20 @@ void mb_cache_destroy(struct mb_cache *cache);
>
>  int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
>  			  u64 value, bool reusable);
> -void __mb_cache_entry_free(struct mb_cache_entry *entry);
> +void __mb_cache_entry_free(struct mb_cache *cache,
> +			   struct mb_cache_entry *entry);
>  void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
> -static inline int mb_cache_entry_put(struct mb_cache *cache,
> -				     struct mb_cache_entry *entry)
> +static inline void mb_cache_entry_put(struct mb_cache *cache,
> +				      struct mb_cache_entry *entry)
>  {
>  	unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
>
>  	if (cnt > 0) {
> -		if (cnt <= 3)
> +		if (cnt <= 2)
>  			wake_up_var(&entry->e_refcnt);
> -		return 0;
> +		return;
>  	}
> -	__mb_cache_entry_free(entry);
> -	return 1;
> +	__mb_cache_entry_free(cache, entry);
>  }
>
>  struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
> --
> 2.35.3
>

  reply	other threads:[~2022-07-14 13:10 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-12 10:54 [PATCH 0/10 v3] ext4: Fix possible fs corruption due to xattr races Jan Kara
2022-07-12 10:54 ` [PATCH 01/10] mbcache: Don't reclaim used entries Jan Kara
2022-07-14 11:47   ` Ritesh Harjani
2022-07-14 14:36     ` Jan Kara
2022-07-14 14:49       ` Ritesh Harjani
2022-07-22 13:58   ` Theodore Ts'o
2022-07-12 10:54 ` [PATCH 02/10] mbcache: Add functions to delete entry if unused Jan Kara
2022-07-14 12:15   ` Ritesh Harjani
2022-07-14 14:49     ` Jan Kara
2022-07-14 15:00       ` Ritesh Harjani
2022-07-12 10:54 ` [PATCH 03/10] ext4: Remove EA inode entry from mbcache on inode eviction Jan Kara
2022-07-12 10:54 ` [PATCH 04/10] ext4: Unindent codeblock in ext4_xattr_block_set() Jan Kara
2022-07-14 12:19   ` Ritesh Harjani
2022-07-12 10:54 ` [PATCH 05/10] ext4: Fix race when reusing xattr blocks Jan Kara
2022-07-14 12:26   ` Ritesh Harjani
2022-07-16  3:00   ` Zhihao Cheng
2022-07-25 15:23     ` Jan Kara
2022-07-26  1:14       ` Zhihao Cheng
2022-07-12 10:54 ` [PATCH 06/10] ext2: Factor our freeing of xattr block reference Jan Kara
2022-07-14 12:37   ` Ritesh Harjani
2022-07-14 14:55     ` Jan Kara
2022-07-14 16:17       ` Ritesh Harjani
2022-07-12 10:54 ` [PATCH 07/10] ext2: Unindent codeblock in ext2_xattr_set() Jan Kara
2022-07-14 12:38   ` Ritesh Harjani
2022-07-12 10:54 ` [PATCH 08/10] ext2: Avoid deleting xattr block that is being reused Jan Kara
2022-07-12 10:54 ` [PATCH 09/10] mbcache: Remove mb_cache_entry_delete() Jan Kara
2022-07-12 10:54 ` [PATCH 10/10] mbcache: Automatically delete entries from cache on freeing Jan Kara
2022-07-14 13:09   ` Ritesh Harjani [this message]
2022-07-14 15:05     ` Jan Kara
2022-07-12 12:47 ` [PATCH 0/10 v3] ext4: Fix possible fs corruption due to xattr races Ritesh Harjani
  -- strict thread matches above, loose matches on Subject: below --
2022-06-14 16:05 [PATCH 0/10 v2] " Jan Kara
2022-06-14 16:05 ` [PATCH 10/10] mbcache: Automatically delete entries from cache on freeing Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220714130951.2fc4tjrc53b6vzla@riteshh-domain \
    --to=ritesh.list@gmail.com \
    --cc=jack@suse.cz \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox