All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yosry Ahmed <yosryahmed@google.com>
To: Chengming Zhou <zhouchengming@bytedance.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	 Nhat Pham <nphamcs@gmail.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 1/2] mm/zswap: global lru and shrinker shared by all zswap_pools
Date: Tue, 13 Feb 2024 12:57:42 +0000	[thread overview]
Message-ID: <ZctnRnNMOwQNn_3j@google.com> (raw)
In-Reply-To: <20240210-zswap-global-lru-v1-1-853473d7b0da@bytedance.com>

On Sun, Feb 11, 2024 at 01:57:04PM +0000, Chengming Zhou wrote:
> Dynamic zswap_pool creation may create/reuse to have multiple
> zswap_pools in a list, only the first will be current used.
> 
> Each zswap_pool has its own lru and shrinker, which is not
> necessary and has its problem:
> 
> 1. When memory has pressure, all shrinker of zswap_pools will
>    try to shrink its own lru, there is no order between them.
> 
> 2. When zswap limit hit, only the last zswap_pool's shrink_work
>    will try to shrink its lru, which is inefficient.
> 
> Anyway, having a global lru and shrinker shared by all zswap_pools
> is better and efficient.

It is also a great simplification.

> 
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
>  mm/zswap.c | 153 ++++++++++++++++++++++---------------------------------------
>  1 file changed, 55 insertions(+), 98 deletions(-)
> 
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 62fe307521c9..7668db8c10e3 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -176,14 +176,17 @@ struct zswap_pool {
>  	struct kref kref;
>  	struct list_head list;
>  	struct work_struct release_work;
> -	struct work_struct shrink_work;
>  	struct hlist_node node;
>  	char tfm_name[CRYPTO_MAX_ALG_NAME];
> +};
> +
> +struct {

static?

>  	struct list_lru list_lru;
> -	struct mem_cgroup *next_shrink;
> -	struct shrinker *shrinker;

Just curious, any reason to change the relative ordering of members
here? It produces a couple more lines of diff :)

>  	atomic_t nr_stored;
> -};
> +	struct shrinker *shrinker;
> +	struct work_struct shrink_work;
> +	struct mem_cgroup *next_shrink;
> +} zswap;
>  
>  /*
>   * struct zswap_entry
> @@ -301,9 +304,6 @@ static void zswap_update_total_size(void)
>  * pool functions
>  **********************************/
>  
> -static void zswap_alloc_shrinker(struct zswap_pool *pool);
> -static void shrink_worker(struct work_struct *w);
> -
>  static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
>  {
>  	int i;
> @@ -353,30 +353,16 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
>  	if (ret)
>  		goto error;
>  
> -	zswap_alloc_shrinker(pool);
> -	if (!pool->shrinker)
> -		goto error;
> -
> -	pr_debug("using %s compressor\n", pool->tfm_name);
> -

Why are we removing this debug print?

>  	/* being the current pool takes 1 ref; this func expects the
>  	 * caller to always add the new pool as the current pool
>  	 */
>  	kref_init(&pool->kref);
>  	INIT_LIST_HEAD(&pool->list);
> -	if (list_lru_init_memcg(&pool->list_lru, pool->shrinker))
> -		goto lru_fail;
> -	shrinker_register(pool->shrinker);
> -	INIT_WORK(&pool->shrink_work, shrink_worker);
> -	atomic_set(&pool->nr_stored, 0);
>  
>  	zswap_pool_debug("created", pool);
>  
>  	return pool;
>  
> -lru_fail:
> -	list_lru_destroy(&pool->list_lru);
> -	shrinker_free(pool->shrinker);
>  error:
>  	if (pool->acomp_ctx)
>  		free_percpu(pool->acomp_ctx);
[..]
> @@ -816,14 +777,10 @@ void zswap_folio_swapin(struct folio *folio)
>  
>  void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
>  {
> -	struct zswap_pool *pool;
> -
> -	/* lock out zswap pools list modification */
> +	/* lock out zswap shrinker walking memcg tree */
>  	spin_lock(&zswap_pools_lock);
> -	list_for_each_entry(pool, &zswap_pools, list) {
> -		if (pool->next_shrink == memcg)
> -			pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
> -	}
> +	if (zswap.next_shrink == memcg)
> +		zswap.next_shrink = mem_cgroup_iter(NULL, zswap.next_shrink, NULL);

Now that next_shrink has nothing to do with zswap pools, it feels weird
that we are using zswap_pools_lock for its synchronization. Does it make
sense to have a separate lock for it just for semantic purposes?

>  	spin_unlock(&zswap_pools_lock);
>  }
>  
[..]
> @@ -1328,7 +1284,6 @@ static unsigned long zswap_shrinker_scan(struct shrinker *shrinker,
>  static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
>  		struct shrink_control *sc)
>  {
> -	struct zswap_pool *pool = shrinker->private_data;
>  	struct mem_cgroup *memcg = sc->memcg;
>  	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid));
>  	unsigned long nr_backing, nr_stored, nr_freeable, nr_protected;
> @@ -1343,7 +1298,7 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
>  #else
>  	/* use pool stats instead of memcg stats */
>  	nr_backing = get_zswap_pool_size(pool) >> PAGE_SHIFT;

"pool" is still being used here.

> -	nr_stored = atomic_read(&pool->nr_stored);
> +	nr_stored = atomic_read(&zswap.nr_stored);
>  #endif
>  
>  	if (!nr_stored)
[..]  
> @@ -1804,6 +1749,21 @@ static int zswap_setup(void)
>  	if (ret)
>  		goto hp_fail;
>  
> +	shrink_wq = alloc_workqueue("zswap-shrink",
> +			WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
> +	if (!shrink_wq)
> +		goto hp_fail;

I think we need a new label here to call cpuhp_remove_multi_state(), but
apparently this is missing from the current code for some reason.

> +
> +	zswap.shrinker = zswap_alloc_shrinker();
> +	if (!zswap.shrinker)
> +		goto shrinker_fail;
> +	if (list_lru_init_memcg(&zswap.list_lru, zswap.shrinker))
> +		goto lru_fail;
> +	shrinker_register(zswap.shrinker);
> +
> +	INIT_WORK(&zswap.shrink_work, shrink_worker);
> +	atomic_set(&zswap.nr_stored, 0);
> +
>  	pool = __zswap_pool_create_fallback();
>  	if (pool) {
>  		pr_info("loaded using pool %s/%s\n", pool->tfm_name,
> @@ -1815,19 +1775,16 @@ static int zswap_setup(void)
>  		zswap_enabled = false;
>  	}
>  
> -	shrink_wq = alloc_workqueue("zswap-shrink",
> -			WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
> -	if (!shrink_wq)
> -		goto fallback_fail;
> -
>  	if (zswap_debugfs_init())
>  		pr_warn("debugfs initialization failed\n");
>  	zswap_init_state = ZSWAP_INIT_SUCCEED;
>  	return 0;
>  
> -fallback_fail:
> -	if (pool)
> -		zswap_pool_destroy(pool);
> +lru_fail:
> +	list_lru_destroy(&zswap.list_lru);

Do we need to call list_lru_destroy() here? I know it is currently being
called if list_lru_init_memcg() fails, but I fail to understand why. It
seems like list_lru_destroy() will do nothing anyway.

> +	shrinker_free(zswap.shrinker);
> +shrinker_fail:
> +	destroy_workqueue(shrink_wq);
>  hp_fail:
>  	kmem_cache_destroy(zswap_entry_cache);
>  cache_fail:
> 
> -- 
> b4 0.10.1


  parent reply	other threads:[~2024-02-13 12:57 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-11 13:57 [PATCH 0/2] mm/zswap: optimize for dynamic zswap_pools Chengming Zhou
2024-02-11 13:57 ` [PATCH 1/2] mm/zswap: global lru and shrinker shared by all zswap_pools Chengming Zhou
2024-02-11 19:01   ` kernel test robot
2024-02-12 13:17     ` Chengming Zhou
2024-02-11 21:04   ` Nhat Pham
2024-02-12 13:20     ` Chengming Zhou
2024-02-11 22:05   ` kernel test robot
2024-02-13 12:57   ` Yosry Ahmed [this message]
2024-02-13 14:20     ` Chengming Zhou
2024-02-13 17:43       ` Yosry Ahmed
2024-02-11 13:57 ` [PATCH 2/2] mm/zswap: change zswap_pool kref to percpu_ref Chengming Zhou
2024-02-11 21:21   ` Nhat Pham
2024-02-12 13:29     ` Chengming Zhou
2024-02-12 18:53       ` Nhat Pham
2024-02-13 14:22         ` Chengming Zhou
2024-02-12 22:42   ` Yosry Ahmed
2024-02-13 14:31     ` Chengming Zhou
2024-02-13 17:45       ` Yosry Ahmed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZctnRnNMOwQNn_3j@google.com \
    --to=yosryahmed@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nphamcs@gmail.com \
    --cc=zhouchengming@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.