Re: [PATCH v2 5/7] mm, swap: use percpu cluster as allocation fast path

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Baoquan He <bhe@redhat.com>
To: Kairui Song <kasong@tencent.com>
Cc: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>,
	Chris Li <chrisl@kernel.org>, Barry Song <v-songbaohua@oppo.com>,
	Hugh Dickins <hughd@google.com>,
	Yosry Ahmed <yosryahmed@google.com>,
	"Huang, Ying" <ying.huang@linux.alibaba.com>,
	Nhat Pham <nphamcs@gmail.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Baolin Wang <baolin.wang@linux.alibaba.com>,
	Kalesh Singh <kaleshsingh@google.com>,
	Matthew Wilcox <willy@infradead.org>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 5/7] mm, swap: use percpu cluster as allocation fast path
Date: Tue, 25 Feb 2025 12:01:32 +0800	[thread overview]
Message-ID: <Z71AnDX+LQuXhY1Y@MiWiFi-R3L-srv> (raw)
In-Reply-To: <20250224180212.22802-6-ryncsn@gmail.com>

On 02/25/25 at 02:02am, Kairui Song wrote:
......snip...
> @@ -1204,19 +1252,36 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
>  	int n_ret = 0;
>  	int node;
>  
> +	/* Fast path using percpu cluster */
> +	local_lock(&percpu_swap_cluster.lock);
> +	n_ret = swap_alloc_fast(swp_entries,
> +				SWAP_HAS_CACHE,
> +				order, n_goal);

Since we always pass in the SWAP_HAS_CACHE for the 2nd parameter, we can
hard code it inside fucntion swap_alloc_fast(), unless there will be
other callsite where different value is passed in for 'usage'. And the
function 2nd parameter of swap_alloc_slow() too. Anyway, it's not a big
deal, could be my personal peference.

Other than this nit, this looks good to me.

> +	if (n_ret == n_goal)
> +		goto out;
> +
> +	n_goal = min_t(int, n_goal - n_ret, SWAP_BATCH);
> +	/* Rotate the device and switch to a new cluster */
>  	spin_lock(&swap_avail_lock);
>  start_over:
>  	node = numa_node_id();
>  	plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) {
> -		/* requeue si to after same-priority siblings */
>  		plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]);
>  		spin_unlock(&swap_avail_lock);
>  		if (get_swap_device_info(si)) {
> -			n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
> -					n_goal, swp_entries, order);
> +			/*
> +			 * For order 0 allocation, try best to fill the request
> +			 * as it's used by slot cache.
> +			 *
> +			 * For mTHP allocation, it always have n_goal == 1,
> +			 * and falling a mTHP swapin will just make the caller
> +			 * fallback to order 0 allocation, so just bail out.
> +			 */
> +			n_ret += scan_swap_map_slots(si, SWAP_HAS_CACHE, n_goal,
> +					swp_entries + n_ret, order);
>  			put_swap_device(si);
>  			if (n_ret || size > 1)
> -				goto check_out;
> +				goto out;
>  		}
>  
>  		spin_lock(&swap_avail_lock);
> @@ -1234,12 +1299,10 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
>  		if (plist_node_empty(&next->avail_lists[node]))
>  			goto start_over;
>  	}
> -
>  	spin_unlock(&swap_avail_lock);
> -
> -check_out:
> +out:
> +	local_unlock(&percpu_swap_cluster.lock);
>  	atomic_long_sub(n_ret * size, &nr_swap_pages);
> -
>  	return n_ret;
>  }
>  
> @@ -2725,8 +2788,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
>  	arch_swap_invalidate_area(p->type);
>  	zswap_swapoff(p->type);
>  	mutex_unlock(&swapon_mutex);
> -	free_percpu(p->percpu_cluster);
> -	p->percpu_cluster = NULL;
>  	kfree(p->global_cluster);
>  	p->global_cluster = NULL;
>  	vfree(swap_map);
> @@ -3125,7 +3186,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
>  	unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
>  	struct swap_cluster_info *cluster_info;
>  	unsigned long i, j, idx;
> -	int cpu, err = -ENOMEM;
> +	int err = -ENOMEM;
>  
>  	cluster_info = kvcalloc(nr_clusters, sizeof(*cluster_info), GFP_KERNEL);
>  	if (!cluster_info)
> @@ -3134,20 +3195,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
>  	for (i = 0; i < nr_clusters; i++)
>  		spin_lock_init(&cluster_info[i].lock);
>  
> -	if (si->flags & SWP_SOLIDSTATE) {
> -		si->percpu_cluster = alloc_percpu(struct percpu_cluster);
> -		if (!si->percpu_cluster)
> -			goto err_free;
> -
> -		for_each_possible_cpu(cpu) {
> -			struct percpu_cluster *cluster;
> -
> -			cluster = per_cpu_ptr(si->percpu_cluster, cpu);
> -			for (i = 0; i < SWAP_NR_ORDERS; i++)
> -				cluster->next[i] = SWAP_ENTRY_INVALID;
> -			local_lock_init(&cluster->lock);
> -		}
> -	} else {
> +	if (!(si->flags & SWP_SOLIDSTATE)) {
>  		si->global_cluster = kmalloc(sizeof(*si->global_cluster),
>  				     GFP_KERNEL);
>  		if (!si->global_cluster)
> @@ -3424,8 +3472,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
>  bad_swap_unlock_inode:
>  	inode_unlock(inode);
>  bad_swap:
> -	free_percpu(si->percpu_cluster);
> -	si->percpu_cluster = NULL;
>  	kfree(si->global_cluster);
>  	si->global_cluster = NULL;
>  	inode = NULL;
> -- 
> 2.48.1
>

next prev parent reply	other threads:[~2025-02-25  4:01 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-24 18:02 [PATCH v2 0/7] mm, swap: remove swap slot cache Kairui Song
2025-02-24 18:02 ` [PATCH v2 1/7] mm, swap: avoid reclaiming irrelevant swap cache Kairui Song
2025-02-24 18:02 ` [PATCH v2 2/7] mm, swap: drop the flag TTRS_DIRECT Kairui Song
2025-02-24 18:02 ` [PATCH v2 3/7] mm, swap: avoid redundant swap device pinning Kairui Song
2025-02-24 18:02 ` [PATCH v2 4/7] mm, swap: don't update the counter up-front Kairui Song
2025-02-25  6:32   ` Baoquan He
2025-02-24 18:02 ` [PATCH v2 5/7] mm, swap: use percpu cluster as allocation fast path Kairui Song
2025-02-25  4:01   ` Baoquan He [this message]
2025-02-25  6:38   ` Baoquan He
2025-03-07  2:54   ` Kairui Song
2025-02-24 18:02 ` [PATCH v2 6/7] mm, swap: remove swap slot cache Kairui Song
2025-02-25  6:39   ` Baoquan He
2025-02-24 18:02 ` [PATCH v2 7/7] mm, swap: simplify folio swap allocation Kairui Song
2025-02-25  6:43   ` Baoquan He

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z71AnDX+LQuXhY1Y@MiWiFi-R3L-srv \
    --to=bhe@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=chrisl@kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kaleshsingh@google.com \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nphamcs@gmail.com \
    --cc=v-songbaohua@oppo.com \
    --cc=willy@infradead.org \
    --cc=ying.huang@linux.alibaba.com \
    --cc=yosryahmed@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.