From: Baoquan He <bhe@redhat.com>
To: Kairui Song <kasong@tencent.com>
Cc: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>,
Chris Li <chrisl@kernel.org>, Barry Song <v-songbaohua@oppo.com>,
Ryan Roberts <ryan.roberts@arm.com>,
Hugh Dickins <hughd@google.com>,
Yosry Ahmed <yosryahmed@google.com>,
"Huang, Ying" <ying.huang@linux.alibaba.com>,
Nhat Pham <nphamcs@gmail.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Kalesh Singh <kaleshsingh@google.com>,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH v3 04/13] mm, swap: use cluster lock for HDD
Date: Thu, 9 Jan 2025 12:07:43 +0800 [thread overview]
Message-ID: <Z39Lj5Vr4sfb0IlA@MiWiFi-R3L-srv> (raw)
In-Reply-To: <20241230174621.61185-5-ryncsn@gmail.com>
On 12/31/24 at 01:46am, Kairui Song wrote:
> From: Kairui Song <kasong@tencent.com>
>
> Cluster lock (ci->lock) was introduce to reduce contention for certain
~~~~~~~~~ typo, introduced.
> operations. Using cluster lock for HDD is not helpful as HDD have a poor
> performance, so locking isn't the bottleneck. But having different set
> of locks for HDD / non-HDD prevents further rework of device lock
> (si->lock).
>
> This commit just changed all lock_cluster_or_swap_info to lock_cluster,
> which is a safe and straight conversion since cluster info is always
> allocated now, also removed all cluster_info related checks.
>
> Suggested-by: Chris Li <chrisl@kernel.org>
> Signed-off-by: Kairui Song <kasong@tencent.com>
> ---
> mm/swapfile.c | 107 ++++++++++++++++----------------------------------
> 1 file changed, 34 insertions(+), 73 deletions(-)
Other than the nit in patch log, LGTM,
Reviewed-by: Baoquan He <bhe@redhat.com>
>
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index fca58d43b836..d0e5b9fa0c48 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -58,10 +58,9 @@ static void swap_entry_range_free(struct swap_info_struct *si, swp_entry_t entry
> static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
> unsigned int nr_entries);
> static bool folio_swapcache_freeable(struct folio *folio);
> -static struct swap_cluster_info *lock_cluster_or_swap_info(
> - struct swap_info_struct *si, unsigned long offset);
> -static void unlock_cluster_or_swap_info(struct swap_info_struct *si,
> - struct swap_cluster_info *ci);
> +static struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
> + unsigned long offset);
> +static void unlock_cluster(struct swap_cluster_info *ci);
>
> static DEFINE_SPINLOCK(swap_lock);
> static unsigned int nr_swapfiles;
> @@ -222,9 +221,9 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
> * swap_map is HAS_CACHE only, which means the slots have no page table
> * reference or pending writeback, and can't be allocated to others.
> */
> - ci = lock_cluster_or_swap_info(si, offset);
> + ci = lock_cluster(si, offset);
> need_reclaim = swap_is_has_cache(si, offset, nr_pages);
> - unlock_cluster_or_swap_info(si, ci);
> + unlock_cluster(ci);
> if (!need_reclaim)
> goto out_unlock;
>
> @@ -404,45 +403,15 @@ static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si
> {
> struct swap_cluster_info *ci;
>
> - ci = si->cluster_info;
> - if (ci) {
> - ci += offset / SWAPFILE_CLUSTER;
> - spin_lock(&ci->lock);
> - }
> - return ci;
> -}
> -
> -static inline void unlock_cluster(struct swap_cluster_info *ci)
> -{
> - if (ci)
> - spin_unlock(&ci->lock);
> -}
> -
> -/*
> - * Determine the locking method in use for this device. Return
> - * swap_cluster_info if SSD-style cluster-based locking is in place.
> - */
> -static inline struct swap_cluster_info *lock_cluster_or_swap_info(
> - struct swap_info_struct *si, unsigned long offset)
> -{
> - struct swap_cluster_info *ci;
> -
> - /* Try to use fine-grained SSD-style locking if available: */
> - ci = lock_cluster(si, offset);
> - /* Otherwise, fall back to traditional, coarse locking: */
> - if (!ci)
> - spin_lock(&si->lock);
> + ci = &si->cluster_info[offset / SWAPFILE_CLUSTER];
> + spin_lock(&ci->lock);
>
> return ci;
> }
>
> -static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si,
> - struct swap_cluster_info *ci)
> +static inline void unlock_cluster(struct swap_cluster_info *ci)
> {
> - if (ci)
> - unlock_cluster(ci);
> - else
> - spin_unlock(&si->lock);
> + spin_unlock(&ci->lock);
> }
>
> /* Add a cluster to discard list and schedule it to do discard */
> @@ -558,9 +527,6 @@ static void inc_cluster_info_page(struct swap_info_struct *si,
> unsigned long idx = page_nr / SWAPFILE_CLUSTER;
> struct swap_cluster_info *ci;
>
> - if (!cluster_info)
> - return;
> -
> ci = cluster_info + idx;
> ci->count++;
>
> @@ -576,9 +542,6 @@ static void inc_cluster_info_page(struct swap_info_struct *si,
> static void dec_cluster_info_page(struct swap_info_struct *si,
> struct swap_cluster_info *ci, int nr_pages)
> {
> - if (!si->cluster_info)
> - return;
> -
> VM_BUG_ON(ci->count < nr_pages);
> VM_BUG_ON(cluster_is_free(ci));
> lockdep_assert_held(&si->lock);
> @@ -1007,8 +970,6 @@ static int cluster_alloc_swap(struct swap_info_struct *si,
> {
> int n_ret = 0;
>
> - VM_BUG_ON(!si->cluster_info);
> -
> si->flags += SWP_SCANNING;
>
> while (n_ret < nr) {
> @@ -1052,10 +1013,10 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
> }
>
> /*
> - * Swapfile is not block device or not using clusters so unable
> + * Swapfile is not block device so unable
> * to allocate large entries.
> */
> - if (!(si->flags & SWP_BLKDEV) || !si->cluster_info)
> + if (!(si->flags & SWP_BLKDEV))
> return 0;
> }
>
> @@ -1295,9 +1256,9 @@ static unsigned char __swap_entry_free(struct swap_info_struct *si,
> unsigned long offset = swp_offset(entry);
> unsigned char usage;
>
> - ci = lock_cluster_or_swap_info(si, offset);
> + ci = lock_cluster(si, offset);
> usage = __swap_entry_free_locked(si, offset, 1);
> - unlock_cluster_or_swap_info(si, ci);
> + unlock_cluster(ci);
> if (!usage)
> free_swap_slot(entry);
>
> @@ -1320,14 +1281,14 @@ static bool __swap_entries_free(struct swap_info_struct *si,
> if (nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER)
> goto fallback;
>
> - ci = lock_cluster_or_swap_info(si, offset);
> + ci = lock_cluster(si, offset);
> if (!swap_is_last_map(si, offset, nr, &has_cache)) {
> - unlock_cluster_or_swap_info(si, ci);
> + unlock_cluster(ci);
> goto fallback;
> }
> for (i = 0; i < nr; i++)
> WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
> - unlock_cluster_or_swap_info(si, ci);
> + unlock_cluster(ci);
>
> if (!has_cache) {
> for (i = 0; i < nr; i++)
> @@ -1383,7 +1344,7 @@ static void cluster_swap_free_nr(struct swap_info_struct *si,
> DECLARE_BITMAP(to_free, BITS_PER_LONG) = { 0 };
> int i, nr;
>
> - ci = lock_cluster_or_swap_info(si, offset);
> + ci = lock_cluster(si, offset);
> while (nr_pages) {
> nr = min(BITS_PER_LONG, nr_pages);
> for (i = 0; i < nr; i++) {
> @@ -1391,18 +1352,18 @@ static void cluster_swap_free_nr(struct swap_info_struct *si,
> bitmap_set(to_free, i, 1);
> }
> if (!bitmap_empty(to_free, BITS_PER_LONG)) {
> - unlock_cluster_or_swap_info(si, ci);
> + unlock_cluster(ci);
> for_each_set_bit(i, to_free, BITS_PER_LONG)
> free_swap_slot(swp_entry(si->type, offset + i));
> if (nr == nr_pages)
> return;
> bitmap_clear(to_free, 0, BITS_PER_LONG);
> - ci = lock_cluster_or_swap_info(si, offset);
> + ci = lock_cluster(si, offset);
> }
> offset += nr;
> nr_pages -= nr;
> }
> - unlock_cluster_or_swap_info(si, ci);
> + unlock_cluster(ci);
> }
>
> /*
> @@ -1441,9 +1402,9 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
> if (!si)
> return;
>
> - ci = lock_cluster_or_swap_info(si, offset);
> + ci = lock_cluster(si, offset);
> if (size > 1 && swap_is_has_cache(si, offset, size)) {
> - unlock_cluster_or_swap_info(si, ci);
> + unlock_cluster(ci);
> spin_lock(&si->lock);
> swap_entry_range_free(si, entry, size);
> spin_unlock(&si->lock);
> @@ -1451,14 +1412,14 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
> }
> for (int i = 0; i < size; i++, entry.val++) {
> if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) {
> - unlock_cluster_or_swap_info(si, ci);
> + unlock_cluster(ci);
> free_swap_slot(entry);
> if (i == size - 1)
> return;
> - lock_cluster_or_swap_info(si, offset);
> + lock_cluster(si, offset);
> }
> }
> - unlock_cluster_or_swap_info(si, ci);
> + unlock_cluster(ci);
> }
>
> static int swp_entry_cmp(const void *ent1, const void *ent2)
> @@ -1522,9 +1483,9 @@ int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
> struct swap_cluster_info *ci;
> int count;
>
> - ci = lock_cluster_or_swap_info(si, offset);
> + ci = lock_cluster(si, offset);
> count = swap_count(si->swap_map[offset]);
> - unlock_cluster_or_swap_info(si, ci);
> + unlock_cluster(ci);
> return count;
> }
>
> @@ -1547,7 +1508,7 @@ int swp_swapcount(swp_entry_t entry)
>
> offset = swp_offset(entry);
>
> - ci = lock_cluster_or_swap_info(si, offset);
> + ci = lock_cluster(si, offset);
>
> count = swap_count(si->swap_map[offset]);
> if (!(count & COUNT_CONTINUED))
> @@ -1570,7 +1531,7 @@ int swp_swapcount(swp_entry_t entry)
> n *= (SWAP_CONT_MAX + 1);
> } while (tmp_count & COUNT_CONTINUED);
> out:
> - unlock_cluster_or_swap_info(si, ci);
> + unlock_cluster(ci);
> return count;
> }
>
> @@ -1585,8 +1546,8 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
> int i;
> bool ret = false;
>
> - ci = lock_cluster_or_swap_info(si, offset);
> - if (!ci || nr_pages == 1) {
> + ci = lock_cluster(si, offset);
> + if (nr_pages == 1) {
> if (swap_count(map[roffset]))
> ret = true;
> goto unlock_out;
> @@ -1598,7 +1559,7 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
> }
> }
> unlock_out:
> - unlock_cluster_or_swap_info(si, ci);
> + unlock_cluster(ci);
> return ret;
> }
>
> @@ -3428,7 +3389,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
> offset = swp_offset(entry);
> VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);
> VM_WARN_ON(usage == 1 && nr > 1);
> - ci = lock_cluster_or_swap_info(si, offset);
> + ci = lock_cluster(si, offset);
>
> err = 0;
> for (i = 0; i < nr; i++) {
> @@ -3483,7 +3444,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
> }
>
> unlock_out:
> - unlock_cluster_or_swap_info(si, ci);
> + unlock_cluster(ci);
> return err;
> }
>
> --
> 2.47.1
>
>
next prev parent reply other threads:[~2025-01-09 4:08 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-30 17:46 [PATCH v3 00/13] mm, swap: rework of swap allocator locks Kairui Song
2024-12-30 17:46 ` [PATCH v3 01/13] mm, swap: minor clean up for swap entry allocation Kairui Song
2025-01-09 4:04 ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 02/13] mm, swap: fold swap_info_get_cont in the only caller Kairui Song
2025-01-09 4:05 ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 03/13] mm, swap: remove old allocation path for HDD Kairui Song
2025-01-09 4:06 ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 04/13] mm, swap: use cluster lock " Kairui Song
2025-01-09 4:07 ` Baoquan He [this message]
2024-12-30 17:46 ` [PATCH v3 05/13] mm, swap: clean up device availability check Kairui Song
2025-01-09 4:08 ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 06/13] mm, swap: clean up plist removal and adding Kairui Song
2025-01-02 8:59 ` Baoquan He
2025-01-03 8:07 ` Kairui Song
2024-12-30 17:46 ` [PATCH v3 07/13] mm, swap: hold a reference during scan and cleanup flag usage Kairui Song
2025-01-04 5:46 ` Baoquan He
2025-01-13 5:34 ` Kairui Song
2025-01-20 2:39 ` Baoquan He
2025-01-27 9:19 ` Kairui Song
2025-02-05 9:18 ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 08/13] mm, swap: use an enum to define all cluster flags and wrap flags changes Kairui Song
2025-01-06 8:43 ` Baoquan He
2025-01-13 5:49 ` Kairui Song
2024-12-30 17:46 ` [PATCH v3 09/13] mm, swap: reduce contention on device lock Kairui Song
2025-01-06 10:12 ` Baoquan He
2025-01-08 11:09 ` Baoquan He
2025-01-09 2:15 ` Kairui Song
2025-01-10 11:23 ` Baoquan He
2025-01-13 6:33 ` Kairui Song
2025-01-13 8:07 ` Kairui Song
2024-12-30 17:46 ` [PATCH v3 10/13] mm, swap: simplify percpu cluster updating Kairui Song
2025-01-09 2:07 ` Baoquan He
2024-12-30 17:46 ` [PATCH v3 11/13] mm, swap: introduce a helper for retrieving cluster from offset Kairui Song
2024-12-30 17:46 ` [PATCH v3 12/13] mm, swap: use a global swap cluster for non-rotation devices Kairui Song
2024-12-30 17:46 ` [PATCH v3 13/13] mm, swap_slots: remove slot cache for freeing path Kairui Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Z39Lj5Vr4sfb0IlA@MiWiFi-R3L-srv \
--to=bhe@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=chrisl@kernel.org \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kaleshsingh@google.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nphamcs@gmail.com \
--cc=ryan.roberts@arm.com \
--cc=v-songbaohua@oppo.com \
--cc=ying.huang@linux.alibaba.com \
--cc=yosryahmed@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.