From: chrisl@kernel.org
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Kairui Song <kasong@tencent.com>, Hugh Dickins <hughd@google.com>,
Ryan Roberts <ryan.roberts@arm.com>,
"Huang, Ying" <ying.huang@intel.com>,
Kalesh Singh <kaleshsingh@google.com>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Chris Li <chrisl@kernel.org>, Barry Song <baohua@kernel.org>
Subject: [PATCH v5 5/9] mm: swap: skip slot cache on freeing for mTHP
Date: Tue, 30 Jul 2024 23:49:17 -0700 [thread overview]
Message-ID: <20240730-swap-allocator-v5-5-cb9c148b9297@kernel.org> (raw)
In-Reply-To: <20240730-swap-allocator-v5-0-cb9c148b9297@kernel.org>
From: Kairui Song <kasong@tencent.com>
Currently when we are freeing mTHP folios from swap cache, we free
then one by one and put each entry into swap slot cache. Slot
cache is designed to reduce the overhead by batching the freeing,
but mTHP swap entries are already continuous so they can be batch
freed without it already, it saves litle overhead, or even increase
overhead for larger mTHP.
What's more, mTHP entries could stay in swap cache for a while.
Contiguous swap entry is an rather rare resource so releasing them
directly can help improve mTHP allocation success rate when under
pressure.
Signed-off-by: Kairui Song <kasong@tencent.com>
---
mm/swapfile.c | 59 ++++++++++++++++++++++++++---------------------------------
1 file changed, 26 insertions(+), 33 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 34e6ea13e8e4..9b63b2262cc2 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -479,20 +479,21 @@ static void inc_cluster_info_page(struct swap_info_struct *p,
}
/*
- * The cluster ci decreases one usage. If the usage counter becomes 0,
+ * The cluster ci decreases @nr_pages usage. If the usage counter becomes 0,
* which means no page in the cluster is in use, we can optionally discard
* the cluster and add it to free cluster list.
*/
-static void dec_cluster_info_page(struct swap_info_struct *p, struct swap_cluster_info *ci)
+static void dec_cluster_info_page(struct swap_info_struct *p,
+ struct swap_cluster_info *ci, int nr_pages)
{
if (!p->cluster_info)
return;
- VM_BUG_ON(ci->count == 0);
+ VM_BUG_ON(ci->count < nr_pages);
VM_BUG_ON(cluster_is_free(ci));
lockdep_assert_held(&p->lock);
lockdep_assert_held(&ci->lock);
- ci->count--;
+ ci->count -= nr_pages;
if (!ci->count) {
free_cluster(p, ci);
@@ -998,19 +999,6 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
return n_ret;
}
-static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx)
-{
- unsigned long offset = idx * SWAPFILE_CLUSTER;
- struct swap_cluster_info *ci;
-
- ci = lock_cluster(si, offset);
- memset(si->swap_map + offset, 0, SWAPFILE_CLUSTER);
- ci->count = 0;
- free_cluster(si, ci);
- unlock_cluster(ci);
- swap_range_free(si, offset, SWAPFILE_CLUSTER);
-}
-
int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
{
int order = swap_entry_order(entry_order);
@@ -1269,21 +1257,28 @@ static unsigned char __swap_entry_free(struct swap_info_struct *p,
return usage;
}
-static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry)
+/*
+ * Drop the last HAS_CACHE flag of swap entries, caller have to
+ * ensure all entries belong to the same cgroup.
+ */
+static void swap_entry_range_free(struct swap_info_struct *p, swp_entry_t entry,
+ unsigned int nr_pages)
{
- struct swap_cluster_info *ci;
unsigned long offset = swp_offset(entry);
- unsigned char count;
+ unsigned char *map = p->swap_map + offset;
+ unsigned char *map_end = map + nr_pages;
+ struct swap_cluster_info *ci;
ci = lock_cluster(p, offset);
- count = p->swap_map[offset];
- VM_BUG_ON(count != SWAP_HAS_CACHE);
- p->swap_map[offset] = 0;
- dec_cluster_info_page(p, ci);
+ do {
+ VM_BUG_ON(*map != SWAP_HAS_CACHE);
+ *map = 0;
+ } while (++map < map_end);
+ dec_cluster_info_page(p, ci, nr_pages);
unlock_cluster(ci);
- mem_cgroup_uncharge_swap(entry, 1);
- swap_range_free(p, offset, 1);
+ mem_cgroup_uncharge_swap(entry, nr_pages);
+ swap_range_free(p, offset, nr_pages);
}
static void cluster_swap_free_nr(struct swap_info_struct *sis,
@@ -1343,7 +1338,6 @@ void swap_free_nr(swp_entry_t entry, int nr_pages)
void put_swap_folio(struct folio *folio, swp_entry_t entry)
{
unsigned long offset = swp_offset(entry);
- unsigned long idx = offset / SWAPFILE_CLUSTER;
struct swap_cluster_info *ci;
struct swap_info_struct *si;
unsigned char *map;
@@ -1356,19 +1350,18 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
return;
ci = lock_cluster_or_swap_info(si, offset);
- if (size == SWAPFILE_CLUSTER) {
+ if (size > 1) {
map = si->swap_map + offset;
- for (i = 0; i < SWAPFILE_CLUSTER; i++) {
+ for (i = 0; i < size; i++) {
val = map[i];
VM_BUG_ON(!(val & SWAP_HAS_CACHE));
if (val == SWAP_HAS_CACHE)
free_entries++;
}
- if (free_entries == SWAPFILE_CLUSTER) {
+ if (free_entries == size) {
unlock_cluster_or_swap_info(si, ci);
spin_lock(&si->lock);
- mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER);
- swap_free_cluster(si, idx);
+ swap_entry_range_free(si, entry, size);
spin_unlock(&si->lock);
return;
}
@@ -1413,7 +1406,7 @@ void swapcache_free_entries(swp_entry_t *entries, int n)
for (i = 0; i < n; ++i) {
p = swap_info_get_cont(entries[i], prev);
if (p)
- swap_entry_free(p, entries[i]);
+ swap_entry_range_free(p, entries[i], 1);
prev = p;
}
if (p)
--
2.46.0.rc1.232.g9752f9e123-goog
next prev parent reply other threads:[~2024-07-31 6:49 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-31 6:49 [PATCH v5 0/9] mm: swap: mTHP swap allocator base on swap cluster order Chris Li
2024-07-31 6:49 ` [PATCH v5 1/9] mm: swap: swap cluster switch to double link list Chris Li
2024-07-31 6:49 ` [PATCH v5 2/9] mm: swap: mTHP allocate swap entries from nonfull list Chris Li
[not found] ` <87bk23250r.fsf@yhuang6-desk2.ccr.corp.intel.com>
2024-08-16 8:01 ` Chris Li
2024-08-19 8:08 ` Huang, Ying
2024-08-26 21:26 ` Chris Li
2024-09-09 7:19 ` Huang, Ying
2024-07-31 6:49 ` [PATCH v5 3/9] mm: swap: separate SSD allocation from scan_swap_map_slots() Chris Li
2024-07-31 6:49 ` [PATCH v5 4/9] mm: swap: clean up initialization helper chrisl
2024-07-31 6:49 ` chrisl [this message]
2024-08-03 9:11 ` [PATCH v5 5/9] mm: swap: skip slot cache on freeing for mTHP Barry Song
2024-08-03 10:57 ` Barry Song
2024-07-31 6:49 ` [PATCH v5 6/9] mm: swap: allow cache reclaim to skip slot cache chrisl
2024-08-03 10:38 ` Barry Song
2024-08-03 12:18 ` Kairui Song
2024-08-04 18:06 ` Chris Li
2024-08-05 1:53 ` Barry Song
2024-07-31 6:49 ` [PATCH v5 7/9] mm: swap: add a fragment cluster list chrisl
2024-07-31 6:49 ` [PATCH v5 8/9] mm: swap: relaim the cached parts that got scanned chrisl
2024-07-31 6:49 ` [PATCH v5 9/9] mm: swap: add a adaptive full cluster cache reclaim chrisl
2024-08-01 9:14 ` [PATCH v5 0/9] mm: swap: mTHP swap allocator base on swap cluster order David Hildenbrand
2024-08-01 9:59 ` Kairui Song
2024-08-01 10:06 ` Kairui Song
[not found] ` <87le17z9zr.fsf@yhuang6-desk2.ccr.corp.intel.com>
2024-08-16 7:36 ` Chris Li
2024-08-17 17:47 ` Kairui Song
[not found] ` <87h6bw3gxl.fsf@yhuang6-desk2.ccr.corp.intel.com>
[not found] ` <CACePvbXH8b9SOePQ-Ld_UBbcAdJ3gdYtEkReMto5Hbq9WAL7JQ@mail.gmail.com>
[not found] ` <87sevfza3w.fsf@yhuang6-desk2.ccr.corp.intel.com>
2024-08-16 7:47 ` Chris Li
2024-08-18 16:59 ` Kairui Song
2024-08-19 8:27 ` Huang, Ying
2024-08-19 8:47 ` Kairui Song
2024-08-19 21:27 ` Chris Li
2024-08-19 8:39 ` Huang, Ying
2024-09-02 1:20 ` Andrew Morton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240730-swap-allocator-v5-5-cb9c148b9297@kernel.org \
--to=chrisl@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=baohua@kernel.org \
--cc=hughd@google.com \
--cc=kaleshsingh@google.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ryan.roberts@arm.com \
--cc=ying.huang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).