From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
Matthew Wilcox <willy@infradead.org>,
Hugh Dickins <hughd@google.com>, Chris Li <chrisl@kernel.org>,
David Hildenbrand <david@redhat.com>,
Yosry Ahmed <yosryahmed@google.com>,
"Huang, Ying" <ying.huang@linux.alibaba.com>,
Nhat Pham <nphamcs@gmail.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Baolin Wang <baolin.wang@linux.alibaba.com>,
Baoquan He <bhe@redhat.com>, Barry Song <baohua@kernel.org>,
Kalesh Singh <kaleshsingh@google.com>,
Kemeng Shi <shikemeng@huaweicloud.com>,
Tim Chen <tim.c.chen@linux.intel.com>,
Ryan Roberts <ryan.roberts@arm.com>,
linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH 24/28] mm, swap: implement helpers for reserving data in swap table
Date: Thu, 15 May 2025 04:17:24 +0800 [thread overview]
Message-ID: <20250514201729.48420-25-ryncsn@gmail.com> (raw)
In-Reply-To: <20250514201729.48420-1-ryncsn@gmail.com>
From: Kairui Song <kasong@tencent.com>
To prepare for using the swap table as the unified swap layer, introduce
macros and helpers for storing multiple kinds of data in a swap table
entry.
Signed-off-by: Kairui Song <kasong@tencent.com>
---
mm/swap_table.h | 130 ++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 119 insertions(+), 11 deletions(-)
diff --git a/mm/swap_table.h b/mm/swap_table.h
index 69a074339444..9356004d211a 100644
--- a/mm/swap_table.h
+++ b/mm/swap_table.h
@@ -5,9 +5,41 @@
#include "swap.h"
/*
- * Swap table entry could be a pointer (folio), a XA_VALUE (shadow), or NULL.
+ * Swap table entry type and bit layouts:
+ *
+ * NULL: | ------------ 0 -------------|
+ * Shadow: | SWAP_COUNT |---- SHADOW_VAL ---|1|
+ * Folio: | SWAP_COUNT |------ PFN -------|10|
+ * Pointer: |----------- Pointer ----------|100|
+ *
+ * Usage:
+ * - NULL: Swap Entry is unused.
+ *
+ * - Shadow: Swap Entry is used and not cached (swapped out).
+ * It reuses the XA_VALUE format to be compatible with workingset
+ * shadows. The SHADOW_VAL part may be all 0.
+ *
+ * - Folio: Swap Entry is in cache.
+ *
+ * - Pointer: Not used yet. Only the last three bits of a pointer are
+ * usable, so `100` is reserved for potential pointer use.
*/
+#define ENTRY_COUNT_BITS BITS_PER_BYTE
+#define ENTRY_SHADOW_MARK 0b1UL
+#define ENTRY_PFN_MARK 0b10UL
+#define ENTRY_PFN_LOW_MASK 0b11UL
+#define ENTRY_PFN_SHIFT 2
+#define ENTRY_PFN_MASK ((~0UL) >> ENTRY_COUNT_BITS)
+#define ENTRY_COUNT_MASK (~((~0UL) >> ENTRY_COUNT_BITS))
+#define ENTRY_COUNT_SHIFT (BITS_PER_LONG - BITS_PER_BYTE)
+#define ENTRY_COUNT_MAX ((1 << ENTRY_COUNT_BITS) - 2)
+#define ENTRY_COUNT_BAD ((1 << ENTRY_COUNT_BITS) - 1) /* ENTRY_BAD */
+#define ENTRY_BAD (~0UL)
+
+/* For shadow offset calculation */
+#define SWAP_COUNT_SHIFT ENTRY_COUNT_BITS
+
/*
* Helpers for casting one type of info into a swap table entry.
*/
@@ -19,17 +51,27 @@ static inline swp_te_t null_swp_te(void)
static inline swp_te_t folio_swp_te(struct folio *folio)
{
- BUILD_BUG_ON(sizeof(swp_te_t) != sizeof(void *));
- swp_te_t swp_te = { .counter = (unsigned long)folio };
+ BUILD_BUG_ON((MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT) >
+ (BITS_PER_LONG - ENTRY_PFN_SHIFT - ENTRY_COUNT_BITS));
+ swp_te_t swp_te = {
+ .counter = (folio_pfn(folio) << ENTRY_PFN_SHIFT) | ENTRY_PFN_MARK
+ };
return swp_te;
}
static inline swp_te_t shadow_swp_te(void *shadow)
{
- BUILD_BUG_ON((BITS_PER_XA_VALUE + 1) !=
- BITS_PER_BYTE * sizeof(swp_te_t));
- VM_WARN_ON_ONCE(shadow && !xa_is_value(shadow));
swp_te_t swp_te = { .counter = ((unsigned long)shadow) };
+ BUILD_BUG_ON((BITS_PER_XA_VALUE + 1) != BITS_PER_BYTE * sizeof(swp_te_t));
+ BUILD_BUG_ON((unsigned long)xa_mk_value(0) != ENTRY_SHADOW_MARK);
+ VM_WARN_ON_ONCE(shadow && !xa_is_value(shadow));
+ swp_te.counter |= ENTRY_SHADOW_MARK;
+ return swp_te;
+}
+
+static inline swp_te_t bad_swp_te(void)
+{
+ swp_te_t swp_te = { .counter = ENTRY_BAD };
return swp_te;
}
@@ -43,7 +85,7 @@ static inline bool swp_te_is_null(swp_te_t swp_te)
static inline bool swp_te_is_folio(swp_te_t swp_te)
{
- return !xa_is_value((void *)swp_te.counter) && !swp_te_is_null(swp_te);
+ return ((swp_te.counter & ENTRY_PFN_LOW_MASK) == ENTRY_PFN_MARK);
}
static inline bool swp_te_is_shadow(swp_te_t swp_te)
@@ -51,19 +93,63 @@ static inline bool swp_te_is_shadow(swp_te_t swp_te)
return xa_is_value((void *)swp_te.counter);
}
+static inline bool swp_te_is_valid_shadow(swp_te_t swp_te)
+{
+ /* The shadow could be empty, just for holding the swap count */
+ return xa_is_value((void *)swp_te.counter) &&
+ xa_to_value((void *)swp_te.counter);
+}
+
+static inline bool swp_te_is_bad(swp_te_t swp_te)
+{
+ return swp_te.counter == ENTRY_BAD;
+}
+
+static inline bool __swp_te_is_countable(swp_te_t ent)
+{
+ return (swp_te_is_shadow(ent) || swp_te_is_folio(ent) ||
+ swp_te_is_null(ent));
+}
+
/*
* Helpers for retrieving info from swap table.
*/
static inline struct folio *swp_te_folio(swp_te_t swp_te)
{
VM_WARN_ON(!swp_te_is_folio(swp_te));
- return (void *)swp_te.counter;
+ return pfn_folio((swp_te.counter & ENTRY_PFN_MASK) >> ENTRY_PFN_SHIFT);
}
static inline void *swp_te_shadow(swp_te_t swp_te)
{
VM_WARN_ON(!swp_te_is_shadow(swp_te));
- return (void *)swp_te.counter;
+ return (void *)(swp_te.counter & ~ENTRY_COUNT_MASK);
+}
+
+static inline unsigned char swp_te_get_count(swp_te_t swp_te)
+{
+ VM_WARN_ON(!__swp_te_is_countable(swp_te));
+ return ((swp_te.counter & ENTRY_COUNT_MASK) >> ENTRY_COUNT_SHIFT);
+}
+
+static inline unsigned char swp_te_try_get_count(swp_te_t swp_te)
+{
+ if (__swp_te_is_countable(swp_te))
+ return swp_te_get_count(swp_te);
+ return 0;
+}
+
+static inline swp_te_t swp_te_set_count(swp_te_t swp_te,
+ unsigned char count)
+{
+ VM_BUG_ON(!__swp_te_is_countable(swp_te));
+ VM_BUG_ON(count > ENTRY_COUNT_MAX);
+
+ swp_te.counter &= ~ENTRY_COUNT_MASK;
+ swp_te.counter |= ((unsigned long)count) << ENTRY_COUNT_SHIFT;
+ VM_BUG_ON(swp_te_get_count(swp_te) != count);
+
+ return swp_te;
}
/*
@@ -87,17 +173,39 @@ static inline swp_te_t __swap_table_get(struct swap_cluster_info *ci, pgoff_t of
static inline void __swap_table_set_folio(struct swap_cluster_info *ci, pgoff_t off,
struct folio *folio)
{
- __swap_table_set(ci, off, folio_swp_te(folio));
+ swp_te_t swp_te;
+ unsigned char count;
+
+ swp_te = __swap_table_get(ci, off);
+ count = swp_te_get_count(swp_te);
+ swp_te = swp_te_set_count(folio_swp_te(folio), count);
+
+ __swap_table_set(ci, off, swp_te);
}
static inline void __swap_table_set_shadow(struct swap_cluster_info *ci, pgoff_t off,
void *shadow)
{
- __swap_table_set(ci, off, shadow_swp_te(shadow));
+ swp_te_t swp_te;
+ unsigned char count;
+
+ swp_te = __swap_table_get(ci, off);
+ count = swp_te_get_count(swp_te);
+ swp_te = swp_te_set_count(shadow_swp_te(shadow), count);
+
+ __swap_table_set(ci, off, swp_te);
}
static inline void __swap_table_set_null(struct swap_cluster_info *ci, pgoff_t off)
{
__swap_table_set(ci, off, null_swp_te());
}
+
+static inline void __swap_table_set_count(struct swap_cluster_info *ci, pgoff_t off,
+ unsigned char count)
+{
+ swp_te_t swp_te;
+ swp_te = swp_te_set_count(__swap_table_get(ci, off), count);
+ __swap_table_set(ci, off, swp_te);
+}
#endif
--
2.49.0
next prev parent reply other threads:[~2025-05-14 20:19 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-14 20:17 [PATCH 00/28] mm, swap: introduce swap table Kairui Song
2025-05-14 20:17 ` [PATCH 01/28] mm, swap: don't scan every fragment cluster Kairui Song
2025-05-14 20:17 ` [PATCH 02/28] mm, swap: consolidate the helper for mincore Kairui Song
2025-05-14 20:17 ` [PATCH 03/28] mm/shmem, swap: remove SWAP_MAP_SHMEM Kairui Song
2025-05-14 20:17 ` [PATCH 04/28] mm, swap: split readahead update out of swap cache lookup Kairui Song
2025-05-14 20:17 ` [PATCH 05/28] mm, swap: sanitize swap cache lookup convention Kairui Song
2025-05-19 4:38 ` Barry Song
2025-05-20 3:31 ` Kairui Song
2025-05-20 4:41 ` Barry Song
2025-05-20 19:09 ` Kairui Song
2025-05-20 22:33 ` Barry Song
2025-05-21 2:45 ` Kairui Song
2025-05-21 3:24 ` Barry Song
2025-05-23 2:29 ` Barry Song
2025-05-23 20:01 ` Kairui Song
2025-05-27 7:58 ` Barry Song
2025-05-27 15:11 ` Kairui Song
2025-05-30 8:49 ` Kairui Song
2025-05-30 19:24 ` Kairui Song
2025-05-14 20:17 ` [PATCH 06/28] mm, swap: rearrange swap cluster definition and helpers Kairui Song
2025-05-19 6:26 ` Barry Song
2025-05-20 3:50 ` Kairui Song
2025-05-14 20:17 ` [PATCH 07/28] mm, swap: tidy up swap device and cluster info helpers Kairui Song
2025-05-14 20:17 ` [PATCH 08/28] mm, swap: use swap table for the swap cache and switch API Kairui Song
2025-05-14 20:17 ` [PATCH 09/28] mm/swap: rename __read_swap_cache_async to __swapin_cache_alloc Kairui Song
2025-05-14 20:17 ` [PATCH 10/28] mm, swap: add a swap helper for bypassing only read ahead Kairui Song
2025-05-14 20:17 ` [PATCH 11/28] mm, swap: clean up and consolidate helper for mTHP swapin check Kairui Song
2025-05-15 9:31 ` Klara Modin
2025-05-15 9:39 ` Kairui Song
2025-05-19 7:08 ` Barry Song
2025-05-19 11:09 ` Kairui Song
2025-05-19 11:57 ` Barry Song
2025-05-14 20:17 ` [PATCH 12/28] mm, swap: never bypass the swap cache for SWP_SYNCHRONOUS_IO Kairui Song
2025-05-14 20:17 ` [PATCH 13/28] mm/shmem, swap: avoid redundant Xarray lookup during swapin Kairui Song
2025-05-14 20:17 ` [PATCH 14/28] mm/shmem: never bypass the swap cache for SWP_SYNCHRONOUS_IO Kairui Song
2025-05-14 20:17 ` [PATCH 15/28] mm, swap: split locked entry freeing into a standalone helper Kairui Song
2025-05-14 20:17 ` [PATCH 16/28] mm, swap: use swap cache as the swap in synchronize layer Kairui Song
2025-05-14 20:17 ` [PATCH 17/28] mm, swap: sanitize swap entry management workflow Kairui Song
2025-05-14 20:17 ` [PATCH 18/28] mm, swap: rename and introduce folio_free_swap_cache Kairui Song
2025-05-14 20:17 ` [PATCH 19/28] mm, swap: clean up and improve swap entries batch freeing Kairui Song
2025-05-14 20:17 ` [PATCH 20/28] mm, swap: check swap table directly for checking cache Kairui Song
2025-06-19 10:38 ` Baoquan He
2025-06-19 10:50 ` Kairui Song
2025-06-20 8:04 ` Baoquan He
2025-05-14 20:17 ` [PATCH 21/28] mm, swap: add folio to swap cache directly on allocation Kairui Song
2025-05-14 20:17 ` [PATCH 22/28] mm, swap: drop the SWAP_HAS_CACHE flag Kairui Song
2025-05-14 20:17 ` [PATCH 23/28] mm, swap: remove no longer needed _swap_info_get Kairui Song
2025-05-14 20:17 ` Kairui Song [this message]
2025-05-15 9:40 ` [PATCH 24/28] mm, swap: implement helpers for reserving data in swap table Klara Modin
2025-05-16 2:35 ` Kairui Song
2025-05-14 20:17 ` [PATCH 25/28] mm/workingset: leave highest 8 bits empty for anon shadow Kairui Song
2025-05-14 20:17 ` [PATCH 26/28] mm, swap: minor clean up for swapon Kairui Song
2025-05-14 20:17 ` [PATCH 27/28] mm, swap: use swap table to track swap count Kairui Song
2025-05-14 20:17 ` [PATCH 28/28] mm, swap: implement dynamic allocation of swap table Kairui Song
2025-05-21 18:36 ` Nhat Pham
2025-05-22 4:13 ` Kairui Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250514201729.48420-25-ryncsn@gmail.com \
--to=ryncsn@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=bhe@redhat.com \
--cc=chrisl@kernel.org \
--cc=david@redhat.com \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kaleshsingh@google.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nphamcs@gmail.com \
--cc=ryan.roberts@arm.com \
--cc=shikemeng@huaweicloud.com \
--cc=tim.c.chen@linux.intel.com \
--cc=willy@infradead.org \
--cc=ying.huang@linux.alibaba.com \
--cc=yosryahmed@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.