From: <xu.xin16@zte.com.cn>
To: <akpm@linux-foundation.org>, <david@kernel.org>, <ljs@kernel.org>,
<hughd@google.com>
Cc: <hughd@google.com>, <linux-mm@kvack.org>,
<linux-kernel@vger.kernel.org>, <michel@lespinasse.org>,
<xu.xin16@zte.com.cn>
Subject: [PATCH v4 3/5] ksm: add vm_pgoff into ksm_rmap_item
Date: Sun, 3 May 2026 20:48:43 +0800 (CST) [thread overview]
Message-ID: <20260503204843889ik1YHe8LX_5N0Neyn0ner@zte.com.cn> (raw)
In-Reply-To: <20260503203538194jFwVGloy43M1F3sQGaFt7@zte.com.cn>
From: xu xin <xu.xin16@zte.com.cn>
The reason for adding vm_pgoff to ksm_rmap_item has been discussed in previous
mailing list threads [1][2]. The main purpose is to allow the KSM reverse mapping
to obtain the original VMA's vm_pgoff, so that during anon_vma_tree travering, it
can conditionally locate the VMAs and avoid scanning the entire address space
[0, ULONG_MAX].
To minimize the size impact of adding vm_pgoff to ksm_rmap_item as much as
possible, a trick that David suggested is to use a UNION that groups the members
related to the unstable tree together with the newly added vm_pgoff. The members
that valids only when in unstable tree include oldchecksum and age information.
However, the function should_skip_rmap_item() in the smart scanning needs slight
modification, since this function still uses the age information even when the
rmap_item is in a stable state (the page is not KSM), a situation that occurs
during COW faults. After using union, the size is still 64 byte without increasing.
The setting and resetting of rmap_item->vm_pgoff are similar to rmap_item->anon_vma.
[1] https://lore.kernel.org/all/adTPQSb-qSSHviJN@lucifer/
[2] https://lore.kernel.org/all/202604091806051535BJWZ_FTtdIm3Snk24ei_@zte.com.cn/
Suggested-by: David Hildenbrand (Arm) <david@kernel.org>
Signed-off-by: xu xin <xu.xin16@zte.com.cn>
---
mm/ksm.c | 41 ++++++++++++++++++++++++++++++++++-------
1 file changed, 34 insertions(+), 7 deletions(-)
diff --git a/mm/ksm.c b/mm/ksm.c
index 7d5b76478f0b..0299a53ba7c9 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -195,22 +195,28 @@ struct ksm_stable_node {
* @node: rb node of this rmap_item in the unstable tree
* @head: pointer to stable_node heading this list in the stable tree
* @hlist: link into hlist of rmap_items hanging off that stable_node
- * @age: number of scan iterations since creation
- * @remaining_skips: how many scans to skip
+ * @age: number of scan iterations since creation (unstable node)
+ * @remaining_skips: how many scans to skip (unstable node)
+ * @vm_pgoff: vm_pgoff into the original VMA where the page is mapped (stable node)
*/
struct ksm_rmap_item {
struct ksm_rmap_item *rmap_list;
union {
- struct anon_vma *anon_vma; /* when stable */
+ struct anon_vma *anon_vma; /* for reverse mapping, when stable */
#ifdef CONFIG_NUMA
int nid; /* when node of unstable tree */
#endif
};
struct mm_struct *mm;
unsigned long address; /* + low bits used for flags below */
- unsigned int oldchecksum; /* when unstable */
- rmap_age_t age;
- rmap_age_t remaining_skips;
+ union {
+ struct {
+ unsigned int oldchecksum;
+ rmap_age_t age;
+ rmap_age_t remaining_skips;
+ }; /* when unstable */
+ unsigned long vm_pgoff; /* for reverse mapping, when stable */
+ };
union {
struct rb_node node; /* when node of unstable tree */
struct { /* when listed from stable tree */
@@ -776,6 +782,10 @@ static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
return vma;
}
+/*
+ * break_cow: actively break the write-protect of the VMA. This is calld when
+ * rmap_item has not yet become stable, but page has been merged.
+ */
static void break_cow(struct ksm_rmap_item *rmap_item)
{
struct mm_struct *mm = rmap_item->mm;
@@ -787,6 +797,8 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
* to undo, we also need to drop a reference to the anon_vma.
*/
put_anon_vma(rmap_item->anon_vma);
+ /* Reset pgoff that overlays age-related information. (still unstable) */
+ rmap_item->vm_pgoff = 0;
mmap_read_lock(mm);
vma = find_mergeable_vma(mm, addr);
@@ -899,6 +911,8 @@ static void remove_node_from_stable_tree(struct ksm_stable_node *stable_node)
VM_BUG_ON(stable_node->rmap_hlist_len <= 0);
stable_node->rmap_hlist_len--;
put_anon_vma(rmap_item->anon_vma);
+ /* Reset pgoff that overlays age-related information. */
+ rmap_item->vm_pgoff = 0;
rmap_item->address &= PAGE_MASK;
cond_resched();
}
@@ -1052,6 +1066,8 @@ static void remove_rmap_item_from_tree(struct ksm_rmap_item *rmap_item)
stable_node->rmap_hlist_len--;
put_anon_vma(rmap_item->anon_vma);
+ /* Reset pgoff that overlays age-related information. */
+ rmap_item->vm_pgoff = 0;
rmap_item->head = NULL;
rmap_item->address &= PAGE_MASK;
@@ -1598,8 +1614,15 @@ static int try_to_merge_with_ksm_page(struct ksm_rmap_item *rmap_item,
/* Unstable nid is in union with stable anon_vma: remove first */
remove_rmap_item_from_tree(rmap_item);
- /* Must get reference to anon_vma while still holding mmap_lock */
+ /*
+ * Must get reference to anon_vma while still holding mmap_lock,
+ * We set these two members of stable node here instead of
+ * stable_tree_append(), maybe because we don't want to hold
+ * mmap_read_lock again? Here mmap_read_lock is already held to
+ * find_mergeable_vma before merging.
+ */
rmap_item->anon_vma = vma->anon_vma;
+ rmap_item->vm_pgoff = vma->vm_pgoff;
get_anon_vma(vma->anon_vma);
out:
mmap_read_unlock(mm);
@@ -2458,6 +2481,10 @@ static bool should_skip_rmap_item(struct folio *folio,
if (folio_test_ksm(folio))
return false;
+ /* There is no age information in stable-tree nodes. */
+ if (rmap_item->address & STABLE_FLAG)
+ return false;
+
age = rmap_item->age;
if (age != U8_MAX)
rmap_item->age++;
--
2.25.1
next prev parent reply other threads:[~2026-05-03 12:48 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-03 12:35 [PATCH v4 0/5] KSM: Optimizations for rmap_walk_ksm xu.xin16
2026-05-03 12:39 ` [PATCH v4 1/5] mm/rmap: add tracepoint for rmap_walk xu.xin16
2026-05-03 12:42 ` [PATCH v4 2/5] tools/testing: add rmap walk latency benchmark for KSM, anonymous and file pages xu.xin16
2026-05-03 12:48 ` xu.xin16 [this message]
2026-05-03 12:50 ` [PATCH v4 4/5] ksm: Optimize rmap_walk_ksm by passing a suitable address range xu.xin16
2026-05-03 12:51 ` [PATCH v4 5/5] ksm: add mremap selftests for ksm_rmap_walk xu.xin16
2026-05-03 14:59 ` [PATCH v4 0/5] KSM: Optimizations for rmap_walk_ksm Andrew Morton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260503204843889ik1YHe8LX_5N0Neyn0ner@zte.com.cn \
--to=xu.xin16@zte.com.cn \
--cc=akpm@linux-foundation.org \
--cc=david@kernel.org \
--cc=hughd@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ljs@kernel.org \
--cc=michel@lespinasse.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox