* [RFC PATCH 01/10] mm/vma: introduce VMA virtual page offset field and add helpers
2026-06-29 15:03 [RFC PATCH 00/10] mm/rmap: index MAP_PRIVATE file-backed folios by virt pgoff Lorenzo Stoakes
@ 2026-06-29 15:03 ` Lorenzo Stoakes
2026-06-29 15:03 ` [RFC PATCH 02/10] mm: introduce linear_virt_page_index() Lorenzo Stoakes
` (8 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lorenzo Stoakes @ 2026-06-29 15:03 UTC (permalink / raw)
To: Andrew Morton
Cc: David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Matthew Wilcox,
Jan Kara, Rik van Riel, Harry Yoo, Jann Horn, Zi Yan, Baolin Wang,
Nico Pache, Ryan Roberts, Dev Jain, Barry Song, Lance Yang,
Xu Xin, Chengming Zhou, Miaohe Lin, Naoya Horiguchi,
Matthew Brost, Joshua Hahn, Rakie Kim, Byungchul Park,
Gregory Price, Ying Huang, Alistair Popple, Pedro Falcato,
Peter Xu, Kees Cook, linux-mm, linux-kernel, linux-fsdevel
This patch establishes fields within the vm_area_struct type to store the
virtual page offset of VMAs.
The virtual page offset of a VMA is equal to vma->vm_start >> PAGE_SHIFT if
the VMA is unfaulted or has not been remapped, otherwise it is equal to this
value at the point of first fault.
Currently, anonymous folios belonging to CoW'd MAP_PRIVATE-mapped
file-backed VMAs are tracked by their file offset. By adding virtual offset
as a property of VMAs, we can now track them by their virtual page offset
instead.
By tracking this, we provide the means by which to eliminate this
inconsistency, and more importantly lay the foundations for future work for
the scalable CoW anonymous rmap rework.
This patch simply adds the fields and some simple helpers. Subsequent
patches will update mm code to make use of these fields correctly.
The fields chosen are packed in the VMA such that, for 64-bit kernel
builds, no additional space is taken up.
The first field is present on cacheline 0 containing key VMA fields, and
the second on cacheline 3, which contains file-backed reverse mapping
fields.
Given the relative time spent accessing reverse mapping fields as well as
updating them, there shouldn't be any performance impact here from false
sharing.
Update the VMA userland tests to account for this change.
No callsites are updated yet, so no functional change intended.
Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
---
include/linux/mm.h | 59 +++++++++++++++++++++++++++++++++
include/linux/mm_types.h | 4 +++
mm/vma.h | 14 ++++++++
mm/vma_init.c | 1 +
tools/testing/vma/include/dup.h | 26 +++++++++++++++
5 files changed, 104 insertions(+)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 868b2334bff3..cd826c052be1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4335,6 +4335,65 @@ static inline pgoff_t vma_last_pgoff(const struct vm_area_struct *vma)
return vma_end_pgoff(vma) - 1;
}
+/**
+ * vma_start_virt_pgoff() - Get the virtual page offset of the start of @vma
+ * @vma: The VMA whose virtual page offset is required.
+ *
+ * If unfaulted, then this is vma->vm_start >> PAGE_SHIFT, if faulted then the
+ * virtual page offset at the time of first fault.
+ *
+ * If the VMA is anonymous, this returns the same value as vma_start_pgoff().
+ *
+ * This value is used for tracking MAP_PRIVATE file-backed mappings by their
+ * virtual page offset.
+ *
+ * Returns: The virtual page offset of the start of @vma.
+ */
+static inline pgoff_t vma_start_virt_pgoff(const struct vm_area_struct *vma)
+{
+ pgoff_t pgoff = 0;
+
+#ifdef CONFIG_64BIT
+ pgoff += vma->__vm_virt_pgoff_hi;
+ pgoff <<= 32;
+#endif
+ pgoff += vma->__vm_virt_pgoff_lo;
+ return pgoff;
+}
+
+/**
+ * vma_end_virt_pgoff() - Get the virtual page offset of the exclusive end of
+ * @vma.
+ * @vma: The VMA whose end virtual page offset is required.
+ *
+ * This returns the virtual exclusive end page offset of @vma, which is useful
+ * for expressing page offset ranges.
+ *
+ * See the description of vma_start_virt_pgoff() for a description of VMA
+ * virtual page offsets.
+ *
+ * Returns: The exclusive end virtual page offset of @vma.
+ */
+static inline pgoff_t vma_end_virt_pgoff(const struct vm_area_struct *vma)
+{
+ return vma_start_virt_pgoff(vma) + vma_pages(vma);
+}
+
+/**
+ * vma_last_virt_pgoff() - Get the virtual page offset of the last page in
+ * @vma.
+ * @vma: The VMA whose last virtual page offset is required.
+ *
+ * See the description of vma_start_virt_pgoff() for a description of VMA
+ * virtual page offsets.
+ *
+ * Returns: The last virtual page offset of @vma.
+ */
+static inline pgoff_t vma_last_virt_pgoff(const struct vm_area_struct *vma)
+{
+ return vma_end_virt_pgoff(vma) - 1;
+}
+
static inline unsigned long vma_desc_size(const struct vm_area_desc *desc)
{
return desc->end - desc->start;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index b18c2b2e7d2c..b1bf3db84ee7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -964,6 +964,7 @@ struct vm_area_struct {
*/
unsigned int vm_lock_seq;
#endif
+ unsigned int __vm_virt_pgoff_lo; /* Low 32-bits of virtual pgoff. */
/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
* list, after a COW of one of the file pages. A MAP_SHARED vma
@@ -1038,6 +1039,9 @@ struct vm_area_struct {
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map vmlock_dep_map;
#endif
+#endif
+#ifdef CONFIG_64BIT
+ unsigned int __vm_virt_pgoff_hi; /* High 32-bits of virtual pgoff. */
#endif
/*
* For areas with an address space and backing store,
diff --git a/mm/vma.h b/mm/vma.h
index f4f885615a92..68fb2f49bbab 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -263,6 +263,20 @@ static inline void vma_set_pgoff(struct vm_area_struct *vma, pgoff_t pgoff)
vma->vm_pgoff = pgoff;
}
+static inline void __vma_set_virt_pgoff(struct vm_area_struct *vma, pgoff_t pgoff)
+{
+#ifdef CONFIG_64BIT
+ vma->__vm_virt_pgoff_hi = pgoff >> 32;
+#endif
+ vma->__vm_virt_pgoff_lo = pgoff & GENMASK(31, 0);
+}
+
+static inline void vma_set_virt_pgoff(struct vm_area_struct *vma, pgoff_t pgoff)
+{
+ vma_assert_can_modify(vma);
+ __vma_set_virt_pgoff(vma, pgoff);
+}
+
static inline void vma_add_pgoff(struct vm_area_struct *vma, pgoff_t delta)
{
vma_assert_can_modify(vma);
diff --git a/mm/vma_init.c b/mm/vma_init.c
index 715feee283f0..710b18849a36 100644
--- a/mm/vma_init.c
+++ b/mm/vma_init.c
@@ -51,6 +51,7 @@ static void vm_area_init_from(const struct vm_area_struct *src,
dest->vm_end = src->vm_end;
dest->anon_vma = src->anon_vma;
dest->vm_pgoff = vma_start_pgoff(src);
+ __vma_set_virt_pgoff(dest, vma_start_virt_pgoff(src));
dest->vm_file = src->vm_file;
dest->vm_private_data = src->vm_private_data;
vm_flags_init(dest, src->vm_flags);
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 5d7d0afd7765..09cfbf9572e8 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -573,6 +573,7 @@ struct vm_area_struct {
*/
unsigned int vm_lock_seq;
#endif
+ unsigned int __vm_virt_pgoff_lo;
/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
@@ -608,6 +609,9 @@ struct vm_area_struct {
#ifdef CONFIG_PER_VMA_LOCK
/* Unstable RCU readers are allowed to read this. */
refcount_t vm_refcnt;
+#endif
+#ifdef CONFIG_64BIT
+ unsigned int __vm_virt_pgoff_hi;
#endif
/*
* For areas with an address space and backing store,
@@ -1322,6 +1326,28 @@ static inline pgoff_t vma_end_pgoff(const struct vm_area_struct *vma)
return vma_start_pgoff(vma) + vma_pages(vma);
}
+static inline pgoff_t vma_start_virt_pgoff(const struct vm_area_struct *vma)
+{
+ pgoff_t pgoff = 0;
+
+#ifdef CONFIG_64BIT
+ pgoff += vma->__vm_virt_pgoff_hi;
+ pgoff <<= 32;
+#endif
+ pgoff += vma->__vm_virt_pgoff_lo;
+ return pgoff;
+}
+
+static inline pgoff_t vma_end_virt_pgoff(const struct vm_area_struct *vma)
+{
+ return vma_start_virt_pgoff(vma) + vma_pages(vma);
+}
+
+static inline pgoff_t vma_last_virt_pgoff(const struct vm_area_struct *vma)
+{
+ return vma_end_virt_pgoff(vma) - 1;
+}
+
static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
{
return file->f_op->mmap_prepare(desc);
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 02/10] mm: introduce linear_virt_page_index()
2026-06-29 15:03 [RFC PATCH 00/10] mm/rmap: index MAP_PRIVATE file-backed folios by virt pgoff Lorenzo Stoakes
2026-06-29 15:03 ` [RFC PATCH 01/10] mm/vma: introduce VMA virtual page offset field and add helpers Lorenzo Stoakes
@ 2026-06-29 15:03 ` Lorenzo Stoakes
2026-06-29 15:03 ` [RFC PATCH 03/10] mm: abstract vma_address() and introduce vma_anon_address() Lorenzo Stoakes
` (7 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lorenzo Stoakes @ 2026-06-29 15:03 UTC (permalink / raw)
To: Andrew Morton
Cc: David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Matthew Wilcox,
Jan Kara, Rik van Riel, Harry Yoo, Jann Horn, Zi Yan, Baolin Wang,
Nico Pache, Ryan Roberts, Dev Jain, Barry Song, Lance Yang,
Xu Xin, Chengming Zhou, Miaohe Lin, Naoya Horiguchi,
Matthew Brost, Joshua Hahn, Rakie Kim, Byungchul Park,
Gregory Price, Ying Huang, Alistair Popple, Pedro Falcato,
Peter Xu, Kees Cook, linux-mm, linux-kernel, linux-fsdevel
This function provides the virtual equivalent of linear_page_index(),
instead offsetting based on the virtual page offset of the VMA.
It is valid only for anonymous or MAP_PRIVATE file-backed mappings. It must
not be called for shared file-backed mappings.
For pure anon VMAs, this will be equal to linear_page_index().
We implement the algorithm in __linear_virt_page_index(), which is provided
for internal mm code that might be interacting with shared VMAs.
In linear_virt_page_index() we assert that both of these invariants are
true.
Note that MAP_PRIVATE-/dev/zero mappings will satisfy vma_is_anonymous()
but not fulfill this invariant, so when asserting this we check
vma->vm_file to account for this.
We do not update callsites yet, so no functional change intended.
Also const-ify vma_is_anonymous() to make it compatible with the
const-ified linear_virt_page_index().
VMA userland tests are updated accordingly.
Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
---
include/linux/mm.h | 2 +-
include/linux/pagemap.h | 41 +++++++++++++++++++++++++++++++++
tools/testing/vma/include/dup.h | 22 ++++++++++++++++++
3 files changed, 64 insertions(+), 1 deletion(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cd826c052be1..9451aa537abb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1533,7 +1533,7 @@ static inline void vma_desc_set_anonymous(struct vm_area_desc *desc)
desc->vm_ops = NULL;
}
-static inline bool vma_is_anonymous(struct vm_area_struct *vma)
+static inline bool vma_is_anonymous(const struct vm_area_struct *vma)
{
return !vma->vm_ops;
}
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 68a88d34a468..6e0d719d639a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1105,6 +1105,47 @@ static inline pgoff_t linear_page_index(const struct vm_area_struct *vma,
return pgoff;
}
+static inline pgoff_t __linear_virt_page_index(const struct vm_area_struct *vma,
+ const unsigned long address)
+{
+ pgoff_t pgoff;
+
+ pgoff = linear_page_delta(vma, address);
+ pgoff += vma_start_virt_pgoff(vma);
+ return pgoff;
+}
+
+/**
+ * linear_virt_page_index() - Determine the absolute virtual page offset of
+ * @address within @vma.
+ * @vma: An anonymous or MAP_PRIVATE file-backed VMA in which @address resides.
+ * @address: The address whose absolute page offset is required.
+ *
+ * This returns the virtual page offset of @address, which is the page offset
+ * the address possessed at the time the VMA was first faulted.
+ *
+ * For anonymous mappings, this returns the same value as linear_page_index().
+ *
+ * For MAP_PRIVATE file-backed mappings, this returns the virtual page offset of
+ * @address, which is the page offset the address possessed at the time the VMA
+ * was first faulted.
+ *
+ * It is not valid to call this function for shared file-backed mappings.
+ *
+ * Returns: The absolute virtual page offset of @address within @vma.
+ */
+static inline pgoff_t linear_virt_page_index(const struct vm_area_struct *vma,
+ const unsigned long address)
+{
+ const pgoff_t pgoff = __linear_virt_page_index(vma, address);
+
+ VM_WARN_ON_ONCE(vma_test(vma, VMA_SHARED_BIT));
+ if (!vma->vm_file) /* Is anonymous except MAP_PRIVATE-/dev/zero */
+ VM_WARN_ON_ONCE(pgoff != linear_page_index(vma, address));
+
+ return pgoff;
+}
+
struct wait_page_key {
struct folio *folio;
int bit_nr;
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 09cfbf9572e8..bac6caf2eaa2 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -1601,3 +1601,25 @@ static inline pgoff_t linear_page_index(const struct vm_area_struct *vma,
pgoff += vma_start_pgoff(vma);
return pgoff;
}
+
+static inline pgoff_t __linear_virt_page_index(const struct vm_area_struct *vma,
+ const unsigned long address)
+{
+ pgoff_t pgoff;
+
+ pgoff = linear_page_delta(vma, address);
+ pgoff += vma_start_virt_pgoff(vma);
+ return pgoff;
+}
+
+static inline pgoff_t linear_virt_page_index(const struct vm_area_struct *vma,
+ const unsigned long address)
+{
+ const pgoff_t pgoff = __linear_virt_page_index(vma, address);
+
+ VM_WARN_ON_ONCE(vma_test(vma, VMA_SHARED_BIT));
+ if (!vma->vm_file) /* Is anonymous except MAP_PRIVATE-/dev/zero */
+ VM_WARN_ON_ONCE(pgoff != linear_page_index(vma, address));
+
+ return pgoff;
+}
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 03/10] mm: abstract vma_address() and introduce vma_anon_address()
2026-06-29 15:03 [RFC PATCH 00/10] mm/rmap: index MAP_PRIVATE file-backed folios by virt pgoff Lorenzo Stoakes
2026-06-29 15:03 ` [RFC PATCH 01/10] mm/vma: introduce VMA virtual page offset field and add helpers Lorenzo Stoakes
2026-06-29 15:03 ` [RFC PATCH 02/10] mm: introduce linear_virt_page_index() Lorenzo Stoakes
@ 2026-06-29 15:03 ` Lorenzo Stoakes
2026-06-29 15:03 ` [RFC PATCH 04/10] mm: update print_bad_page_map() to show virtual page index Lorenzo Stoakes
` (6 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lorenzo Stoakes @ 2026-06-29 15:03 UTC (permalink / raw)
To: Andrew Morton
Cc: David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Matthew Wilcox,
Jan Kara, Rik van Riel, Harry Yoo, Jann Horn, Zi Yan, Baolin Wang,
Nico Pache, Ryan Roberts, Dev Jain, Barry Song, Lance Yang,
Xu Xin, Chengming Zhou, Miaohe Lin, Naoya Horiguchi,
Matthew Brost, Joshua Hahn, Rakie Kim, Byungchul Park,
Gregory Price, Ying Huang, Alistair Popple, Pedro Falcato,
Peter Xu, Kees Cook, linux-mm, linux-kernel, linux-fsdevel
Introduce __vma_address() which abstracts the VMA start page offset field
as pgoff_start, then update vma_address() to use it.
Then introduce vma_anon_address() which does the equivalent of
vma_address(), only using the virtual page offset of the VMA rather than
the file-backed one.
Also add an assert to ensure that the function is not called for mappings
which are file-backed but not MAP_PRIVATE to ensure it is only used in the
correct places.
This will be necessary for determining the address of a folio's index
within a VMA when the folio belongs to a MAP_PRIVATE file-backed VMA but
has been CoW'd, and thus is anonymous, once the anonymous VMA page offset
field is used for the reverse mapping.
No callers are updated, so no functional change intended.
Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
---
mm/internal.h | 49 +++++++++++++++++++++++++++++++++++++------------
1 file changed, 37 insertions(+), 12 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index e127dfea9c0f..8689f560854f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1224,19 +1224,9 @@ void mlock_drain_remote(int cpu);
extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
-/**
- * vma_address - Find the virtual address a page range is mapped at
- * @vma: The vma which maps this object.
- * @pgoff: The page offset within its object.
- * @nr_pages: The number of pages to consider.
- *
- * If any page in this range is mapped by this VMA, return the first address
- * where any of these pages appear. Otherwise, return -EFAULT.
- */
-static inline unsigned long vma_address(const struct vm_area_struct *vma,
- pgoff_t pgoff, unsigned long nr_pages)
+static inline unsigned long __vma_address(const struct vm_area_struct *vma,
+ pgoff_t pgoff, pgoff_t pgoff_start, unsigned long nr_pages)
{
- const pgoff_t pgoff_start = vma_start_pgoff(vma);
unsigned long address;
if (pgoff >= pgoff_start) {
@@ -1254,6 +1244,41 @@ static inline unsigned long vma_address(const struct vm_area_struct *vma,
return address;
}
+/**
+ * vma_address - Find the virtual address a page range is mapped at.
+ * @vma: The vma which maps this object.
+ * @pgoff: The page offset within its object.
+ * @nr_pages: The number of pages to consider.
+ *
+ * If any page in this range is mapped by this VMA, return the first address
+ * where any of these pages appear. Otherwise, return -EFAULT.
+ */
+static inline unsigned long vma_address(const struct vm_area_struct *vma,
+ pgoff_t pgoff, unsigned long nr_pages)
+{
+ return __vma_address(vma, pgoff, vma_start_pgoff(vma), nr_pages);
+}
+
+/**
+ * vma_anon_address - Find the address an anonymous folio with index @pgoff_virt
+ * is mapped at.
+ * @vma: The vma which maps this object.
+ * @pgoff_virt: The virtual page index belonging to the folio.
+ * @nr_pages: The number of pages to consider.
+ *
+ * This is only valid for anonymous or MAP_PRIVATE-mapped file-backed VMAs.
+ *
+ * Returns: If any page in this range is mapped by this VMA, return the first address
+ * where any of these pages appear. Otherwise, return -EFAULT.
+ */
+static inline unsigned long vma_anon_address(const struct vm_area_struct *vma,
+ pgoff_t pgoff_virt, unsigned long nr_pages)
+{
+ VM_WARN_ON_ONCE(!vma_is_anonymous(vma) && vma_test(vma, VMA_SHARED_BIT));
+
+ return __vma_address(vma, pgoff_virt, vma_start_virt_pgoff(vma), nr_pages);
+}
+
/*
* Then at what user virtual address will none of the range be found in vma?
* Assumes that vma_address() already returned a good starting address.
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 04/10] mm: update print_bad_page_map() to show virtual page index
2026-06-29 15:03 [RFC PATCH 00/10] mm/rmap: index MAP_PRIVATE file-backed folios by virt pgoff Lorenzo Stoakes
` (2 preceding siblings ...)
2026-06-29 15:03 ` [RFC PATCH 03/10] mm: abstract vma_address() and introduce vma_anon_address() Lorenzo Stoakes
@ 2026-06-29 15:03 ` Lorenzo Stoakes
2026-06-29 15:03 ` [RFC PATCH 05/10] mm: introduce and use vma_filebacked_address() Lorenzo Stoakes
` (5 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lorenzo Stoakes @ 2026-06-29 15:03 UTC (permalink / raw)
To: Andrew Morton
Cc: David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Matthew Wilcox,
Jan Kara, Rik van Riel, Harry Yoo, Jann Horn, Zi Yan, Baolin Wang,
Nico Pache, Ryan Roberts, Dev Jain, Barry Song, Lance Yang,
Xu Xin, Chengming Zhou, Miaohe Lin, Naoya Horiguchi,
Matthew Brost, Joshua Hahn, Rakie Kim, Byungchul Park,
Gregory Price, Ying Huang, Alistair Popple, Pedro Falcato,
Peter Xu, Kees Cook, linux-mm, linux-kernel, linux-fsdevel
This is potentially useful debugging information and matches the existing
page offset provided.
We print it for every VMA, although shared mappings ignore the virtual page
offset, so the value is only meaningful if the VMA is not a shared mapping.
We use __linear_virt_page_index() as the VMA may be shared.
Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
---
mm/memory.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index f5eb06544ba4..7890e5200ecb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -588,21 +588,23 @@ static void print_bad_page_map(struct vm_area_struct *vma,
enum pgtable_level level)
{
struct address_space *mapping;
- pgoff_t index;
+ pgoff_t index, virt_index;
if (is_bad_page_map_ratelimited())
return;
mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL;
index = linear_page_index(vma, addr);
+ virt_index = __linear_virt_page_index(vma, addr);
pr_alert("BUG: Bad page map in process %s %s:%08llx", current->comm,
pgtable_level_to_str(level), entry);
__print_bad_page_map_pgtable(vma->vm_mm, addr);
if (page)
dump_page(page, "bad page map");
- pr_alert("addr:%px vm_flags:%08lx anon_vma:%px mapping:%px index:%lx\n",
- (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
+ pr_alert("addr:%px vm_flags:%08lx anon_vma:%px mapping:%px index:%lx virt_index:%lx\n",
+ (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index,
+ virt_index);
pr_alert("file:%pD fault:%ps mmap:%ps mmap_prepare: %ps read_folio:%ps\n",
vma->vm_file,
vma->vm_ops ? vma->vm_ops->fault : NULL,
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 05/10] mm: introduce and use vma_filebacked_address()
2026-06-29 15:03 [RFC PATCH 00/10] mm/rmap: index MAP_PRIVATE file-backed folios by virt pgoff Lorenzo Stoakes
` (3 preceding siblings ...)
2026-06-29 15:03 ` [RFC PATCH 04/10] mm: update print_bad_page_map() to show virtual page index Lorenzo Stoakes
@ 2026-06-29 15:03 ` Lorenzo Stoakes
2026-06-29 15:03 ` [RFC PATCH 06/10] mm: propagate VMA virtual page offset on map, remap, split + merge Lorenzo Stoakes
` (4 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lorenzo Stoakes @ 2026-06-29 15:03 UTC (permalink / raw)
To: Andrew Morton
Cc: David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Matthew Wilcox,
Jan Kara, Rik van Riel, Harry Yoo, Jann Horn, Zi Yan, Baolin Wang,
Nico Pache, Ryan Roberts, Dev Jain, Barry Song, Lance Yang,
Xu Xin, Chengming Zhou, Miaohe Lin, Naoya Horiguchi,
Matthew Brost, Joshua Hahn, Rakie Kim, Byungchul Park,
Gregory Price, Ying Huang, Alistair Popple, Pedro Falcato,
Peter Xu, Kees Cook, linux-mm, linux-kernel, linux-fsdevel
In cases where we know that the VMA is file-backed, use
vma_filebacked_address() rather than vma_address().
This lays the foundation for using the virtual page offset for anonymous
VMAs via vma_anon_address().
Also add an assert to ensure that the VMA whose address is required is not
anonymous.
No functional change intended.
Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
---
mm/internal.h | 18 ++++++++++++++++++
mm/memory-failure.c | 4 ++--
mm/page_vma_mapped.c | 6 +++++-
mm/rmap.c | 10 ++++++----
4 files changed, 31 insertions(+), 7 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 8689f560854f..f1e7e6256b4c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1244,6 +1244,24 @@ static inline unsigned long __vma_address(const struct vm_area_struct *vma,
return address;
}
+/**
+ * vma_filebacked_address - Find the virtual address a file-backed page range is
+ * mapped at.
+ * @vma: The vma which maps this object.
+ * @pgoff: The page offset within its object.
+ * @nr_pages: The number of pages to consider.
+ *
+ * Returns: If any page in this range is mapped by this VMA, return the first
+ * address where any of these pages appear. Otherwise, return -EFAULT.
+ */
+static inline unsigned long vma_filebacked_address(const struct vm_area_struct *vma,
+ pgoff_t pgoff, unsigned long nr_pages)
+{
+ VM_WARN_ON_ONCE(vma_is_anonymous(vma));
+
+ return __vma_address(vma, pgoff, vma_start_pgoff(vma), nr_pages);
+}
+
/**
* vma_address - Find the virtual address a page range is mapped at.
* @vma: The vma which maps this object.
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index cbdec52b6d23..5b7cf2291b09 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -608,7 +608,7 @@ static void add_to_kill_fsdax(struct task_struct *tsk, const struct page *p,
struct vm_area_struct *vma,
struct list_head *to_kill, pgoff_t pgoff)
{
- unsigned long addr = vma_address(vma, pgoff, 1);
+ unsigned long addr = vma_filebacked_address(vma, pgoff, 1);
__add_to_kill(tsk, p, vma, to_kill, addr);
}
@@ -2207,7 +2207,7 @@ static void add_to_kill_pgoff(struct task_struct *tsk,
}
/* Check for pgoff not backed by struct page */
- tk->addr = vma_address(vma, pgoff, 1);
+ tk->addr = vma_filebacked_address(vma, pgoff, 1);
tk->size_shift = PAGE_SHIFT;
if (tk->addr == -EFAULT)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 2ccbabfb2cc1..eff619180e84 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -347,6 +347,7 @@ unsigned long page_mapped_in_vma(const struct page *page,
struct vm_area_struct *vma)
{
const struct folio *folio = page_folio(page);
+ const pgoff_t pgoff = page_pgoff(folio, page);
struct page_vma_mapped_walk pvmw = {
.pfn = page_to_pfn(page),
.nr_pages = 1,
@@ -354,7 +355,10 @@ unsigned long page_mapped_in_vma(const struct page *page,
.flags = PVMW_SYNC,
};
- pvmw.address = vma_address(vma, page_pgoff(folio, page), 1);
+ if (folio_test_anon(folio))
+ pvmw.address = vma_address(vma, pgoff, 1);
+ else
+ pvmw.address = vma_filebacked_address(vma, pgoff, 1);
if (pvmw.address == -EFAULT)
goto out;
if (!page_vma_mapped_walk(&pvmw))
diff --git a/mm/rmap.c b/mm/rmap.c
index 183603813255..0bdb65852222 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -865,14 +865,15 @@ unsigned long page_address_in_vma(const struct folio *folio,
if (!vma->anon_vma || !anon_vma ||
vma->anon_vma->root != anon_vma->root)
return -EFAULT;
+ /* KSM folios don't reach here because of the !anon_vma check */
+ return vma_address(vma, page_pgoff(folio, page), 1);
} else if (!vma->vm_file) {
return -EFAULT;
} else if (vma->vm_file->f_mapping != folio->mapping) {
return -EFAULT;
}
- /* KSM folios don't reach here because of the !anon_vma check */
- return vma_address(vma, page_pgoff(folio, page), 1);
+ return vma_filebacked_address(vma, page_pgoff(folio, page), 1);
}
/*
@@ -1321,7 +1322,7 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
if (invalid_mkclean_vma(vma, NULL))
return 0;
- pvmw.address = vma_address(vma, pgoff, nr_pages);
+ pvmw.address = vma_filebacked_address(vma, pgoff, nr_pages);
VM_BUG_ON_VMA(pvmw.address == -EFAULT, vma);
return page_vma_mkclean_one(&pvmw);
@@ -3051,7 +3052,8 @@ static void __rmap_walk_file(struct folio *folio, struct address_space *mapping,
}
lookup:
mapping_interval_tree_foreach(vma, mapping, pgoff_start, pgoff_end) {
- unsigned long address = vma_address(vma, pgoff_start, nr_pages);
+ unsigned long address = vma_filebacked_address(vma, pgoff_start,
+ nr_pages);
VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 06/10] mm: propagate VMA virtual page offset on map, remap, split + merge
2026-06-29 15:03 [RFC PATCH 00/10] mm/rmap: index MAP_PRIVATE file-backed folios by virt pgoff Lorenzo Stoakes
` (4 preceding siblings ...)
2026-06-29 15:03 ` [RFC PATCH 05/10] mm: introduce and use vma_filebacked_address() Lorenzo Stoakes
@ 2026-06-29 15:03 ` Lorenzo Stoakes
2026-06-29 15:03 ` [RFC PATCH 07/10] mm/rmap: track whether the page VMA mapped walk is anonymous Lorenzo Stoakes
` (3 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lorenzo Stoakes @ 2026-06-29 15:03 UTC (permalink / raw)
To: Andrew Morton
Cc: David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Matthew Wilcox,
Jan Kara, Rik van Riel, Harry Yoo, Jann Horn, Zi Yan, Baolin Wang,
Nico Pache, Ryan Roberts, Dev Jain, Barry Song, Lance Yang,
Xu Xin, Chengming Zhou, Miaohe Lin, Naoya Horiguchi,
Matthew Brost, Joshua Hahn, Rakie Kim, Byungchul Park,
Gregory Price, Ying Huang, Alistair Popple, Pedro Falcato,
Peter Xu, Kees Cook, linux-mm, linux-kernel, linux-fsdevel
We must correctly update VMA virtual page offset state on all VMA
operations that would result in it changing, with special attention given
to remapping.
We cover most cases by simply updating vma_set_range() to do so (with a new
virtual page offset parameter), but also notably must update the merging
and mapping logic to propagate this parameter correctly.
The remap logic remains the same - we may update the virtual page offset if
the VMA is unfaulted, but now this applies to MAP_PRIVATE file-backed
mappings too, so we update the code to reflect this.
Note that we use __linear_virt_page_index() upon remap as the VMA may be
shared, in order that we update the field consistently regardless of VMA
type.
Also while we're here, replace a VM_BUG_ON_VMA() with a
VM_WARN_ON_ONCE_VMA().
We also introduce vma_anon_pgoff_addr(), vma_start_anon_pgoff(), and
vma_end_anon_pgoff(), which differ from their virtual page offset
equivalents in that a shared file-backed mapping returns its file page
offset, whereas otherwise the virtual page offset is used.
This means we don't predicate merges for shared file-backed mappings on
virtual page offset.
We simply ensure state is correctly propagated here, so no functional
changes are intended.
Finally, we update insert_vm_struct() to correctly set the virtual page
offset on insertion of a VMA.
Also update VMA userland tests to reflect this change.
Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
---
mm/mremap.c | 6 +-
mm/vma.c | 56 ++++++++++----
mm/vma.h | 127 ++++++++++++++++++++++++-------
mm/vma_exec.c | 2 +-
tools/testing/vma/shared.c | 3 +-
tools/testing/vma/tests/merge.c | 4 +-
tools/testing/vma/tests/vma.c | 4 +-
tools/testing/vma/vma_internal.h | 1 +
8 files changed, 152 insertions(+), 51 deletions(-)
diff --git a/mm/mremap.c b/mm/mremap.c
index 079a0ba0c4a7..f4cbf7d686b7 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1254,7 +1254,9 @@ static void unmap_source_vma(struct vma_remap_struct *vrm)
static int copy_vma_and_data(struct vma_remap_struct *vrm,
struct vm_area_struct **new_vma_ptr)
{
- const unsigned long new_pgoff = linear_page_index(vrm->vma, vrm->addr);
+ const pgoff_t new_pgoff = linear_page_index(vrm->vma, vrm->addr);
+ const pgoff_t new_virt_pgoff =
+ __linear_virt_page_index(vrm->vma, vrm->addr);
struct vm_area_struct *vma = vrm->vma;
struct vm_area_struct *new_vma;
unsigned long moved_len;
@@ -1262,7 +1264,7 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
PAGETABLE_MOVE(pmc, NULL, NULL, vrm->addr, vrm->new_addr, vrm->old_len);
new_vma = copy_vma(&vma, vrm->new_addr, vrm->new_len, new_pgoff,
- &pmc.need_rmap_locks);
+ new_virt_pgoff, &pmc.need_rmap_locks);
if (!new_vma) {
vrm_uncharge(vrm);
*new_vma_ptr = NULL;
diff --git a/mm/vma.c b/mm/vma.c
index 7201199fc668..c4bb41400751 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -18,6 +18,7 @@ struct mmap_state {
unsigned long addr;
unsigned long end;
pgoff_t pgoff;
+ pgoff_t virt_pgoff;
unsigned long pglen;
union {
vm_flags_t vm_flags;
@@ -46,13 +47,22 @@ struct mmap_state {
bool file_doesnt_need_get :1;
};
-#define MMAP_STATE(name, mm_, vmi_, addr_, len_, pgoff_, vma_flags_, file_) \
+static inline pgoff_t map_anon_pgoff(const struct mmap_state *map)
+{
+ if (vma_flags_test(&map->vma_flags, VMA_SHARED_BIT))
+ return map->pgoff;
+
+ return map->virt_pgoff;
+}
+
+#define MMAP_STATE(name, mm_, vmi_, addr_, len_, pgoff_, virt_pgoff_, vma_flags_, file_) \
struct mmap_state name = { \
.mm = mm_, \
.vmi = vmi_, \
.addr = addr_, \
.end = (addr_) + (len_), \
.pgoff = pgoff_, \
+ .virt_pgoff = virt_pgoff_, \
.pglen = PHYS_PFN(len_), \
.vma_flags = vma_flags_, \
.file = file_, \
@@ -67,6 +77,7 @@ struct mmap_state {
.end = (map_)->end, \
.vma_flags = (map_)->vma_flags, \
.pgoff = (map_)->pgoff, \
+ .anon_pgoff = map_anon_pgoff(map_), \
.file = (map_)->file, \
.prev = (map_)->prev, \
.middle = vma_, \
@@ -82,10 +93,11 @@ static void __vma_set_range(struct vm_area_struct *vma, unsigned long start,
}
static void vma_set_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, pgoff_t pgoff)
+ unsigned long end, pgoff_t pgoff, pgoff_t virt_pgoff)
{
__vma_set_range(vma, start, end);
vma_set_pgoff(vma, pgoff);
+ vma_set_virt_pgoff(vma, virt_pgoff);
}
/* Was this VMA ever forked from a parent, i.e. maybe contains CoW mappings? */
@@ -812,7 +824,8 @@ static int commit_merge(struct vma_merge_struct *vmg)
*/
vma_adjust_trans_huge(vma, vmg->start, vmg->end,
vmg->__adjust_middle_start ? vmg->middle : NULL);
- vma_set_range(vma, vmg->start, vmg->end, vmg_start_pgoff(vmg));
+ vma_set_range(vma, vmg->start, vmg->end, vmg_start_pgoff(vmg),
+ vmg_start_anon_pgoff(vmg));
vmg_adjust_set_range(vmg);
vma_iter_store_overwrite(vmg->vmi, vmg->target);
@@ -982,6 +995,7 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
vmg->start = prev->vm_start;
vmg->end = next->vm_end;
vmg->pgoff = vma_start_pgoff(prev);
+ vmg->anon_pgoff = vma_start_anon_pgoff(prev);
/*
* We already ensured anon_vma compatibility above, so now it's
@@ -1000,6 +1014,7 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
*/
vmg->start = prev->vm_start;
vmg->pgoff = vma_start_pgoff(prev);
+ vmg->anon_pgoff = vma_start_anon_pgoff(prev);
if (!vmg->__remove_middle)
vmg->__adjust_middle_start = true;
@@ -1022,12 +1037,14 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
if (vmg->__remove_middle) {
vmg->end = next->vm_end;
vmg->pgoff = vma_start_pgoff(next) - pglen;
+ vmg->anon_pgoff = vma_start_anon_pgoff(next) - pglen;
} else {
/* We shrink middle and expand next. */
vmg->__adjust_next_start = true;
vmg->start = middle->vm_start;
vmg->end = start;
vmg->pgoff = vma_start_pgoff(middle);
+ vmg->anon_pgoff = vma_start_anon_pgoff(middle);
}
err = dup_anon_vma(next, middle, &anon_dup);
@@ -1137,6 +1154,7 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg)
vmg->start = prev->vm_start;
vmg->target = prev;
vmg->pgoff = vma_start_pgoff(prev);
+ vmg->anon_pgoff = vma_start_anon_pgoff(prev);
/*
* If this merge would result in removal of the next VMA but we
@@ -1911,9 +1929,10 @@ static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
*/
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
unsigned long addr, unsigned long len, pgoff_t pgoff,
- bool *need_rmap_locks)
+ pgoff_t virt_pgoff, bool *need_rmap_locks)
{
struct vm_area_struct *vma = *vmap;
+ const bool is_shared = vma_test(vma, VMA_SHARED_BIT);
unsigned long vma_start = vma->vm_start;
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *new_vma;
@@ -1922,11 +1941,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
VMG_VMA_STATE(vmg, &vmi, NULL, vma, addr, addr + len);
/*
- * If anonymous vma has not yet been faulted, update new pgoff
- * to match new location, to increase its chance of merging.
+ * If a vma has not yet been faulted, update its virtual pgoff to match
+ * the new location to increase its chance of merging.
*/
- if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
- pgoff = addr >> PAGE_SHIFT;
+ if (!vma->anon_vma && !is_shared) {
+ virt_pgoff = addr >> PAGE_SHIFT;
+
+ if (vma_is_anonymous(vma))
+ pgoff = virt_pgoff;
faulted_in_anon_vma = false;
}
@@ -1943,6 +1965,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
return NULL; /* should never get here */
vmg.pgoff = pgoff;
+ vmg.anon_pgoff = is_shared ? pgoff : virt_pgoff;
vmg.next = vma_iter_next_rewind(&vmi, NULL);
new_vma = vma_merge_copied_range(&vmg);
@@ -1964,7 +1987,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
* safe. It is only safe to keep the vm_pgoff
* linear if there are no pages mapped yet.
*/
- VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
+ VM_WARN_ON_ONCE_VMA(faulted_in_anon_vma, new_vma);
*vmap = vma = new_vma;
}
*need_rmap_locks =
@@ -1973,7 +1996,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
new_vma = vm_area_dup(vma);
if (!new_vma)
goto out;
- vma_set_range(new_vma, addr, addr + len, pgoff);
+ vma_set_range(new_vma, addr, addr + len, pgoff, virt_pgoff);
if (vma_dup_policy(vma, new_vma))
goto out_free_vma;
if (anon_vma_clone(new_vma, vma, VMA_OP_REMAP))
@@ -2609,7 +2632,7 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap,
return -ENOMEM;
vma_iter_config(vmi, map->addr, map->end);
- vma_set_range(vma, map->addr, map->end, map->pgoff);
+ vma_set_range(vma, map->addr, map->end, map->pgoff, map->virt_pgoff);
vma->flags = map->vma_flags;
vma->vm_page_prot = map->page_prot;
@@ -2799,7 +2822,8 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
struct vm_area_struct *vma = NULL;
bool have_mmap_prepare = file && file->f_op->mmap_prepare;
VMA_ITERATOR(vmi, mm, addr);
- MMAP_STATE(map, mm, &vmi, addr, len, pgoff, vma_flags, file);
+ const pgoff_t virt_pgoff = addr >> PAGE_SHIFT;
+ MMAP_STATE(map, mm, &vmi, addr, len, pgoff, virt_pgoff, vma_flags, file);
struct vm_area_desc desc = {
.mm = mm,
.file = file,
@@ -2945,6 +2969,7 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long addr, unsigned long len, vma_flags_t vma_flags)
{
struct mm_struct *mm = current->mm;
+ const pgoff_t pgoff = addr >> PAGE_SHIFT;
/*
* Check against address space limits by the changed size
@@ -2969,7 +2994,7 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
* occur after forking, so the expand will only happen on new VMAs.
*/
if (vma && vma->vm_end == addr) {
- VMG_STATE(vmg, mm, vmi, addr, addr + len, vma_flags, PHYS_PFN(addr));
+ VMG_STATE(vmg, mm, vmi, addr, addr + len, vma_flags, pgoff, pgoff);
vmg.prev = vma;
/* vmi is positioned at prev, which this mode expects. */
@@ -2989,7 +3014,7 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
goto unacct_fail;
vma_set_anonymous(vma);
- vma_set_range(vma, addr, addr + len, addr >> PAGE_SHIFT);
+ vma_set_range(vma, addr, addr + len, pgoff, pgoff);
vma->flags = vma_flags;
vma->vm_page_prot = vm_get_page_prot(vma_flags_to_legacy(vma_flags));
vma_start_write(vma);
@@ -3381,6 +3406,7 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
WARN_ON_ONCE(vma->anon_vma);
vma_set_pgoff(vma, vma->vm_start >> PAGE_SHIFT);
}
+ vma_set_virt_pgoff(vma, vma->vm_start >> PAGE_SHIFT);
if (vma_link(mm, vma)) {
if (vma_test(vma, VMA_ACCOUNT_BIT))
@@ -3433,7 +3459,7 @@ struct vm_area_struct *__install_special_mapping(
vma->vm_ops = ops;
vma->vm_private_data = priv;
- vma_set_range(vma, addr, addr + len, 0);
+ vma_set_range(vma, addr, addr + len, 0, addr >> PAGE_SHIFT);
ret = insert_vm_struct(mm, vma);
if (ret)
diff --git a/mm/vma.h b/mm/vma.h
index 68fb2f49bbab..9881d105a0c4 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -104,6 +104,7 @@ struct vma_merge_struct {
unsigned long start;
unsigned long end;
pgoff_t pgoff;
+ pgoff_t anon_pgoff;
union {
/* Temporary while VMA flags are being converted. */
@@ -237,11 +238,6 @@ static inline bool vmg_nomem(struct vma_merge_struct *vmg)
return vmg->state == VMA_MERGE_ERROR_NOMEM;
}
-static inline pgoff_t vmg_start_pgoff(const struct vma_merge_struct *vmg)
-{
- return vmg->pgoff;
-}
-
static inline pgoff_t vmg_pages(const struct vma_merge_struct *vmg)
{
const unsigned long size = vmg->end - vmg->start;
@@ -249,6 +245,11 @@ static inline pgoff_t vmg_pages(const struct vma_merge_struct *vmg)
return size >> PAGE_SHIFT;
}
+static inline pgoff_t vmg_start_pgoff(const struct vma_merge_struct *vmg)
+{
+ return vmg->pgoff;
+}
+
static inline pgoff_t vmg_end_pgoff(const struct vma_merge_struct *vmg)
{
return vmg_start_pgoff(vmg) + vmg_pages(vmg);
@@ -263,6 +264,16 @@ static inline void vma_set_pgoff(struct vm_area_struct *vma, pgoff_t pgoff)
vma->vm_pgoff = pgoff;
}
+static inline pgoff_t vmg_start_anon_pgoff(const struct vma_merge_struct *vmg)
+{
+ return vmg->anon_pgoff;
+}
+
+static inline pgoff_t vmg_end_anon_pgoff(const struct vma_merge_struct *vmg)
+{
+ return vmg_start_anon_pgoff(vmg) + vmg_pages(vmg);
+}
+
static inline void __vma_set_virt_pgoff(struct vm_area_struct *vma, pgoff_t pgoff)
{
#ifdef CONFIG_64BIT
@@ -281,44 +292,102 @@ static inline void vma_add_pgoff(struct vm_area_struct *vma, pgoff_t delta)
{
vma_assert_can_modify(vma);
vma_set_pgoff(vma, vma_start_pgoff(vma) + delta);
+ vma_set_virt_pgoff(vma, vma_start_virt_pgoff(vma) + delta);
}
static inline void vma_sub_pgoff(struct vm_area_struct *vma, pgoff_t delta)
{
vma_assert_can_modify(vma);
vma_set_pgoff(vma, vma_start_pgoff(vma) - delta);
+ vma_set_virt_pgoff(vma, vma_start_virt_pgoff(vma) - delta);
+}
+
+/**
+ * vma_anon_pgoff_addr() - Calculates the absolute anonymous page offset of
+ * @address.
+ * @vma: The VMA whose anonymous page offset is required.
+ * @address: The address whose absolute page offset is required.
+ *
+ * If the VMA is a shared file-backed mapping, then the file-based page offset
+ * is returned.
+ *
+ * Otherwise, the virtual page offset is returned.
+ *
+ * This means that shared file-backed mappings are correctly merged based on
+ * their file page offset compatibility.
+ *
+ * Returns: The absolute anonymous page offset of @address within @vma.
+ */
+static inline pgoff_t vma_anon_pgoff_addr(const struct vm_area_struct *vma,
+ unsigned long address)
+{
+ if (vma_test(vma, VMA_SHARED_BIT))
+ return linear_page_index(vma, address);
+
+ return linear_virt_page_index(vma, address);
+}
+
+/**
+ * vma_start_anon_pgoff() - Calculates the absolute anonymous page offset used
+ * for purposes of merge compatibility.
+ * @vma: The VMA whose anonymous page offset is required.
+ *
+ * See vma_anon_pgoff_addr().
+ *
+ * Returns: The absolute anonymous page offset of @vma for purposes of merging.
+ */
+static inline pgoff_t vma_start_anon_pgoff(const struct vm_area_struct *vma)
+{
+ return vma_anon_pgoff_addr(vma, vma->vm_start);
}
-#define VMG_STATE(name, mm_, vmi_, start_, end_, vma_flags_, pgoff_) \
+/**
+ * vma_end_anon_pgoff() - Calculates the absolute exclusive end anonymous page
+ * offset used for purposes of merge compatibility.
+ * @vma: The VMA whose anonymous end page offset is required.
+ *
+ * See vma_start_anon_pgoff().
+ *
+ * Returns: The absolute exclusive end anonymous page offset of @vma for
+ * purposes of merging.
+ */
+static inline pgoff_t vma_end_anon_pgoff(const struct vm_area_struct *vma)
+{
+ return vma_start_anon_pgoff(vma) + vma_pages(vma);
+}
+
+#define VMG_STATE(name, mm_, vmi_, start_, end_, vma_flags_, pgoff_, anon_pgoff_) \
+ struct vma_merge_struct name = { \
+ .mm = mm_, \
+ .vmi = vmi_, \
+ .start = start_, \
+ .end = end_, \
+ .vma_flags = vma_flags_, \
+ .pgoff = pgoff_, \
+ .anon_pgoff = anon_pgoff_, \
+ .state = VMA_MERGE_START, \
+ }
+
+#define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_) \
struct vma_merge_struct name = { \
- .mm = mm_, \
+ .mm = vma_->vm_mm, \
.vmi = vmi_, \
+ .prev = prev_, \
+ .middle = vma_, \
+ .next = NULL, \
.start = start_, \
.end = end_, \
- .vma_flags = vma_flags_, \
- .pgoff = pgoff_, \
+ .vm_flags = vma_->vm_flags, \
+ .pgoff = linear_page_index(vma_, start_), \
+ .anon_pgoff = vma_anon_pgoff_addr(vma_, start_), \
+ .file = vma_->vm_file, \
+ .anon_vma = vma_->anon_vma, \
+ .policy = vma_policy(vma_), \
+ .uffd_ctx = vma_->vm_userfaultfd_ctx, \
+ .anon_name = anon_vma_name(vma_), \
.state = VMA_MERGE_START, \
}
-#define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_) \
- struct vma_merge_struct name = { \
- .mm = vma_->vm_mm, \
- .vmi = vmi_, \
- .prev = prev_, \
- .middle = vma_, \
- .next = NULL, \
- .start = start_, \
- .end = end_, \
- .vm_flags = vma_->vm_flags, \
- .pgoff = linear_page_index(vma_, start_), \
- .file = vma_->vm_file, \
- .anon_vma = vma_->anon_vma, \
- .policy = vma_policy(vma_), \
- .uffd_ctx = vma_->vm_userfaultfd_ctx, \
- .anon_name = anon_vma_name(vma_), \
- .state = VMA_MERGE_START, \
- }
-
#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
void validate_mm(struct mm_struct *mm);
#else
@@ -501,7 +570,7 @@ void unlink_file_vma_batch_add(struct unlink_vma_file_batch *vb,
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
unsigned long addr, unsigned long len, pgoff_t pgoff,
- bool *need_rmap_locks);
+ pgoff_t anon_pgoff, bool *need_rmap_locks);
struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma);
diff --git a/mm/vma_exec.c b/mm/vma_exec.c
index c0f7ba2cfb27..0b15805160fb 100644
--- a/mm/vma_exec.c
+++ b/mm/vma_exec.c
@@ -41,7 +41,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
unsigned long new_end = old_end - shift;
VMA_ITERATOR(vmi, mm, new_start);
VMG_STATE(vmg, mm, &vmi, new_start, old_end, EMPTY_VMA_FLAGS,
- vma_start_pgoff(vma));
+ vma_start_pgoff(vma), vma_start_anon_pgoff(vma));
struct vm_area_struct *next;
struct mmu_gather tlb;
PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length);
diff --git a/tools/testing/vma/shared.c b/tools/testing/vma/shared.c
index bea9ea6db02a..f410bb6f858e 100644
--- a/tools/testing/vma/shared.c
+++ b/tools/testing/vma/shared.c
@@ -23,7 +23,8 @@ struct vm_area_struct *alloc_vma(struct mm_struct *mm,
vma->vm_start = start;
vma->vm_end = end;
- vma->vm_pgoff = pgoff;
+ vma_set_pgoff(vma, pgoff);
+ vma_set_virt_pgoff(vma, start >> PAGE_SHIFT);
vma->flags = vma_flags;
vma_assert_detached(vma);
diff --git a/tools/testing/vma/tests/merge.c b/tools/testing/vma/tests/merge.c
index 04704d6eb426..ed8fa0d7da97 100644
--- a/tools/testing/vma/tests/merge.c
+++ b/tools/testing/vma/tests/merge.c
@@ -45,6 +45,7 @@ void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start,
vmg->start = start;
vmg->end = end;
vmg->pgoff = pgoff;
+ vmg->anon_pgoff = start >> PAGE_SHIFT;
vmg->vma_flags = vma_flags;
vmg->just_expand = false;
@@ -108,6 +109,7 @@ static bool test_simple_merge(void)
.end = 0x2000,
.vma_flags = vma_flags,
.pgoff = 1,
+ .anon_pgoff = 1,
};
ASSERT_FALSE(attach_vma(&mm, vma_left));
@@ -1431,7 +1433,7 @@ static bool test_expand_only_mode(void)
struct mm_struct mm = {};
VMA_ITERATOR(vmi, &mm, 0);
struct vm_area_struct *vma_prev, *vma;
- VMG_STATE(vmg, &mm, &vmi, 0x5000, 0x9000, vma_flags, 5);
+ VMG_STATE(vmg, &mm, &vmi, 0x5000, 0x9000, vma_flags, 5, 5);
/*
* Place a VMA prior to the one we're expanding so we assert that we do
diff --git a/tools/testing/vma/tests/vma.c b/tools/testing/vma/tests/vma.c
index 754a2da06321..7ca5289e0f95 100644
--- a/tools/testing/vma/tests/vma.c
+++ b/tools/testing/vma/tests/vma.c
@@ -38,7 +38,7 @@ static bool test_copy_vma(void)
/* Move backwards and do not merge. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vma_flags);
- vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks);
+ vma_new = copy_vma(&vma, 0, 0x2000, 0, 3, &need_locks);
ASSERT_NE(vma_new, vma);
ASSERT_EQ(vma_new->vm_start, 0);
ASSERT_EQ(vma_new->vm_end, 0x2000);
@@ -51,7 +51,7 @@ static bool test_copy_vma(void)
vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, vma_flags);
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, vma_flags);
- vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks);
+ vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, 4, &need_locks);
vma_assert_attached(vma_new);
ASSERT_EQ(vma_new, vma_next);
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index e12ab2c80f95..f197312adb77 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -53,6 +53,7 @@ typedef __bitwise unsigned int vm_fault_t;
#define VM_WARN_ON(_expr) (WARN_ON(_expr))
#define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr))
+#define VM_WARN_ON_ONCE_VMA(_expr, _vma) (WARN_ON_ONCE(_expr))
#define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr))
#define VM_BUG_ON(_expr) (BUG_ON(_expr))
#define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 07/10] mm/rmap: track whether the page VMA mapped walk is anonymous
2026-06-29 15:03 [RFC PATCH 00/10] mm/rmap: index MAP_PRIVATE file-backed folios by virt pgoff Lorenzo Stoakes
` (5 preceding siblings ...)
2026-06-29 15:03 ` [RFC PATCH 06/10] mm: propagate VMA virtual page offset on map, remap, split + merge Lorenzo Stoakes
@ 2026-06-29 15:03 ` Lorenzo Stoakes
2026-06-29 15:03 ` [RFC PATCH 08/10] mm: introduce and use linear_folio_page_index() Lorenzo Stoakes
` (2 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Lorenzo Stoakes @ 2026-06-29 15:03 UTC (permalink / raw)
To: Andrew Morton
Cc: David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Matthew Wilcox,
Jan Kara, Rik van Riel, Harry Yoo, Jann Horn, Zi Yan, Baolin Wang,
Nico Pache, Ryan Roberts, Dev Jain, Barry Song, Lance Yang,
Xu Xin, Chengming Zhou, Miaohe Lin, Naoya Horiguchi,
Matthew Brost, Joshua Hahn, Rakie Kim, Byungchul Park,
Gregory Price, Ying Huang, Alistair Popple, Pedro Falcato,
Peter Xu, Kees Cook, linux-mm, linux-kernel, linux-fsdevel
Update the page_vma_mapped_walk structure to track whether the walk is over
an anonymous folio or not.
This is necessary in order to determine the correct VMA page
offset (virtual or not) in vma_address_end(), ready for a subsequent change
which adjusts which page offset to use depending on this parameter. We also
update the vma_address_end() comment slightly.
No functional change intended.
Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
---
include/linux/rmap.h | 2 ++
mm/internal.h | 13 ++++++++-----
mm/rmap.c | 3 +++
3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 8dc0871e5f00..a48ae9575bd2 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -871,6 +871,7 @@ struct page_vma_mapped_walk {
pte_t *pte;
spinlock_t *ptl;
unsigned int flags;
+ bool is_anon_walk;
};
#define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags) \
@@ -881,6 +882,7 @@ struct page_vma_mapped_walk {
.vma = _vma, \
.address = _address, \
.flags = _flags, \
+ .is_anon_walk = folio_test_anon(_folio), \
}
static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
diff --git a/mm/internal.h b/mm/internal.h
index f1e7e6256b4c..120957a7850c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1298,22 +1298,25 @@ static inline unsigned long vma_anon_address(const struct vm_area_struct *vma,
}
/*
- * Then at what user virtual address will none of the range be found in vma?
+ * At what user virtual address will none of the range be found in vma?
* Assumes that vma_address() already returned a good starting address.
*/
static inline unsigned long vma_address_end(struct page_vma_mapped_walk *pvmw)
{
- struct vm_area_struct *vma = pvmw->vma;
- pgoff_t pgoff;
+ const struct vm_area_struct *vma = pvmw->vma;
+ const pgoff_t pgoff = pvmw->pgoff;
+ pgoff_t pgoff_vma_start;
unsigned long address;
+ pgoff_t pgoff_end;
/* Common case, plus ->pgoff is invalid for KSM */
if (pvmw->nr_pages == 1)
return pvmw->address + PAGE_SIZE;
- pgoff = pvmw->pgoff + pvmw->nr_pages;
+ pgoff_vma_start = vma_start_pgoff(vma);
+ pgoff_end = pgoff + pvmw->nr_pages;
address = vma->vm_start +
- ((pgoff - vma_start_pgoff(vma)) << PAGE_SHIFT);
+ ((pgoff_end - pgoff_vma_start) << PAGE_SHIFT);
/* Check for address beyond vma (or wrapped through 0?) */
if (address < vma->vm_start || address > vma->vm_end)
address = vma->vm_end;
diff --git a/mm/rmap.c b/mm/rmap.c
index 0bdb65852222..a3e926a708b1 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1240,8 +1240,10 @@ static bool mapping_wrprotect_range_one(struct folio *folio,
.vma = vma,
.address = address,
.flags = PVMW_SYNC,
+ .is_anon_walk = false,
};
+ VM_WARN_ON_ONCE(folio_test_anon(folio));
state->cleaned += page_vma_mkclean_one(&pvmw);
return true;
@@ -1317,6 +1319,7 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
.pgoff = pgoff,
.vma = vma,
.flags = PVMW_SYNC,
+ .is_anon_walk = false,
};
if (invalid_mkclean_vma(vma, NULL))
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 08/10] mm: introduce and use linear_folio_page_index()
2026-06-29 15:03 [RFC PATCH 00/10] mm/rmap: index MAP_PRIVATE file-backed folios by virt pgoff Lorenzo Stoakes
` (6 preceding siblings ...)
2026-06-29 15:03 ` [RFC PATCH 07/10] mm/rmap: track whether the page VMA mapped walk is anonymous Lorenzo Stoakes
@ 2026-06-29 15:03 ` Lorenzo Stoakes
2026-06-29 15:03 ` [RFC PATCH 09/10] mm/rmap: use virt pgoff for MAP_PRIVATE file-backed anon folios Lorenzo Stoakes
2026-06-29 15:03 ` [RFC PATCH 10/10] tools/testing/vma: expand VMA merge tests to assert virt pgoff Lorenzo Stoakes
9 siblings, 0 replies; 11+ messages in thread
From: Lorenzo Stoakes @ 2026-06-29 15:03 UTC (permalink / raw)
To: Andrew Morton
Cc: David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Matthew Wilcox,
Jan Kara, Rik van Riel, Harry Yoo, Jann Horn, Zi Yan, Baolin Wang,
Nico Pache, Ryan Roberts, Dev Jain, Barry Song, Lance Yang,
Xu Xin, Chengming Zhou, Miaohe Lin, Naoya Horiguchi,
Matthew Brost, Joshua Hahn, Rakie Kim, Byungchul Park,
Gregory Price, Ying Huang, Alistair Popple, Pedro Falcato,
Peter Xu, Kees Cook, linux-mm, linux-kernel, linux-fsdevel
This function is, for now, a placeholder; it will be used in future to
determine whether to use the virtual page index or not, based on whether
the folio is anonymous or not.
Currently it simply wraps linear_page_index(), so this does not change
behaviour.
We update the callers that will need to determine which index to use based
on folio type once anonymous folios belonging to MAP_PRIVATE file-backed
mappings are tracked by virtual page offset.
No functional change intended.
Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
---
include/linux/pagemap.h | 18 ++++++++++++++++++
mm/huge_memory.c | 3 ++-
mm/migrate.c | 6 ++++--
mm/userfaultfd.c | 6 ++++--
4 files changed, 28 insertions(+), 5 deletions(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 6e0d719d639a..e2affa57dadd 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1146,6 +1146,24 @@ static inline pgoff_t linear_virt_page_index(const struct vm_area_struct *vma,
return pgoff;
}
+/**
+ * linear_folio_page_index() - Determine the absolute page offset of
+ * @address within @vma from @folio.
+ * @folio: The folio whose linear page index is sought.
+ * @vma: The VMA in which @address resides.
+ * @address: The address whose absolute page offset is required.
+ *
+ * For compatibility, currently identical to linear_page_index().
+ *
+ * Returns: The absolute page offset of @address within @vma.
+ */
+static inline pgoff_t linear_folio_page_index(const struct folio *folio,
+ const struct vm_area_struct *vma,
+ const unsigned long address)
+{
+ return linear_page_index(vma, address);
+}
+
struct wait_page_key {
struct folio *folio;
int bit_nr;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e94f56487225..1b3456a9ff74 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2832,7 +2832,8 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
}
folio_move_anon_rmap(src_folio, dst_vma);
- src_folio->index = linear_page_index(dst_vma, dst_addr);
+ src_folio->index = linear_folio_page_index(src_folio, dst_vma,
+ dst_addr);
_dst_pmd = folio_mk_pmd(src_folio, dst_vma->vm_page_prot);
/* Follow mremap() behavior and treat the entry dirty after the move */
diff --git a/mm/migrate.c b/mm/migrate.c
index d9b23909d716..4250a56e09c0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -358,8 +358,10 @@ static bool remove_migration_pte(struct folio *folio,
unsigned long idx = 0;
/* pgoff is invalid for ksm pages, but they are never large */
- if (folio_test_large(folio) && !folio_test_hugetlb(folio))
- idx = linear_page_index(vma, pvmw.address) - pvmw.pgoff;
+ if (folio_test_large(folio) && !folio_test_hugetlb(folio)) {
+ idx += linear_folio_page_index(folio, vma, pvmw.address);
+ idx -= pvmw.pgoff;
+ }
new = folio_page(folio, idx);
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index bf4518f4449d..9c6b1a678ca6 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1283,7 +1283,8 @@ static long move_present_ptes(struct mm_struct *mm,
}
folio_move_anon_rmap(src_folio, dst_vma);
- src_folio->index = linear_page_index(dst_vma, dst_addr);
+ src_folio->index = linear_folio_page_index(src_folio, dst_vma,
+ dst_addr);
orig_dst_pte = folio_mk_pte(src_folio, dst_vma->vm_page_prot);
/* Set soft dirty bit so userspace can notice the pte was moved */
@@ -1352,7 +1353,8 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
*/
if (src_folio) {
folio_move_anon_rmap(src_folio, dst_vma);
- src_folio->index = linear_page_index(dst_vma, dst_addr);
+ src_folio->index = linear_folio_page_index(src_folio, dst_vma,
+ dst_addr);
} else {
/*
* Check if the swap entry is cached after acquiring the src_pte
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [RFC PATCH 09/10] mm/rmap: use virt pgoff for MAP_PRIVATE file-backed anon folios
2026-06-29 15:03 [RFC PATCH 00/10] mm/rmap: index MAP_PRIVATE file-backed folios by virt pgoff Lorenzo Stoakes
` (7 preceding siblings ...)
2026-06-29 15:03 ` [RFC PATCH 08/10] mm: introduce and use linear_folio_page_index() Lorenzo Stoakes
@ 2026-06-29 15:03 ` Lorenzo Stoakes
2026-06-29 15:03 ` [RFC PATCH 10/10] tools/testing/vma: expand VMA merge tests to assert virt pgoff Lorenzo Stoakes
9 siblings, 0 replies; 11+ messages in thread
From: Lorenzo Stoakes @ 2026-06-29 15:03 UTC (permalink / raw)
To: Andrew Morton
Cc: David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Matthew Wilcox,
Jan Kara, Rik van Riel, Harry Yoo, Jann Horn, Zi Yan, Baolin Wang,
Nico Pache, Ryan Roberts, Dev Jain, Barry Song, Lance Yang,
Xu Xin, Chengming Zhou, Miaohe Lin, Naoya Horiguchi,
Matthew Brost, Joshua Hahn, Rakie Kim, Byungchul Park,
Gregory Price, Ying Huang, Alistair Popple, Pedro Falcato,
Peter Xu, Kees Cook, linux-mm, linux-kernel, linux-fsdevel
Currently, anonymous folios belonging to CoW'd MAP_PRIVATE file-backed
mappings are indexed by their page offset within the file in which they
were originally mapped.
This differs from anonymous folios belonging to pure anon mappings which are
indexed by their virtual page offset (the address at which they'd belong in
the VMA when first faulted).
This change fixes this inconsistency, always indexing anonymous folios by
their virtual page offset regardless of the VMA to which they belong.
We have laid the foundations for making this change to the point where we
need only 'switch it on', and this patch switches it on by:
* Using linear_virt_page_index() in __folio_set_anon() to assign the
folio's index to the anonymous linear index rather than the file-backed
one.
* Otherwise using linear_virt_page_index() in all instances where
anonymous folios are being referenced or manipulated.
* Replacing vma_address() with vma_filebacked_address() or
vma_anon_address() as appropriate.
* Updating the rmap lock logic in copy_vma() to also account for virtual
page offsets.
* Updating the merging logic to check that virtual page offsets are
aligned as well as filebacked ones for anonymous or MAP_PRIVATE
file-backed VMAs.
* Updating linear_folio_page_index() to invoke linear_virt_page_index()
if the folio is anonymous.
This will have no impact on merging of anonymous VMAs or shared file-backed
VMAs, whose page offset and anonymous page offset will be identical.
However, MAP_PRIVATE file-backed mappings must now be aligned on virtual
page offset as well.
In most instances this should have no impact on merging of file-backed
mappings, which are usually not merged all that often, let alone
MAP_PRIVATE mapped ones, and rarely remapped and faulted before being moved
back in place (the case in which a merge may now fail).
This change lays the foundations for future scalable CoW work which needs
to track at least some remaps.
This change means that most remap tracking can be avoided, and in nearly
all cases the anonymous page offset can be used to quickly find the VMA in
an mm.
Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
---
include/linux/pagemap.h | 9 ++++++++-
mm/internal.h | 25 +++++++------------------
mm/interval_tree.c | 4 ++--
mm/ksm.c | 2 +-
mm/page_vma_mapped.c | 2 +-
mm/rmap.c | 12 ++++++------
mm/vma.c | 14 ++++++++++++--
7 files changed, 37 insertions(+), 31 deletions(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e2affa57dadd..079a08fa83f5 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1153,7 +1153,11 @@ static inline pgoff_t linear_virt_page_index(const struct vm_area_struct *vma,
* @vma: The VMA in which @address resides.
* @address: The address whose absolute page offset is required.
*
- * For compatibility, currently identical to linear_page_index().
+ * Determines whether to obtain the virtual linear page index based on whether
+ * @folio is anonymous or not.
+ *
+ * See the descriptions of linear_virt_page_index() and linear_page_index() for
+ * details of each.
*
* Returns: The absolute page offset of @address within @vma.
*/
@@ -1161,6 +1165,9 @@ static inline pgoff_t linear_folio_page_index(const struct folio *folio,
const struct vm_area_struct *vma,
const unsigned long address)
{
+ if (folio_test_anon(folio))
+ return linear_virt_page_index(vma, address);
+
return linear_page_index(vma, address);
}
diff --git a/mm/internal.h b/mm/internal.h
index 120957a7850c..0a395801bbe2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1263,23 +1263,8 @@ static inline unsigned long vma_filebacked_address(const struct vm_area_struct *
}
/**
- * vma_address - Find the virtual address a page range is mapped at.
- * @vma: The vma which maps this object.
- * @pgoff: The page offset within its object.
- * @nr_pages: The number of pages to consider.
- *
- * If any page in this range is mapped by this VMA, return the first address
- * where any of these pages appear. Otherwise, return -EFAULT.
- */
-static inline unsigned long vma_address(const struct vm_area_struct *vma,
- pgoff_t pgoff, unsigned long nr_pages)
-{
- return __vma_address(vma, pgoff, vma_start_pgoff(vma), nr_pages);
-}
-
-/**
- * vma_anon_address - Find the address an anonymous folio with index @pgoff_virt
- * is mapped at.
+ * vma_anon_address - Find the virtual address an anonymous page range is mapped
+ * at.
* @vma: The vma which maps this object.
* @pgoff_virt: The virtual page index belonging to the folio.
* @nr_pages: The number of pages to consider.
@@ -1313,7 +1298,11 @@ static inline unsigned long vma_address_end(struct page_vma_mapped_walk *pvmw)
if (pvmw->nr_pages == 1)
return pvmw->address + PAGE_SIZE;
- pgoff_vma_start = vma_start_pgoff(vma);
+ if (pvmw->is_anon_walk)
+ pgoff_vma_start = vma_start_virt_pgoff(vma);
+ else
+ pgoff_vma_start = vma_start_pgoff(vma);
+
pgoff_end = pgoff + pvmw->nr_pages;
address = vma->vm_start +
((pgoff_end - pgoff_vma_start) << PAGE_SHIFT);
diff --git a/mm/interval_tree.c b/mm/interval_tree.c
index d90e962b28f7..350838dcfba5 100644
--- a/mm/interval_tree.c
+++ b/mm/interval_tree.c
@@ -83,12 +83,12 @@ mapping_interval_tree_iter_next(struct vm_area_struct *vma,
static pgoff_t avc_start_pgoff(struct anon_vma_chain *avc)
{
- return vma_start_pgoff(avc->vma);
+ return vma_start_virt_pgoff(avc->vma);
}
static pgoff_t avc_last_pgoff(struct anon_vma_chain *avc)
{
- return vma_last_pgoff(avc->vma);
+ return vma_last_virt_pgoff(avc->vma);
}
INTERVAL_TREE_DEFINE(struct anon_vma_chain, rb, pgoff_t, rb_subtree_last,
diff --git a/mm/ksm.c b/mm/ksm.c
index c6a6e1ef581d..b499f3240fc6 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -3120,7 +3120,7 @@ struct folio *ksm_might_need_to_copy(struct folio *folio,
return folio; /* no need to copy it */
} else if (!anon_vma) {
return folio; /* no need to copy it */
- } else if (folio->index == linear_page_index(vma, addr) &&
+ } else if (folio->index == linear_virt_page_index(vma, addr) &&
anon_vma->root == vma->anon_vma->root) {
return folio; /* still no need to copy it */
}
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index eff619180e84..3d90fd4178d2 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -356,7 +356,7 @@ unsigned long page_mapped_in_vma(const struct page *page,
};
if (folio_test_anon(folio))
- pvmw.address = vma_address(vma, pgoff, 1);
+ pvmw.address = vma_anon_address(vma, pgoff, 1);
else
pvmw.address = vma_filebacked_address(vma, pgoff, 1);
if (pvmw.address == -EFAULT)
diff --git a/mm/rmap.c b/mm/rmap.c
index a3e926a708b1..03c9ee92acc0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -866,7 +866,7 @@ unsigned long page_address_in_vma(const struct folio *folio,
vma->anon_vma->root != anon_vma->root)
return -EFAULT;
/* KSM folios don't reach here because of the !anon_vma check */
- return vma_address(vma, page_pgoff(folio, page), 1);
+ return vma_anon_address(vma, page_pgoff(folio, page), 1);
} else if (!vma->vm_file) {
return -EFAULT;
} else if (vma->vm_file->f_mapping != folio->mapping) {
@@ -1486,7 +1486,7 @@ static void __folio_set_anon(struct folio *folio, struct vm_area_struct *vma,
*/
anon_vma = (void *) anon_vma + FOLIO_MAPPING_ANON;
WRITE_ONCE(folio->mapping, (struct address_space *) anon_vma);
- folio->index = linear_page_index(vma, address);
+ folio->index = linear_virt_page_index(vma, address);
}
/**
@@ -1513,8 +1513,8 @@ static void __page_check_anon_rmap(const struct folio *folio,
*/
VM_BUG_ON_FOLIO(folio_anon_vma(folio)->root != vma->anon_vma->root,
folio);
- VM_BUG_ON_PAGE(page_pgoff(folio, page) != linear_page_index(vma, address),
- page);
+ VM_BUG_ON_PAGE(page_pgoff(folio, page) !=
+ linear_virt_page_index(vma, address), page);
}
static __always_inline void __folio_add_anon_rmap(struct folio *folio,
@@ -2992,10 +2992,10 @@ static void rmap_walk_anon(struct folio *folio,
pgoff_end = pgoff_start + folio_nr_pages(folio) - 1;
anon_vma_interval_tree_foreach(avc, anon_vma, pgoff_start, pgoff_end) {
struct vm_area_struct *vma = avc->vma;
- unsigned long address = vma_address(vma, pgoff_start,
+ const unsigned long address = vma_anon_address(vma, pgoff_start,
folio_nr_pages(folio));
- VM_BUG_ON_VMA(address == -EFAULT, vma);
+ VM_WARN_ON_ONCE_VMA(address == -EFAULT, vma);
cond_resched();
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
diff --git a/mm/vma.c b/mm/vma.c
index c4bb41400751..cda263d92694 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -233,6 +233,8 @@ static bool can_vma_merge_before(struct vma_merge_struct *vmg)
return false;
if (vmg_end_pgoff(vmg) != vma_start_pgoff(vmg->next))
return false;
+ if (vmg_end_anon_pgoff(vmg) != vma_start_anon_pgoff(vmg->next))
+ return false;
return true;
}
@@ -253,6 +255,8 @@ static bool can_vma_merge_after(struct vma_merge_struct *vmg)
return false;
if (vma_end_pgoff(vmg->prev) != vmg_start_pgoff(vmg))
return false;
+ if (vma_end_anon_pgoff(vmg->prev) != vmg_start_anon_pgoff(vmg))
+ return false;
return true;
}
@@ -1991,7 +1995,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
*vmap = vma = new_vma;
}
*need_rmap_locks =
- (vma_start_pgoff(new_vma) <= vma_start_pgoff(vma));
+ (vma_start_pgoff(new_vma) <= vma_start_pgoff(vma)) ||
+ (vma_start_anon_pgoff(new_vma) <= vma_start_anon_pgoff(vma));
} else {
new_vma = vm_area_dup(vma);
if (!new_vma)
@@ -2062,7 +2067,12 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
if (!vma_flags_empty(&diff))
return false;
/* Page offset must align. */
- return vma_end_pgoff(a) == vma_start_pgoff(b);
+ if (vma_end_pgoff(a) != vma_start_pgoff(b))
+ return false;
+ /* Anon page offset must align. */
+ if (vma_end_anon_pgoff(a) != vma_start_anon_pgoff(b))
+ return false;
+ return true;
}
/*
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC PATCH 10/10] tools/testing/vma: expand VMA merge tests to assert virt pgoff
2026-06-29 15:03 [RFC PATCH 00/10] mm/rmap: index MAP_PRIVATE file-backed folios by virt pgoff Lorenzo Stoakes
` (8 preceding siblings ...)
2026-06-29 15:03 ` [RFC PATCH 09/10] mm/rmap: use virt pgoff for MAP_PRIVATE file-backed anon folios Lorenzo Stoakes
@ 2026-06-29 15:03 ` Lorenzo Stoakes
9 siblings, 0 replies; 11+ messages in thread
From: Lorenzo Stoakes @ 2026-06-29 15:03 UTC (permalink / raw)
To: Andrew Morton
Cc: David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Matthew Wilcox,
Jan Kara, Rik van Riel, Harry Yoo, Jann Horn, Zi Yan, Baolin Wang,
Nico Pache, Ryan Roberts, Dev Jain, Barry Song, Lance Yang,
Xu Xin, Chengming Zhou, Miaohe Lin, Naoya Horiguchi,
Matthew Brost, Joshua Hahn, Rakie Kim, Byungchul Park,
Gregory Price, Ying Huang, Alistair Popple, Pedro Falcato,
Peter Xu, Kees Cook, linux-mm, linux-kernel, linux-fsdevel
Now that we have introduced the VMA virtual page offset attribute and
update it when VMAs are manipulated, update the VMA merge tests to assert
that the virtual page offset is as expected.
Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
---
tools/testing/vma/tests/merge.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/tools/testing/vma/tests/merge.c b/tools/testing/vma/tests/merge.c
index ed8fa0d7da97..ca415052fd8c 100644
--- a/tools/testing/vma/tests/merge.c
+++ b/tools/testing/vma/tests/merge.c
@@ -121,6 +121,7 @@ static bool test_simple_merge(void)
ASSERT_EQ(vma->vm_start, 0);
ASSERT_EQ(vma->vm_end, 0x3000);
ASSERT_EQ(vma_start_pgoff(vma), 0);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 0);
ASSERT_FLAGS_SAME_MASK(&vma->flags, vma_flags);
detach_free_vma(vma);
@@ -153,6 +154,7 @@ static bool test_simple_modify(void)
ASSERT_EQ(vma->vm_start, 0x1000);
ASSERT_EQ(vma->vm_end, 0x2000);
ASSERT_EQ(vma_start_pgoff(vma), 1);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 1);
/*
* Now walk through the three split VMAs and make sure they are as
@@ -165,6 +167,7 @@ static bool test_simple_modify(void)
ASSERT_EQ(vma->vm_start, 0);
ASSERT_EQ(vma->vm_end, 0x1000);
ASSERT_EQ(vma_start_pgoff(vma), 0);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 0);
detach_free_vma(vma);
vma_iter_clear(&vmi);
@@ -174,6 +177,7 @@ static bool test_simple_modify(void)
ASSERT_EQ(vma->vm_start, 0x1000);
ASSERT_EQ(vma->vm_end, 0x2000);
ASSERT_EQ(vma_start_pgoff(vma), 1);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 1);
detach_free_vma(vma);
vma_iter_clear(&vmi);
@@ -183,6 +187,7 @@ static bool test_simple_modify(void)
ASSERT_EQ(vma->vm_start, 0x2000);
ASSERT_EQ(vma->vm_end, 0x3000);
ASSERT_EQ(vma_start_pgoff(vma), 2);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 2);
detach_free_vma(vma);
mtree_destroy(&mm.mm_mt);
@@ -212,6 +217,7 @@ static bool test_simple_expand(void)
ASSERT_EQ(vma->vm_start, 0);
ASSERT_EQ(vma->vm_end, 0x3000);
ASSERT_EQ(vma_start_pgoff(vma), 0);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 0);
detach_free_vma(vma);
mtree_destroy(&mm.mm_mt);
@@ -234,6 +240,7 @@ static bool test_simple_shrink(void)
ASSERT_EQ(vma->vm_start, 0);
ASSERT_EQ(vma->vm_end, 0x1000);
ASSERT_EQ(vma_start_pgoff(vma), 0);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 0);
detach_free_vma(vma);
mtree_destroy(&mm.mm_mt);
@@ -346,6 +353,7 @@ static bool __test_merge_new(bool is_sticky, bool a_is_sticky, bool b_is_sticky,
ASSERT_EQ(vma->vm_start, 0);
ASSERT_EQ(vma->vm_end, 0x5000);
ASSERT_EQ(vma_start_pgoff(vma), 0);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 0);
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 3);
@@ -367,6 +375,7 @@ static bool __test_merge_new(bool is_sticky, bool a_is_sticky, bool b_is_sticky,
ASSERT_EQ(vma->vm_start, 0x6000);
ASSERT_EQ(vma->vm_end, 0x9000);
ASSERT_EQ(vma_start_pgoff(vma), 6);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 6);
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 3);
@@ -387,6 +396,7 @@ static bool __test_merge_new(bool is_sticky, bool a_is_sticky, bool b_is_sticky,
ASSERT_EQ(vma->vm_start, 0);
ASSERT_EQ(vma->vm_end, 0x9000);
ASSERT_EQ(vma_start_pgoff(vma), 0);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 0);
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 2);
@@ -407,6 +417,7 @@ static bool __test_merge_new(bool is_sticky, bool a_is_sticky, bool b_is_sticky,
ASSERT_EQ(vma->vm_start, 0xa000);
ASSERT_EQ(vma->vm_end, 0xc000);
ASSERT_EQ(vma_start_pgoff(vma), 0xa);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 0xa);
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 2);
@@ -426,6 +437,7 @@ static bool __test_merge_new(bool is_sticky, bool a_is_sticky, bool b_is_sticky,
ASSERT_EQ(vma->vm_start, 0);
ASSERT_EQ(vma->vm_end, 0xc000);
ASSERT_EQ(vma_start_pgoff(vma), 0);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 0);
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 1);
@@ -446,6 +458,7 @@ static bool __test_merge_new(bool is_sticky, bool a_is_sticky, bool b_is_sticky,
ASSERT_EQ(vma->vm_start, 0);
ASSERT_EQ(vma->vm_end, 0xc000);
ASSERT_EQ(vma_start_pgoff(vma), 0);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 0);
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
detach_free_vma(vma);
@@ -808,6 +821,7 @@ static bool test_vma_merge_new_with_close(void)
ASSERT_EQ(vma->vm_start, 0);
ASSERT_EQ(vma->vm_end, 0x5000);
ASSERT_EQ(vma_start_pgoff(vma), 0);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 0);
ASSERT_EQ(vma->vm_ops, &vm_ops);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 2);
@@ -868,6 +882,7 @@ static bool __test_merge_existing(bool prev_is_sticky, bool middle_is_sticky, bo
ASSERT_EQ(vma->vm_start, 0x2000);
ASSERT_EQ(vma->vm_end, 0x3000);
ASSERT_EQ(vma_start_pgoff(vma), 2);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 2);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_TRUE(vma_write_started(vma_next));
ASSERT_EQ(mm.map_count, 2);
@@ -934,6 +949,7 @@ static bool __test_merge_existing(bool prev_is_sticky, bool middle_is_sticky, bo
ASSERT_EQ(vma->vm_start, 0x6000);
ASSERT_EQ(vma->vm_end, 0x7000);
ASSERT_EQ(vma_start_pgoff(vma), 6);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 6);
ASSERT_TRUE(vma_write_started(vma_prev));
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 2);
@@ -1419,6 +1435,7 @@ static bool test_merge_extend(void)
ASSERT_EQ(vma->vm_start, 0);
ASSERT_EQ(vma->vm_end, 0x4000);
ASSERT_EQ(vma_start_pgoff(vma), 0);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 0);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 1);
@@ -1459,6 +1476,7 @@ static bool test_expand_only_mode(void)
ASSERT_EQ(vma->vm_start, 0x3000);
ASSERT_EQ(vma->vm_end, 0x9000);
ASSERT_EQ(vma_start_pgoff(vma), 3);
+ ASSERT_EQ(vma_start_virt_pgoff(vma), 3);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(vma_iter_addr(&vmi), 0x3000);
vma_assert_attached(vma);
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread