public inbox for linux-mm@kvack.org
 help / color / mirror / Atom feed
* [PATCH] mm/shmem: use invalidate_lock to fix hole-punch race
@ 2026-03-26 16:26 Gregory Price
  2026-03-26 17:07 ` Pedro Falcato
  2026-03-26 19:21 ` Matthew Wilcox
  0 siblings, 2 replies; 8+ messages in thread
From: Gregory Price @ 2026-03-26 16:26 UTC (permalink / raw)
  To: linux-mm, akpm, hughd
  Cc: david, ljs, Liam.Howlett, vbabka, rppt, surenb, mhocko,
	baolin.wang, linux-kernel, kernel-team, stable

Inflating a VM's balloon while vhost-user-net fork+exec's a helper
triggers "still mapped when deleted" on the memfd backing guest RAM:

  BUG: Bad page cache in process __balloon  pfn:6520704
  page dumped because: still mapped when deleted
  ...
  shmem_undo_range+0x3fa/0x570
  shmem_fallocate+0x366/0x4d0
  vfs_fallocate+0x13c/0x310

This BUG also resulted in guests seeing stale mappings backed by a
zeroed page, causing guest kernel panics.  I was unable to trace that
specific interaction, but it appears to be related to THP splitting.

Two races allow PTEs to be re-installed for a folio that fallocate
is about to remove from page cache:

Race 1 — fault-around (filemap_map_pages):

  fallocate              fault-around           fork
  --------               ------------           ----
  set i_private
  unmap_mapping_range()
  # zaps PTEs
                       filemap_map_pages()
                        # re-maps folio!
                                              dup_mmap()
                                              # child VMA
                                              # in tree
  shmem_undo_range()
    lock folio
    unmap_mapping_folio()
    # child VMA:
    #   no PTE, skip
                                            copy_page_range()
                                              # copies PTE
    # parent VMA:
    #   zaps PTE
  filemap_remove_folio()
    # mapcount=1, BUG!

filemap_map_pages() is called directly as .map_pages, bypassing
shmem_fault()'s i_private synchronization.

Race 2 — shmem_fault TOCTOU:

  fallocate                   shmem_fault
  --------                    -----------
                            check i_private → NULL
  set i_private
  unmap_mapping_range()
  # zaps PTEs
                            shmem_get_folio_gfp()
                              # finds folio in cache
                            finish_fault()
                              # installs PTE
  shmem_undo_range()
    truncate_inode_folio()
      # mapcount=1, BUG!

Fix both races with invalidate_lock.

This matches the existing pattern used by secretmem_fault(),
udf_page_mkwrite(), and zonefs_filemap_page_mkwrite(), all of
which take invalidate_lock shared under mmap_lock in their
fault handlers.

This also requires removing the rcu_read_lock() from
do_fault_around() so that .map_pages may use sleeping locks.

The outer rcu_read_lock() is redundant for all in-tree .map_pages
implementations: every one either IS filemap_map_pages() (which
takes rcu_read_lock() itself) or is a thin wrapper around it.

Fixes: d7c1755179b8 ("mm: implement ->map_pages for shmem/tmpfs")
Cc: stable@vger.kernel.org
Signed-off-by: Gregory Price <gourry@gourry.net>
---
 mm/memory.c |  2 --
 mm/shmem.c  | 33 ++++++++++++++++++++++++++++++---
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index e44469f9cf65..838583591fdf 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5900,11 +5900,9 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 			return VM_FAULT_OOM;
 	}
 
-	rcu_read_lock();
 	ret = vmf->vma->vm_ops->map_pages(vmf,
 			vmf->pgoff + from_pte - pte_off,
 			vmf->pgoff + to_pte - pte_off);
-	rcu_read_unlock();
 
 	return ret;
 }
diff --git a/mm/shmem.c b/mm/shmem.c
index 4ecefe02881d..5c654b86f3cf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2731,7 +2731,8 @@ static vm_fault_t shmem_falloc_wait(struct vm_fault *vmf, struct inode *inode)
 static vm_fault_t shmem_fault(struct vm_fault *vmf)
 {
 	struct inode *inode = file_inode(vmf->vma->vm_file);
-	gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
+	struct address_space *mapping = inode->i_mapping;
+	gfp_t gfp = mapping_gfp_mask(mapping);
 	struct folio *folio = NULL;
 	vm_fault_t ret = 0;
 	int err;
@@ -2747,8 +2748,15 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
 	}
 
 	WARN_ON_ONCE(vmf->page != NULL);
+	/*
+	 * shmem_fallocate(PUNCH_HOLE) holds invalidate_lock exclusive across
+	 * unmap+truncate.  Take it shared here so shmem_fault cannot obtain
+	 * a folio in the process of being punched.
+	 */
+	filemap_invalidate_lock_shared(mapping);
 	err = shmem_get_folio_gfp(inode, vmf->pgoff, 0, &folio, SGP_CACHE,
 				  gfp, vmf, &ret);
+	filemap_invalidate_unlock_shared(mapping);
 	if (err)
 		return vmf_error(err);
 	if (folio) {
@@ -3683,11 +3691,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		inode->i_private = &shmem_falloc;
 		spin_unlock(&inode->i_lock);
 
+		filemap_invalidate_lock(mapping);
 		if ((u64)unmap_end > (u64)unmap_start)
 			unmap_mapping_range(mapping, unmap_start,
 					    1 + unmap_end - unmap_start, 0);
 		shmem_truncate_range(inode, offset, offset + len - 1);
 		/* No need to unmap again: hole-punching leaves COWed pages */
+		filemap_invalidate_unlock(mapping);
 
 		spin_lock(&inode->i_lock);
 		inode->i_private = NULL;
@@ -5268,9 +5278,26 @@ static const struct super_operations shmem_ops = {
 #endif
 };
 
+/*
+ * shmem_fallocate(PUNCH_HOLE) holds invalidate_lock for write across
+ * unmap+truncate.  Take it for read here so fault-around cannot re-map
+ * pages being punched.
+ */
+static vm_fault_t shmem_map_pages(struct vm_fault *vmf,
+				  pgoff_t start_pgoff, pgoff_t end_pgoff)
+{
+	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+	vm_fault_t ret;
+
+	filemap_invalidate_lock_shared(mapping);
+	ret = filemap_map_pages(vmf, start_pgoff, end_pgoff);
+	filemap_invalidate_unlock_shared(mapping);
+	return ret;
+}
+
 static const struct vm_operations_struct shmem_vm_ops = {
 	.fault		= shmem_fault,
-	.map_pages	= filemap_map_pages,
+	.map_pages	= shmem_map_pages,
 #ifdef CONFIG_NUMA
 	.set_policy     = shmem_set_policy,
 	.get_policy     = shmem_get_policy,
@@ -5282,7 +5309,7 @@ static const struct vm_operations_struct shmem_vm_ops = {
 
 static const struct vm_operations_struct shmem_anon_vm_ops = {
 	.fault		= shmem_fault,
-	.map_pages	= filemap_map_pages,
+	.map_pages	= shmem_map_pages,
 #ifdef CONFIG_NUMA
 	.set_policy     = shmem_set_policy,
 	.get_policy     = shmem_get_policy,
-- 
2.53.0



^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2026-03-27  4:35 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-26 16:26 [PATCH] mm/shmem: use invalidate_lock to fix hole-punch race Gregory Price
2026-03-26 17:07 ` Pedro Falcato
2026-03-26 18:37   ` Gregory Price
2026-03-26 19:16     ` Pedro Falcato
2026-03-26 19:48       ` Gregory Price
2026-03-27  4:35       ` Gregory Price
2026-03-26 19:21 ` Matthew Wilcox
2026-03-26 20:09   ` Gregory Price

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox