From: Mike Rapoport <rppt@kernel.org>
To: linux-mm@kvack.org
Cc: Andrea Arcangeli <aarcange@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
Axel Rasmussen <axelrasmussen@google.com>,
Baolin Wang <baolin.wang@linux.alibaba.com>,
David Hildenbrand <david@redhat.com>,
Hugh Dickins <hughd@google.com>,
James Houghton <jthoughton@google.com>,
"Liam R. Howlett" <Liam.Howlett@oracle.com>,
Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
Michal Hocko <mhocko@suse.com>, Mike Rapoport <rppt@kernel.org>,
Nikita Kalyazin <kalyazin@amazon.com>,
Paolo Bonzini <pbonzini@redhat.com>, Peter Xu <peterx@redhat.com>,
Sean Christopherson <seanjc@google.com>,
Shuah Khan <shuah@kernel.org>,
Suren Baghdasaryan <surenb@google.com>,
Vlastimil Babka <vbabka@suse.cz>,
linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
linux-kselftest@vger.kernel.org,
"David Hildenbrand (Red Hat)" <david@kernel.org>
Subject: [PATCH v3 2/5] userfaultfd, shmem: use a VMA callback to handle UFFDIO_CONTINUE
Date: Sun, 30 Nov 2025 13:18:09 +0200 [thread overview]
Message-ID: <20251130111812.699259-3-rppt@kernel.org> (raw)
In-Reply-To: <20251130111812.699259-1-rppt@kernel.org>
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
When userspace resolves a page fault in a shmem VMA with UFFDIO_CONTINUE
it needs to get a folio that already exists in the pagecache backing
that VMA.
Instead of using shmem_get_folio() for that, add a get_folio_noalloc()
method to 'struct vm_operations_struct' that will return a folio if it
exists in the VMA's pagecache at the given pgoff.
Implement the get_folio_noalloc() method for shmem and slightly refactor
userfaultfd's mfill_atomic() and mfill_atomic_pte_continue() to support
this new API.
Acked-by: David Hildenbrand (Red Hat) <david@kernel.org>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
include/linux/mm.h | 9 ++++++++
mm/shmem.c | 18 ++++++++++++++++
mm/userfaultfd.c | 52 +++++++++++++++++++++++++++++-----------------
3 files changed, 60 insertions(+), 19 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7c79b3369b82..6351a9cde360 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -690,6 +690,15 @@ struct vm_operations_struct {
struct page *(*find_normal_page)(struct vm_area_struct *vma,
unsigned long addr);
#endif /* CONFIG_FIND_NORMAL_PAGE */
+#ifdef CONFIG_USERFAULTFD
+ /*
+ * Called by userfaultfd to resolve a UFFDIO_CONTINUE request.
+ * Should return the folio found at pgoff in the VMA's pagecache if it
+ * exists or an ERR_PTR otherwise.
+ * The returned folio is locked and has a reference held.
+ */
+ struct folio *(*get_folio_noalloc)(struct inode *inode, pgoff_t pgoff);
+#endif
};
#ifdef CONFIG_NUMA_BALANCING
diff --git a/mm/shmem.c b/mm/shmem.c
index 5a3f0f754dc0..9f8c54ad0e32 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3262,6 +3262,18 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
shmem_inode_unacct_blocks(inode, 1);
return ret;
}
+
+static struct folio *shmem_get_folio_noalloc(struct inode *inode, pgoff_t pgoff)
+{
+ struct folio *folio;
+ int err;
+
+ err = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
+ if (err)
+ return ERR_PTR(err);
+
+ return folio;
+}
#endif /* CONFIG_USERFAULTFD */
#ifdef CONFIG_TMPFS
@@ -5294,6 +5306,9 @@ static const struct vm_operations_struct shmem_vm_ops = {
.set_policy = shmem_set_policy,
.get_policy = shmem_get_policy,
#endif
+#ifdef CONFIG_USERFAULTFD
+ .get_folio_noalloc = shmem_get_folio_noalloc,
+#endif
};
static const struct vm_operations_struct shmem_anon_vm_ops = {
@@ -5303,6 +5318,9 @@ static const struct vm_operations_struct shmem_anon_vm_ops = {
.set_policy = shmem_set_policy,
.get_policy = shmem_get_policy,
#endif
+#ifdef CONFIG_USERFAULTFD
+ .get_folio_noalloc = shmem_get_folio_noalloc,
+#endif
};
int shmem_init_fs_context(struct fs_context *fc)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 8dc964389b0d..5610f29dac73 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -388,15 +388,12 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
struct page *page;
int ret;
- ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
+ folio = dst_vma->vm_ops->get_folio_noalloc(inode, pgoff);
/* Our caller expects us to return -EFAULT if we failed to find folio */
- if (ret == -ENOENT)
- ret = -EFAULT;
- if (ret)
- goto out;
- if (!folio) {
- ret = -EFAULT;
- goto out;
+ if (IS_ERR_OR_NULL(folio)) {
+ if (PTR_ERR(folio) == -ENOENT || !folio)
+ return -EFAULT;
+ return PTR_ERR(folio);
}
page = folio_file_page(folio, pgoff);
@@ -411,13 +408,12 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
goto out_release;
folio_unlock(folio);
- ret = 0;
-out:
- return ret;
+ return 0;
+
out_release:
folio_unlock(folio);
folio_put(folio);
- goto out;
+ return ret;
}
/* Handles UFFDIO_POISON for all non-hugetlb VMAs. */
@@ -694,6 +690,15 @@ static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
return err;
}
+static __always_inline bool vma_can_mfill_atomic(struct vm_area_struct *vma,
+ uffd_flags_t flags)
+{
+ if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
+ return vma->vm_ops && vma->vm_ops->get_folio_noalloc;
+
+ return vma_is_anonymous(vma) || vma_is_shmem(vma);
+}
+
static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
unsigned long dst_start,
unsigned long src_start,
@@ -766,10 +771,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
return mfill_atomic_hugetlb(ctx, dst_vma, dst_start,
src_start, len, flags);
- if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
- goto out_unlock;
- if (!vma_is_shmem(dst_vma) &&
- uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
+ if (!vma_can_mfill_atomic(dst_vma, flags))
goto out_unlock;
while (src_addr < src_start + len) {
@@ -1985,9 +1987,21 @@ bool vma_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags,
if (vma->vm_flags & VM_DROPPABLE)
return false;
- if ((vm_flags & VM_UFFD_MINOR) &&
- (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
- return false;
+ if (vm_flags & VM_UFFD_MINOR) {
+ /*
+ * If only MINOR mode is requested and we can request an
+ * existing folio from VMA's page cache, allow it
+ */
+ if (vm_flags == VM_UFFD_MINOR && vma->vm_ops &&
+ vma->vm_ops->get_folio_noalloc)
+ return true;
+ /*
+ * Only hugetlb and shmem can support MINOR mode in combination
+ * with other modes
+ */
+ if (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))
+ return false;
+ }
/*
* If wp async enabled, and WP is the only mode enabled, allow any
--
2.51.0
next prev parent reply other threads:[~2025-11-30 11:18 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-30 11:18 [PATCH v3 0/5] mm, kvm: add guest_memfd support for uffd minor faults Mike Rapoport
2025-11-30 11:18 ` [PATCH v3 1/5] userfaultfd: move vma_can_userfault out of line Mike Rapoport
2025-11-30 11:18 ` Mike Rapoport [this message]
2025-11-30 11:18 ` [PATCH v3 3/5] mm: introduce VM_FAULT_UFFD_MINOR fault reason Mike Rapoport
2025-12-01 8:59 ` David Hildenbrand (Red Hat)
2025-11-30 11:18 ` [PATCH v3 4/5] guest_memfd: add support for userfaultfd minor mode Mike Rapoport
2025-12-01 9:12 ` David Hildenbrand (Red Hat)
2025-12-01 13:39 ` Nikita Kalyazin
2025-12-01 15:54 ` David Hildenbrand (Red Hat)
2025-12-01 16:48 ` Nikita Kalyazin
2025-12-01 18:35 ` Peter Xu
2025-12-01 20:12 ` Nikita Kalyazin
2025-12-01 20:57 ` Peter Xu
2025-12-02 11:50 ` Nikita Kalyazin
2025-12-02 15:36 ` Peter Xu
2025-12-02 15:59 ` Nikita Kalyazin
2025-12-03 9:23 ` David Hildenbrand (Red Hat)
2025-12-03 10:03 ` Nikita Kalyazin
2025-12-04 17:27 ` Nikita Kalyazin
2025-11-30 11:18 ` [PATCH v3 5/5] KVM: selftests: test userfaultfd minor for guest_memfd Mike Rapoport
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251130111812.699259-3-rppt@kernel.org \
--to=rppt@kernel.org \
--cc=Liam.Howlett@oracle.com \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=axelrasmussen@google.com \
--cc=baolin.wang@linux.alibaba.com \
--cc=david@kernel.org \
--cc=david@redhat.com \
--cc=hughd@google.com \
--cc=jthoughton@google.com \
--cc=kalyazin@amazon.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=mhocko@suse.com \
--cc=pbonzini@redhat.com \
--cc=peterx@redhat.com \
--cc=seanjc@google.com \
--cc=shuah@kernel.org \
--cc=surenb@google.com \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.