From: Peter Xu <peterx@redhat.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Mike Kravetz <mike.kravetz@oracle.com>,
Nadav Amit <nadav.amit@gmail.com>,
Matthew Wilcox <willy@infradead.org>,
Mike Rapoport <rppt@linux.vnet.ibm.com>,
David Hildenbrand <david@redhat.com>,
Hugh Dickins <hughd@google.com>,
Jerome Glisse <jglisse@redhat.com>,
"Kirill A . Shutemov" <kirill@shutemov.name>,
Andrea Arcangeli <aarcange@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
Axel Rasmussen <axelrasmussen@google.com>,
Alistair Popple <apopple@nvidia.com>,
peterx@redhat.com
Subject: [PATCH v8 07/23] mm/shmem: Persist uffd-wp bit across zapping for file-backed
Date: Mon, 4 Apr 2022 21:48:47 -0400 [thread overview]
Message-ID: <20220405014847.14295-1-peterx@redhat.com> (raw)
In-Reply-To: <20220405014646.13522-1-peterx@redhat.com>
File-backed memory is prone to being unmapped at any time. It means all
information in the pte will be dropped, including the uffd-wp flag.
To persist the uffd-wp flag, we'll use the pte markers. This patch teaches the
zap code to understand uffd-wp and know when to keep or drop the uffd-wp bit.
Add a new flag ZAP_FLAG_DROP_MARKER and set it in zap_details when we don't
want to persist such an information, for example, when destroying the whole
vma, or punching a hole in a shmem file. For the rest cases we should never
drop the uffd-wp bit, or the wr-protect information will get lost.
The new ZAP_FLAG_DROP_MARKER needs to be put into mm.h rather than memory.c
because it'll be further referenced in hugetlb files later.
Signed-off-by: Peter Xu <peterx@redhat.com>
---
include/linux/mm.h | 10 ++++++++
include/linux/mm_inline.h | 43 ++++++++++++++++++++++++++++++++++
mm/memory.c | 49 ++++++++++++++++++++++++++++++++++++---
mm/rmap.c | 8 +++++++
4 files changed, 107 insertions(+), 3 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 26428ff262fc..857bc8f7af45 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3422,4 +3422,14 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
}
#endif
+typedef unsigned int __bitwise zap_flags_t;
+
+/*
+ * Whether to drop the pte markers, for example, the uffd-wp information for
+ * file-backed memory. This should only be specified when we will completely
+ * drop the page in the mm, either by truncation or unmapping of the vma. By
+ * default, the flag is not set.
+ */
+#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
+
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index ac32125745ab..7b25b53c474a 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -6,6 +6,8 @@
#include <linux/huge_mm.h>
#include <linux/swap.h>
#include <linux/string.h>
+#include <linux/userfaultfd_k.h>
+#include <linux/swapops.h>
/**
* folio_is_file_lru - Should the folio be on a file LRU or anon LRU?
@@ -316,5 +318,46 @@ static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
return atomic_read(&mm->tlb_flush_pending) > 1;
}
+/*
+ * If this pte is wr-protected by uffd-wp in any form, arm the special pte to
+ * replace a none pte. NOTE! This should only be called when *pte is already
+ * cleared so we will never accidentally replace something valuable. Meanwhile
+ * none pte also means we are not demoting the pte so tlb flushed is not needed.
+ * E.g., when pte cleared the caller should have taken care of the tlb flush.
+ *
+ * Must be called with pgtable lock held so that no thread will see the none
+ * pte, and if they see it, they'll fault and serialize at the pgtable lock.
+ *
+ * This function is a no-op if PTE_MARKER_UFFD_WP is not enabled.
+ */
+static inline void
+pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *pte, pte_t pteval)
+{
+#ifdef CONFIG_PTE_MARKER_UFFD_WP
+ bool arm_uffd_pte = false;
+
+ /* The current status of the pte should be "cleared" before calling */
+ WARN_ON_ONCE(!pte_none(*pte));
+
+ if (vma_is_anonymous(vma) || !userfaultfd_wp(vma))
+ return;
+
+ /* A uffd-wp wr-protected normal pte */
+ if (unlikely(pte_present(pteval) && pte_uffd_wp(pteval)))
+ arm_uffd_pte = true;
+
+ /*
+ * A uffd-wp wr-protected swap pte. Note: this should even cover an
+ * existing pte marker with uffd-wp bit set.
+ */
+ if (unlikely(pte_swp_uffd_wp_any(pteval)))
+ arm_uffd_pte = true;
+
+ if (unlikely(arm_uffd_pte))
+ set_pte_at(vma->vm_mm, addr, pte,
+ make_pte_marker(PTE_MARKER_UFFD_WP));
+#endif
+}
#endif
diff --git a/mm/memory.c b/mm/memory.c
index 21abb8a30553..1144845ff734 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -74,6 +74,7 @@
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/vmalloc.h>
+#include <linux/mm_inline.h>
#include <trace/events/kmem.h>
@@ -1306,6 +1307,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
struct zap_details {
struct folio *single_folio; /* Locked folio to be unmapped */
bool even_cows; /* Zap COWed private pages too? */
+ zap_flags_t zap_flags; /* Extra flags for zapping */
};
/* Whether we should zap all COWed (private) pages too */
@@ -1334,6 +1336,29 @@ static inline bool should_zap_page(struct zap_details *details, struct page *pag
return !PageAnon(page);
}
+static inline bool zap_drop_file_uffd_wp(struct zap_details *details)
+{
+ if (!details)
+ return false;
+
+ return details->zap_flags & ZAP_FLAG_DROP_MARKER;
+}
+
+/*
+ * This function makes sure that we'll replace the none pte with an uffd-wp
+ * swap special pte marker when necessary. Must be with the pgtable lock held.
+ */
+static inline void
+zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *pte,
+ struct zap_details *details, pte_t pteval)
+{
+ if (zap_drop_file_uffd_wp(details))
+ return;
+
+ pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
+}
+
static unsigned long zap_pte_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
@@ -1371,6 +1396,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
+ zap_install_uffd_wp_if_needed(vma, addr, pte, details,
+ ptent);
if (unlikely(!page))
continue;
@@ -1401,6 +1428,13 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
page = pfn_swap_entry_to_page(entry);
if (unlikely(!should_zap_page(details, page)))
continue;
+ /*
+ * Both device private/exclusive mappings should only
+ * work with anonymous page so far, so we don't need to
+ * consider uffd-wp bit when zap. For more information,
+ * see zap_install_uffd_wp_if_needed().
+ */
+ WARN_ON_ONCE(!vma_is_anonymous(vma));
rss[mm_counter(page)]--;
if (is_device_private_entry(entry))
page_remove_rmap(page, vma, false);
@@ -1417,8 +1451,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
if (!should_zap_page(details, page))
continue;
rss[mm_counter(page)]--;
- } else if (is_pte_marker_entry(entry)) {
- /* By default, simply drop all pte markers when zap */
+ } else if (pte_marker_entry_uffd_wp(entry)) {
+ /* Only drop the uffd-wp marker if explicitly requested */
+ if (!zap_drop_file_uffd_wp(details))
+ continue;
} else if (is_hwpoison_entry(entry)) {
if (!should_zap_cows(details))
continue;
@@ -1427,6 +1463,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
WARN_ON_ONCE(1);
}
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+ zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
} while (pte++, addr += PAGE_SIZE, addr != end);
add_mm_rss_vec(mm, rss);
@@ -1637,12 +1674,17 @@ void unmap_vmas(struct mmu_gather *tlb,
unsigned long end_addr)
{
struct mmu_notifier_range range;
+ struct zap_details details = {
+ .zap_flags = ZAP_FLAG_DROP_MARKER,
+ /* Careful - we need to zap private pages too! */
+ .even_cows = true,
+ };
mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
start_addr, end_addr);
mmu_notifier_invalidate_range_start(&range);
for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
- unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);
+ unmap_single_vma(tlb, vma, start_addr, end_addr, &details);
mmu_notifier_invalidate_range_end(&range);
}
@@ -3438,6 +3480,7 @@ void unmap_mapping_folio(struct folio *folio)
details.even_cows = false;
details.single_folio = folio;
+ details.zap_flags = ZAP_FLAG_DROP_MARKER;
i_mmap_lock_read(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
diff --git a/mm/rmap.c b/mm/rmap.c
index 208b2c683cec..69416072b1a6 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -73,6 +73,7 @@
#include <linux/page_idle.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>
+#include <linux/mm_inline.h>
#include <asm/tlbflush.h>
@@ -1538,6 +1539,13 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
pteval = ptep_clear_flush(vma, address, pvmw.pte);
}
+ /*
+ * Now the pte is cleared. If this pte was uffd-wp armed,
+ * we may want to replace a none pte with a marker pte if
+ * it's file-backed, so we don't lose the tracking info.
+ */
+ pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
+
/* Set the dirty flag on the folio now the pte is gone. */
if (pte_dirty(pteval))
folio_mark_dirty(folio);
--
2.32.0
next prev parent reply other threads:[~2022-04-05 1:53 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-05 1:46 [PATCH v8 00/23] userfaultfd-wp: Support shmem and hugetlbfs Peter Xu
2022-04-05 1:46 ` [PATCH v8 01/23] mm: Introduce PTE_MARKER swap entry Peter Xu
2022-04-12 1:07 ` Alistair Popple
2022-04-12 19:45 ` Peter Xu
2022-04-13 0:30 ` Alistair Popple
2022-04-13 13:44 ` Peter Xu
2022-04-19 8:25 ` Alistair Popple
2022-04-19 19:44 ` Peter Xu
2022-04-05 1:48 ` [PATCH v8 02/23] mm: Teach core mm about pte markers Peter Xu
2022-04-12 1:22 ` Alistair Popple
2022-04-12 19:53 ` Peter Xu
2022-04-05 1:48 ` [PATCH v8 03/23] mm: Check against orig_pte for finish_fault() Peter Xu
2022-04-12 2:05 ` Alistair Popple
2022-04-12 19:54 ` Peter Xu
[not found] ` <CGME20220413140330eucas1p167da41e079712b829ef8237dc27b049c@eucas1p1.samsung.com>
2022-04-13 14:03 ` Marek Szyprowski
2022-04-13 16:43 ` Peter Xu
2022-04-14 7:51 ` Marek Szyprowski
2022-04-14 16:30 ` Peter Xu
2022-04-14 20:57 ` Andrew Morton
2022-04-14 21:08 ` Peter Xu
2022-04-15 14:21 ` Guenter Roeck
2022-04-15 14:41 ` Peter Xu
2022-04-05 1:48 ` [PATCH v8 04/23] mm/uffd: PTE_MARKER_UFFD_WP Peter Xu
2022-04-06 1:41 ` kernel test robot
2022-04-05 1:48 ` [PATCH v8 05/23] mm/shmem: Take care of UFFDIO_COPY_MODE_WP Peter Xu
2022-04-05 1:48 ` [PATCH v8 06/23] mm/shmem: Handle uffd-wp special pte in page fault handler Peter Xu
2022-05-11 16:30 ` David Hildenbrand
2022-05-12 16:34 ` Peter Xu
2022-04-05 1:48 ` Peter Xu [this message]
2022-04-05 1:48 ` [PATCH v8 08/23] mm/shmem: Allow uffd wr-protect none pte for file-backed mem Peter Xu
2022-04-05 1:48 ` [PATCH v8 09/23] mm/shmem: Allows file-back mem to be uffd wr-protected on thps Peter Xu
2022-04-05 1:48 ` [PATCH v8 10/23] mm/shmem: Handle uffd-wp during fork() Peter Xu
2022-04-06 6:16 ` kernel test robot
2022-04-06 12:18 ` Peter Xu
2022-04-05 1:48 ` [PATCH v8 11/23] mm/hugetlb: Introduce huge pte version of uffd-wp helpers Peter Xu
2022-04-05 1:49 ` [PATCH v8 12/23] mm/hugetlb: Hook page faults for uffd write protection Peter Xu
2022-04-05 1:49 ` [PATCH v8 13/23] mm/hugetlb: Take care of UFFDIO_COPY_MODE_WP Peter Xu
2022-04-05 1:49 ` [PATCH v8 14/23] mm/hugetlb: Handle UFFDIO_WRITEPROTECT Peter Xu
2022-04-05 1:49 ` [PATCH v8 15/23] mm/hugetlb: Handle pte markers in page faults Peter Xu
2022-04-06 13:37 ` kernel test robot
2022-04-06 15:02 ` Peter Xu
2022-04-05 1:49 ` [PATCH v8 16/23] mm/hugetlb: Allow uffd wr-protect none ptes Peter Xu
2022-04-05 1:49 ` [PATCH v8 17/23] mm/hugetlb: Only drop uffd-wp special pte if required Peter Xu
2022-04-05 1:49 ` [PATCH v8 18/23] mm/hugetlb: Handle uffd-wp during fork() Peter Xu
2022-04-05 1:49 ` [PATCH v8 19/23] mm/khugepaged: Don't recycle vma pgtable if uffd-wp registered Peter Xu
2022-04-05 1:49 ` [PATCH v8 20/23] mm/pagemap: Recognize uffd-wp bit for shmem/hugetlbfs Peter Xu
2022-04-05 1:49 ` [PATCH v8 21/23] mm/uffd: Enable write protection for shmem & hugetlbfs Peter Xu
2022-04-05 1:49 ` [PATCH v8 22/23] mm: Enable PTE markers by default Peter Xu
2022-04-19 15:13 ` Johannes Weiner
2022-04-19 19:59 ` Peter Xu
2022-04-19 20:14 ` Johannes Weiner
2022-04-19 20:28 ` Peter Xu
2022-04-19 21:24 ` Johannes Weiner
2022-04-19 22:01 ` Peter Xu
2022-04-20 13:46 ` Johannes Weiner
2022-04-20 14:25 ` Peter Xu
2022-04-05 1:49 ` [PATCH v8 23/23] selftests/uffd: Enable uffd-wp for shmem/hugetlbfs Peter Xu
2022-04-05 22:16 ` [PATCH v8 00/23] userfaultfd-wp: Support shmem and hugetlbfs Andrew Morton
2022-04-05 22:42 ` Peter Xu
2022-04-05 22:49 ` Andrew Morton
2022-04-05 23:02 ` Peter Xu
2022-04-05 23:08 ` Andrew Morton
2022-05-10 19:05 ` Andrew Morton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220405014847.14295-1-peterx@redhat.com \
--to=peterx@redhat.com \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=apopple@nvidia.com \
--cc=axelrasmussen@google.com \
--cc=david@redhat.com \
--cc=hughd@google.com \
--cc=jglisse@redhat.com \
--cc=kirill@shutemov.name \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mike.kravetz@oracle.com \
--cc=nadav.amit@gmail.com \
--cc=rppt@linux.vnet.ibm.com \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).