From: Peter Xu <peterx@redhat.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Axel Rasmussen <axelrasmussen@google.com>,
Nadav Amit <nadav.amit@gmail.com>,
Mike Rapoport <rppt@linux.vnet.ibm.com>,
Hugh Dickins <hughd@google.com>,
Mike Kravetz <mike.kravetz@oracle.com>,
"Kirill A . Shutemov" <kirill@shutemov.name>,
Alistair Popple <apopple@nvidia.com>,
Jerome Glisse <jglisse@redhat.com>,
Matthew Wilcox <willy@infradead.org>,
Andrew Morton <akpm@linux-foundation.org>,
peterx@redhat.com, David Hildenbrand <david@redhat.com>,
Andrea Arcangeli <aarcange@redhat.com>
Subject: [PATCH v6 07/23] mm/shmem: Persist uffd-wp bit across zapping for file-backed
Date: Mon, 15 Nov 2021 15:55:06 +0800 [thread overview]
Message-ID: <20211115075522.73795-8-peterx@redhat.com> (raw)
In-Reply-To: <20211115075522.73795-1-peterx@redhat.com>
File-backed memory is prone to being unmapped at any time. It means all
information in the pte will be dropped, including the uffd-wp flag.
To persist the uffd-wp flag, we'll use the pte markers. This patch teaches the
zap code to understand uffd-wp and know when to keep or drop the uffd-wp bit.
Add a new flag ZAP_FLAG_DROP_MARKER and set it in zap_details when we don't
want to persist such an information, for example, when destroying the whole
vma, or punching a hole in a shmem file. For the rest cases we should never
drop the uffd-wp bit, or the wr-protect information will get lost.
Signed-off-by: Peter Xu <peterx@redhat.com>
---
include/linux/mm.h | 20 +++++++++++++++++
include/linux/mm_inline.h | 45 +++++++++++++++++++++++++++++++++++++++
mm/memory.c | 38 +++++++++++++++++++++++++++++++--
mm/rmap.c | 8 +++++++
4 files changed, 109 insertions(+), 2 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a7e4a9e7d807..015e287063a8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1825,12 +1825,23 @@ static inline bool can_do_mlock(void) { return false; }
extern int user_shm_lock(size_t, struct ucounts *);
extern void user_shm_unlock(size_t, struct ucounts *);
+typedef unsigned int __bitwise zap_flags_t;
+
+/*
+ * Whether to drop the pte markers, for example, the uffd-wp information for
+ * file-backed memory. This should only be specified when we will completely
+ * drop the page in the mm, either by truncation or unmapping of the vma. By
+ * default, the flag is not set.
+ */
+#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
+
/*
* Parameter block passed down to zap_pte_range in exceptional cases.
*/
struct zap_details {
struct address_space *zap_mapping; /* Check page->mapping if set */
struct page *single_page; /* Locked page to be unmapped */
+ zap_flags_t zap_flags; /* Extra flags for zapping */
};
/*
@@ -1847,6 +1858,15 @@ zap_skip_check_mapping(struct zap_details *details, struct page *page)
(details->zap_mapping != page_rmapping(page));
}
+static inline bool
+zap_drop_file_uffd_wp(struct zap_details *details)
+{
+ if (!details)
+ return false;
+
+ return details->zap_flags & ZAP_FLAG_DROP_MARKER;
+}
+
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
pte_t pte);
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e2ec68b0515c..ca861e910938 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -4,6 +4,8 @@
#include <linux/huge_mm.h>
#include <linux/swap.h>
+#include <linux/userfaultfd_k.h>
+#include <linux/swapops.h>
/**
* folio_is_file_lru - Should the folio be on a file LRU or anon LRU?
@@ -135,4 +137,47 @@ static __always_inline void del_page_from_lru_list(struct page *page,
{
lruvec_del_folio(lruvec, page_folio(page));
}
+
+/*
+ * If this pte is wr-protected by uffd-wp in any form, arm the special pte to
+ * replace a none pte. NOTE! This should only be called when *pte is already
+ * cleared so we will never accidentally replace something valuable. Meanwhile
+ * none pte also means we are not demoting the pte so tlb flushed is not needed.
+ * E.g., when pte cleared the caller should have taken care of the tlb flush.
+ *
+ * Must be called with pgtable lock held so that no thread will see the none
+ * pte, and if they see it, they'll fault and serialize at the pgtable lock.
+ *
+ * This function is a no-op if PTE_MARKER_UFFD_WP is not enabled.
+ */
+static inline void
+pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *pte, pte_t pteval)
+{
+#ifdef CONFIG_PTE_MARKER_UFFD_WP
+ bool arm_uffd_pte = false;
+
+ /* The current status of the pte should be "cleared" before calling */
+ WARN_ON_ONCE(!pte_none(*pte));
+
+ if (vma_is_anonymous(vma))
+ return;
+
+ /* A uffd-wp wr-protected normal pte */
+ if (unlikely(pte_present(pteval) && pte_uffd_wp(pteval)))
+ arm_uffd_pte = true;
+
+ /*
+ * A uffd-wp wr-protected swap pte. Note: this should even cover an
+ * existing pte marker with uffd-wp bit set.
+ */
+ if (unlikely(pte_swp_uffd_wp_any(pteval)))
+ arm_uffd_pte = true;
+
+ if (unlikely(arm_uffd_pte))
+ set_pte_at(vma->vm_mm, addr, pte,
+ make_pte_marker(PTE_MARKER_UFFD_WP));
+#endif
+}
+
#endif
diff --git a/mm/memory.c b/mm/memory.c
index e8557d43a87d..fef6a91c5dfb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -73,6 +73,7 @@
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/vmalloc.h>
+#include <linux/mm_inline.h>
#include <trace/events/kmem.h>
@@ -1306,6 +1307,21 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
return ret;
}
+/*
+ * This function makes sure that we'll replace the none pte with an uffd-wp
+ * swap special pte marker when necessary. Must be with the pgtable lock held.
+ */
+static inline void
+zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *pte,
+ struct zap_details *details, pte_t pteval)
+{
+ if (zap_drop_file_uffd_wp(details))
+ return;
+
+ pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
+}
+
static unsigned long zap_pte_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
@@ -1343,6 +1359,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
+ zap_install_uffd_wp_if_needed(vma, addr, pte, details,
+ ptent);
if (unlikely(!page))
continue;
@@ -1373,6 +1391,13 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
page = pfn_swap_entry_to_page(entry);
if (unlikely(zap_skip_check_mapping(details, page)))
continue;
+ /*
+ * Both device private/exclusive mappings should only
+ * work with anonymous page so far, so we don't need to
+ * consider uffd-wp bit when zap. For more information,
+ * see zap_install_uffd_wp_if_needed().
+ */
+ WARN_ON_ONCE(!vma_is_anonymous(vma));
rss[mm_counter(page)]--;
if (is_device_private_entry(entry))
page_remove_rmap(page, false);
@@ -1383,13 +1408,18 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
continue;
rss[mm_counter(page)]--;
} else if (is_pte_marker_entry(entry)) {
- /* By default, simply drop all pte markers when zap */
+ /* Currently there's only uffd-wp marker bit */
+ WARN_ON_ONCE(!(pte_marker_get(entry) & PTE_MARKER_UFFD_WP));
+ /* Only drop the uffd-wp marker if explicitly requested */
+ if (!zap_drop_file_uffd_wp(details))
+ continue;
} else if (!non_swap_entry(entry)) {
rss[MM_SWAPENTS]--;
if (unlikely(!free_swap_and_cache(entry)))
print_bad_pte(vma, addr, ptent, NULL);
}
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+ zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
} while (pte++, addr += PAGE_SIZE, addr != end);
add_mm_rss_vec(mm, rss);
@@ -1600,12 +1630,15 @@ void unmap_vmas(struct mmu_gather *tlb,
unsigned long end_addr)
{
struct mmu_notifier_range range;
+ struct zap_details details = {
+ .zap_flags = ZAP_FLAG_DROP_MARKER,
+ };
mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
start_addr, end_addr);
mmu_notifier_invalidate_range_start(&range);
for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
- unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);
+ unmap_single_vma(tlb, vma, start_addr, end_addr, &details);
mmu_notifier_invalidate_range_end(&range);
}
@@ -3350,6 +3383,7 @@ void unmap_mapping_page(struct page *page)
details.zap_mapping = mapping;
details.single_page = page;
+ details.zap_flags = ZAP_FLAG_DROP_MARKER;
i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
diff --git a/mm/rmap.c b/mm/rmap.c
index 163ac4e6bcee..89068e957486 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -73,6 +73,7 @@
#include <linux/page_idle.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>
+#include <linux/mm_inline.h>
#include <asm/tlbflush.h>
@@ -1517,6 +1518,13 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
pteval = ptep_clear_flush(vma, address, pvmw.pte);
}
+ /*
+ * Now the pte is cleared. If this is uffd-wp armed pte, we
+ * may want to replace a none pte with a marker pte if it's
+ * file-backed, so we don't lose the tracking information.
+ */
+ pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
+
/* Move the dirty bit to the page. Now the pte is gone. */
if (pte_dirty(pteval))
set_page_dirty(page);
--
2.32.0
next prev parent reply other threads:[~2021-11-15 7:56 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-11-15 7:54 [PATCH v6 00/23] userfaultfd-wp: Support shmem and hugetlbfs Peter Xu
2021-11-15 7:55 ` [PATCH v6 01/23] mm: Introduce PTE_MARKER swap entry Peter Xu
2021-12-03 3:30 ` Alistair Popple
2021-12-03 4:21 ` Peter Xu
2021-12-03 5:35 ` Alistair Popple
2021-12-03 6:45 ` Peter Xu
2021-12-07 2:12 ` Alistair Popple
2021-12-07 2:30 ` Peter Xu
2021-11-15 7:55 ` [PATCH v6 02/23] mm: Teach core mm about pte markers Peter Xu
2021-11-15 7:55 ` [PATCH v6 03/23] mm: Check against orig_pte for finish_fault() Peter Xu
2021-12-16 5:01 ` Alistair Popple
2021-12-16 5:38 ` Peter Xu
2021-12-16 5:50 ` Peter Xu
2021-12-16 6:23 ` Alistair Popple
2021-12-16 7:06 ` Peter Xu
2021-12-16 7:45 ` Alistair Popple
2021-12-16 8:04 ` Peter Xu
2021-11-15 7:55 ` [PATCH v6 04/23] mm/uffd: PTE_MARKER_UFFD_WP Peter Xu
2021-12-16 5:18 ` Alistair Popple
2021-12-16 5:45 ` Peter Xu
2021-11-15 7:55 ` [PATCH v6 05/23] mm/shmem: Take care of UFFDIO_COPY_MODE_WP Peter Xu
2021-11-15 7:55 ` [PATCH v6 06/23] mm/shmem: Handle uffd-wp special pte in page fault handler Peter Xu
2021-12-16 5:56 ` Alistair Popple
2021-12-16 6:17 ` Peter Xu
2021-12-16 6:30 ` Alistair Popple
2021-11-15 7:55 ` Peter Xu [this message]
2021-11-15 8:00 ` [PATCH v6 08/23] mm/shmem: Allow uffd wr-protect none pte for file-backed mem Peter Xu
2021-11-15 8:00 ` [PATCH v6 09/23] mm/shmem: Allows file-back mem to be uffd wr-protected on thps Peter Xu
2021-11-15 8:01 ` [PATCH v6 10/23] mm/shmem: Handle uffd-wp during fork() Peter Xu
2021-11-15 8:01 ` [PATCH v6 11/23] mm/hugetlb: Introduce huge pte version of uffd-wp helpers Peter Xu
2021-11-15 8:01 ` [PATCH v6 12/23] mm/hugetlb: Hook page faults for uffd write protection Peter Xu
2021-11-15 8:01 ` [PATCH v6 13/23] mm/hugetlb: Take care of UFFDIO_COPY_MODE_WP Peter Xu
2021-11-15 8:02 ` [PATCH v6 14/23] mm/hugetlb: Handle UFFDIO_WRITEPROTECT Peter Xu
2021-11-15 8:02 ` [PATCH v6 15/23] mm/hugetlb: Handle pte markers in page faults Peter Xu
2021-11-15 8:02 ` [PATCH v6 16/23] mm/hugetlb: Allow uffd wr-protect none ptes Peter Xu
2021-11-15 8:02 ` [PATCH v6 17/23] mm/hugetlb: Only drop uffd-wp special pte if required Peter Xu
2021-11-15 8:02 ` [PATCH v6 18/23] mm/hugetlb: Handle uffd-wp during fork() Peter Xu
2021-11-15 8:03 ` [PATCH v6 19/23] mm/khugepaged: Don't recycle vma pgtable if uffd-wp registered Peter Xu
2021-11-15 8:03 ` [PATCH v6 20/23] mm/pagemap: Recognize uffd-wp bit for shmem/hugetlbfs Peter Xu
2021-11-15 8:03 ` [PATCH v6 21/23] mm/uffd: Enable write protection for shmem & hugetlbfs Peter Xu
2021-11-15 8:03 ` [PATCH v6 22/23] mm: Enable PTE markers by default Peter Xu
2021-11-15 8:04 ` [PATCH v6 23/23] selftests/uffd: Enable uffd-wp for shmem/hugetlbfs Peter Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211115075522.73795-8-peterx@redhat.com \
--to=peterx@redhat.com \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=apopple@nvidia.com \
--cc=axelrasmussen@google.com \
--cc=david@redhat.com \
--cc=hughd@google.com \
--cc=jglisse@redhat.com \
--cc=kirill@shutemov.name \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mike.kravetz@oracle.com \
--cc=nadav.amit@gmail.com \
--cc=rppt@linux.vnet.ibm.com \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.