From: Peter Xu <peterx@redhat.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Mike Kravetz <mike.kravetz@oracle.com>,
Andrew Morton <akpm@linux-foundation.org>,
David Hildenbrand <david@redhat.com>,
Matthew Wilcox <willy@infradead.org>,
peterx@redhat.com, Alistair Popple <apopple@nvidia.com>,
Nadav Amit <nadav.amit@gmail.com>,
Axel Rasmussen <axelrasmussen@google.com>,
Andrea Arcangeli <aarcange@redhat.com>,
"Kirill A . Shutemov" <kirill@shutemov.name>,
Hugh Dickins <hughd@google.com>,
Jerome Glisse <jglisse@redhat.com>,
Mike Rapoport <rppt@linux.vnet.ibm.com>
Subject: [PATCH v8 01/23] mm: Introduce PTE_MARKER swap entry
Date: Mon, 4 Apr 2022 21:46:24 -0400 [thread overview]
Message-ID: <20220405014646.13522-2-peterx@redhat.com> (raw)
In-Reply-To: <20220405014646.13522-1-peterx@redhat.com>
This patch introduces a new swap entry type called PTE_MARKER. It can be
installed for any pte that maps a file-backed memory when the pte is
temporarily zapped, so as to maintain per-pte information.
The information that kept in the pte is called a "marker". Here we define the
marker as "unsigned long" just to match pgoff_t, however it will only work if
it still fits in swp_offset(), which is e.g. currently 58 bits on x86_64.
A new config CONFIG_PTE_MARKER is introduced too; it's by default off. A bunch
of helpers are defined altogether to service the rest of the pte marker code.
Signed-off-by: Peter Xu <peterx@redhat.com>
---
include/asm-generic/hugetlb.h | 9 ++++
include/linux/swap.h | 15 ++++++-
include/linux/swapops.h | 78 +++++++++++++++++++++++++++++++++++
mm/Kconfig | 6 +++
4 files changed, 107 insertions(+), 1 deletion(-)
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 8e1e6244a89d..f39cad20ffc6 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -2,6 +2,9 @@
#ifndef _ASM_GENERIC_HUGETLB_H
#define _ASM_GENERIC_HUGETLB_H
+#include <linux/swap.h>
+#include <linux/swapops.h>
+
static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
{
return mk_pte(page, pgprot);
@@ -80,6 +83,12 @@ static inline int huge_pte_none(pte_t pte)
}
#endif
+/* Please refer to comments above pte_none_mostly() for the usage */
+static inline int huge_pte_none_mostly(pte_t pte)
+{
+ return huge_pte_none(pte) || is_pte_marker(pte);
+}
+
#ifndef __HAVE_ARCH_HUGE_PTE_WRPROTECT
static inline pte_t huge_pte_wrprotect(pte_t pte)
{
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7daae5a4b3e1..5553189d0215 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -55,6 +55,19 @@ static inline int current_is_kswapd(void)
* actions on faults.
*/
+/*
+ * PTE markers are used to persist information onto PTEs that are mapped with
+ * file-backed memories. As its name "PTE" hints, it should only be applied to
+ * the leaves of pgtables.
+ */
+#ifdef CONFIG_PTE_MARKER
+#define SWP_PTE_MARKER_NUM 1
+#define SWP_PTE_MARKER (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
+ SWP_MIGRATION_NUM + SWP_DEVICE_NUM)
+#else
+#define SWP_PTE_MARKER_NUM 0
+#endif
+
/*
* Unaddressable device memory support. See include/linux/hmm.h and
* Documentation/vm/hmm.rst. Short description is we need struct pages for
@@ -107,7 +120,7 @@ static inline int current_is_kswapd(void)
#define MAX_SWAPFILES \
((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
- SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
+ SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - SWP_PTE_MARKER_NUM)
/*
* Magic header for a swap area. The first part of the union is
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 32d517a28969..7a00627845f0 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -274,6 +274,84 @@ static inline int is_readable_migration_entry(swp_entry_t entry)
#endif
+typedef unsigned long pte_marker;
+
+#define PTE_MARKER_MASK (0)
+
+#ifdef CONFIG_PTE_MARKER
+
+static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
+{
+ return swp_entry(SWP_PTE_MARKER, marker);
+}
+
+static inline bool is_pte_marker_entry(swp_entry_t entry)
+{
+ return swp_type(entry) == SWP_PTE_MARKER;
+}
+
+static inline pte_marker pte_marker_get(swp_entry_t entry)
+{
+ return swp_offset(entry) & PTE_MARKER_MASK;
+}
+
+static inline bool is_pte_marker(pte_t pte)
+{
+ return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte));
+}
+
+#else /* CONFIG_PTE_MARKER */
+
+static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
+{
+ /* This should never be called if !CONFIG_PTE_MARKER */
+ WARN_ON_ONCE(1);
+ return swp_entry(0, 0);
+}
+
+static inline bool is_pte_marker_entry(swp_entry_t entry)
+{
+ return false;
+}
+
+static inline pte_marker pte_marker_get(swp_entry_t entry)
+{
+ return 0;
+}
+
+static inline bool is_pte_marker(pte_t pte)
+{
+ return false;
+}
+
+#endif /* CONFIG_PTE_MARKER */
+
+static inline pte_t make_pte_marker(pte_marker marker)
+{
+ return swp_entry_to_pte(make_pte_marker_entry(marker));
+}
+
+/*
+ * This is a special version to check pte_none() just to cover the case when
+ * the pte is a pte marker. It existed because in many cases the pte marker
+ * should be seen as a none pte; it's just that we have stored some information
+ * onto the none pte so it becomes not-none any more.
+ *
+ * It should be used when the pte is file-backed, ram-based and backing
+ * userspace pages, like shmem. It is not needed upon pgtables that do not
+ * support pte markers at all. For example, it's not needed on anonymous
+ * memory, kernel-only memory (including when the system is during-boot),
+ * non-ram based generic file-system. It's fine to be used even there, but the
+ * extra pte marker check will be pure overhead.
+ *
+ * For systems configured with !CONFIG_PTE_MARKER this will be automatically
+ * optimized to pte_none().
+ */
+static inline int pte_none_mostly(pte_t pte)
+{
+ return pte_none(pte) || is_pte_marker(pte);
+}
+
static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
{
struct page *p = pfn_to_page(swp_offset(entry));
diff --git a/mm/Kconfig b/mm/Kconfig
index 034d87953600..a1688b9314b2 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -909,6 +909,12 @@ config ANON_VMA_NAME
area from being merged with adjacent virtual memory areas due to the
difference in their name.
+config PTE_MARKER
+ bool "Marker PTEs support"
+
+ help
+ Allows to create marker PTEs for file-backed memory.
+
source "mm/damon/Kconfig"
endmenu
--
2.32.0
next prev parent reply other threads:[~2022-04-05 1:47 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-05 1:46 [PATCH v8 00/23] userfaultfd-wp: Support shmem and hugetlbfs Peter Xu
2022-04-05 1:46 ` Peter Xu [this message]
2022-04-12 1:07 ` [PATCH v8 01/23] mm: Introduce PTE_MARKER swap entry Alistair Popple
2022-04-12 19:45 ` Peter Xu
2022-04-13 0:30 ` Alistair Popple
2022-04-13 13:44 ` Peter Xu
2022-04-19 8:25 ` Alistair Popple
2022-04-19 19:44 ` Peter Xu
2022-04-05 1:48 ` [PATCH v8 02/23] mm: Teach core mm about pte markers Peter Xu
2022-04-12 1:22 ` Alistair Popple
2022-04-12 19:53 ` Peter Xu
2022-04-05 1:48 ` [PATCH v8 03/23] mm: Check against orig_pte for finish_fault() Peter Xu
2022-04-12 2:05 ` Alistair Popple
2022-04-12 19:54 ` Peter Xu
[not found] ` <CGME20220413140330eucas1p167da41e079712b829ef8237dc27b049c@eucas1p1.samsung.com>
2022-04-13 14:03 ` Marek Szyprowski
2022-04-13 16:43 ` Peter Xu
2022-04-14 7:51 ` Marek Szyprowski
2022-04-14 16:30 ` Peter Xu
2022-04-14 20:57 ` Andrew Morton
2022-04-14 21:08 ` Peter Xu
2022-04-15 14:21 ` Guenter Roeck
2022-04-15 14:41 ` Peter Xu
2022-04-05 1:48 ` [PATCH v8 04/23] mm/uffd: PTE_MARKER_UFFD_WP Peter Xu
2022-04-06 1:41 ` kernel test robot
2022-04-05 1:48 ` [PATCH v8 05/23] mm/shmem: Take care of UFFDIO_COPY_MODE_WP Peter Xu
2022-04-05 1:48 ` [PATCH v8 06/23] mm/shmem: Handle uffd-wp special pte in page fault handler Peter Xu
2022-05-11 16:30 ` David Hildenbrand
2022-05-12 16:34 ` Peter Xu
2022-04-05 1:48 ` [PATCH v8 07/23] mm/shmem: Persist uffd-wp bit across zapping for file-backed Peter Xu
2022-04-05 1:48 ` [PATCH v8 08/23] mm/shmem: Allow uffd wr-protect none pte for file-backed mem Peter Xu
2022-04-05 1:48 ` [PATCH v8 09/23] mm/shmem: Allows file-back mem to be uffd wr-protected on thps Peter Xu
2022-04-05 1:48 ` [PATCH v8 10/23] mm/shmem: Handle uffd-wp during fork() Peter Xu
2022-04-06 6:16 ` kernel test robot
2022-04-06 12:18 ` Peter Xu
2022-04-05 1:48 ` [PATCH v8 11/23] mm/hugetlb: Introduce huge pte version of uffd-wp helpers Peter Xu
2022-04-05 1:49 ` [PATCH v8 12/23] mm/hugetlb: Hook page faults for uffd write protection Peter Xu
2022-04-05 1:49 ` [PATCH v8 13/23] mm/hugetlb: Take care of UFFDIO_COPY_MODE_WP Peter Xu
2022-04-05 1:49 ` [PATCH v8 14/23] mm/hugetlb: Handle UFFDIO_WRITEPROTECT Peter Xu
2022-04-05 1:49 ` [PATCH v8 15/23] mm/hugetlb: Handle pte markers in page faults Peter Xu
2022-04-06 13:37 ` kernel test robot
2022-04-06 15:02 ` Peter Xu
2022-04-05 1:49 ` [PATCH v8 16/23] mm/hugetlb: Allow uffd wr-protect none ptes Peter Xu
2022-04-05 1:49 ` [PATCH v8 17/23] mm/hugetlb: Only drop uffd-wp special pte if required Peter Xu
2022-04-05 1:49 ` [PATCH v8 18/23] mm/hugetlb: Handle uffd-wp during fork() Peter Xu
2022-04-05 1:49 ` [PATCH v8 19/23] mm/khugepaged: Don't recycle vma pgtable if uffd-wp registered Peter Xu
2022-04-05 1:49 ` [PATCH v8 20/23] mm/pagemap: Recognize uffd-wp bit for shmem/hugetlbfs Peter Xu
2022-04-05 1:49 ` [PATCH v8 21/23] mm/uffd: Enable write protection for shmem & hugetlbfs Peter Xu
2022-04-05 1:49 ` [PATCH v8 22/23] mm: Enable PTE markers by default Peter Xu
2022-04-19 15:13 ` Johannes Weiner
2022-04-19 19:59 ` Peter Xu
2022-04-19 20:14 ` Johannes Weiner
2022-04-19 20:28 ` Peter Xu
2022-04-19 21:24 ` Johannes Weiner
2022-04-19 22:01 ` Peter Xu
2022-04-20 13:46 ` Johannes Weiner
2022-04-20 14:25 ` Peter Xu
2022-04-05 1:49 ` [PATCH v8 23/23] selftests/uffd: Enable uffd-wp for shmem/hugetlbfs Peter Xu
2022-04-05 22:16 ` [PATCH v8 00/23] userfaultfd-wp: Support shmem and hugetlbfs Andrew Morton
2022-04-05 22:42 ` Peter Xu
2022-04-05 22:49 ` Andrew Morton
2022-04-05 23:02 ` Peter Xu
2022-04-05 23:08 ` Andrew Morton
2022-05-10 19:05 ` Andrew Morton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220405014646.13522-2-peterx@redhat.com \
--to=peterx@redhat.com \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=apopple@nvidia.com \
--cc=axelrasmussen@google.com \
--cc=david@redhat.com \
--cc=hughd@google.com \
--cc=jglisse@redhat.com \
--cc=kirill@shutemov.name \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mike.kravetz@oracle.com \
--cc=nadav.amit@gmail.com \
--cc=rppt@linux.vnet.ibm.com \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).