linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Peter Xu <peterx@redhat.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Alistair Popple <apopple@nvidia.com>,
	Tiberiu Georgescu <tiberiu.georgescu@nutanix.com>,
	ivan.teterevkov@nutanix.com,
	Mike Rapoport <rppt@linux.vnet.ibm.com>,
	Hugh Dickins <hughd@google.com>,
	peterx@redhat.com, Matthew Wilcox <willy@infradead.org>,
	Andrea Arcangeli <aarcange@redhat.com>,
	David Hildenbrand <david@redhat.com>,
	"Kirill A . Shutemov" <kirill@shutemov.name>,
	Andrew Morton <akpm@linux-foundation.org>,
	Mike Kravetz <mike.kravetz@oracle.com>
Subject: [PATCH RFC 1/4] mm: Introduce PTE_MARKER swap entry
Date: Fri,  6 Aug 2021 23:25:18 -0400	[thread overview]
Message-ID: <20210807032521.7591-2-peterx@redhat.com> (raw)
In-Reply-To: <20210807032521.7591-1-peterx@redhat.com>

This patch introduces a new swap entry type called PTE_MARKER.  It can be
installed for any pte that maps a file-backed memory when the pte is
temporarily zapped, so as to maintain per-pte information.

The information that kept in the pte is called a "marker".  Here we define the
marker as "unsigned long" just to match pgoff_t, however it will only work if
it still fits in swp_offset(), which is e.g. currently 58 bits on x86_64.

The first marker bit that is introduced together with the new swap pte is the
PAGEOUT marker.  When that bit is set, it means this pte used to point to a
page which got swapped out.  It's mostly a definition so the swap type is not
totally nothing, however the functions are not implemented yet to handle the
new swap type.

A new config CONFIG_PTE_MARKER is introduced too; it's by default off.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 include/linux/swap.h    | 14 ++++++++++++-
 include/linux/swapops.h | 45 +++++++++++++++++++++++++++++++++++++++++
 mm/Kconfig              | 17 ++++++++++++++++
 3 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 6f5a43251593..545dc8e0b0fb 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -55,6 +55,18 @@ static inline int current_is_kswapd(void)
  * actions on faults.
  */
 
+/*
+ * PTE markers are used to persist information onto PTEs that are mapped with
+ * file-backed memories.
+ */
+#ifdef CONFIG_PTE_MARKER
+#define SWP_PTE_MARKER_NUM 1
+#define SWP_PTE_MARKER     (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
+			    SWP_MIGRATION_NUM + SWP_DEVICE_NUM)
+#else
+#define SWP_PTE_MARKER_NUM 0
+#endif
+
 /*
  * Unaddressable device memory support. See include/linux/hmm.h and
  * Documentation/vm/hmm.rst. Short description is we need struct pages for
@@ -100,7 +112,7 @@ static inline int current_is_kswapd(void)
 
 #define MAX_SWAPFILES \
 	((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
-	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
+	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - SWP_PTE_MARKER_NUM)
 
 /*
  * Magic header for a swap area. The first part of the union is
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index d356ab4047f7..3fec83449e1e 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -247,6 +247,51 @@ static inline int is_writable_migration_entry(swp_entry_t entry)
 
 #endif
 
+#ifdef CONFIG_PTE_MARKER
+
+#ifdef CONFIG_PTE_MARKER_PAGEOUT
+/* When this bit is set, it means this page is swapped out previously */
+#define  PTE_MARKER_PAGEOUT  (1UL << 0)
+#else
+#define  PTE_MARKER_PAGEOUT  0
+#endif
+
+#define  PTE_MARKER_MASK     (PTE_MARKER_PAGEOUT)
+
+static inline swp_entry_t make_pte_marker_entry(unsigned long marker)
+{
+	return swp_entry(SWP_PTE_MARKER, marker);
+}
+
+static inline bool is_pte_marker_entry(swp_entry_t entry)
+{
+	return swp_type(entry) == SWP_PTE_MARKER;
+}
+
+static inline unsigned long pte_marker_get(swp_entry_t entry)
+{
+	return swp_offset(entry) & PTE_MARKER_MASK;
+}
+
+#else /* CONFIG_PTE_MARKER */
+
+static inline swp_entry_t make_pte_marker_entry(unsigned long marker)
+{
+	return swp_entry(0, 0);
+}
+
+static inline bool is_pte_marker_entry(swp_entry_t entry)
+{
+	return false;
+}
+
+static inline unsigned long pte_marker_get(swp_entry_t entry)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PTE_MARKER */
+
 static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
 {
 	struct page *p = pfn_to_page(swp_offset(entry));
diff --git a/mm/Kconfig b/mm/Kconfig
index 40a9bfcd5062..6043d8f1c066 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -889,4 +889,21 @@ config IO_MAPPING
 config SECRETMEM
 	def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
 
+config PTE_MARKER
+        def_bool n
+        bool "Marker PTEs support"
+
+	help
+	  Allows to create marker PTEs for file-backed memory.
+
+config PTE_MARKER_PAGEOUT
+        def_bool n
+        depends on PTE_MARKER
+        bool "Shmem pagemap PM_SWAP support"
+
+	help
+	  Allows to create marker PTEs for file-backed memory when the page is
+	  swapped out.  It's required for pagemap to work correctly with shmem
+	  on page swapping.
+
 endmenu
-- 
2.32.0



  reply	other threads:[~2021-08-07  3:25 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-07  3:25 [PATCH RFC 0/4] mm: Enable PM_SWAP for shmem with PTE_MARKER Peter Xu
2021-08-07  3:25 ` Peter Xu [this message]
2021-08-07  3:25 ` [PATCH RFC 2/4] mm: Check against orig_pte for finish_fault() Peter Xu
2021-08-07  3:25 ` [PATCH RFC 3/4] mm: Handle PTE_MARKER page faults Peter Xu
2021-08-07  3:25 ` [PATCH RFC 4/4] mm: Install marker pte when page out for shmem pages Peter Xu
2021-08-13 15:18   ` Tiberiu Georgescu
2021-08-13 16:01     ` Peter Xu
2021-08-18 18:02       ` Tiberiu Georgescu
2021-08-17  9:04 ` [PATCH RFC 0/4] mm: Enable PM_SWAP for shmem with PTE_MARKER David Hildenbrand
2021-08-17 17:09   ` Peter Xu
2021-08-17 18:46     ` David Hildenbrand
2021-08-17 20:24       ` Peter Xu
2021-08-18  8:24         ` David Hildenbrand
2021-08-18 17:52           ` Tiberiu Georgescu
2021-08-18 18:13             ` David Hildenbrand
2021-08-19 14:54               ` Tiberiu Georgescu
2021-08-19 17:26                 ` David Hildenbrand
2021-08-20 16:49                   ` Tiberiu Georgescu
2021-08-20 19:12                     ` Peter Xu
2021-08-25 13:40                       ` Tiberiu Georgescu
2021-08-25 14:59                         ` Peter Xu
2021-08-26 15:01                           ` Tiberiu Georgescu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210807032521.7591-2-peterx@redhat.com \
    --to=peterx@redhat.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=david@redhat.com \
    --cc=hughd@google.com \
    --cc=ivan.teterevkov@nutanix.com \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mike.kravetz@oracle.com \
    --cc=rppt@linux.vnet.ibm.com \
    --cc=tiberiu.georgescu@nutanix.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).