From: Ryan Roberts <ryan.roberts@arm.com>
To: Catalin Marinas <catalin.marinas@arm.com>,
Will Deacon <will@kernel.org>, Joey Gouly <joey.gouly@arm.com>,
Ard Biesheuvel <ardb@kernel.org>,
Mark Rutland <mark.rutland@arm.com>,
Anshuman Khandual <anshuman.khandual@arm.com>,
David Hildenbrand <david@redhat.com>,
Peter Xu <peterx@redhat.com>, Mike Rapoport <rppt@linux.ibm.com>,
Shivansh Vij <shivanshvij@outlook.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>,
linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org
Subject: [PATCH v2 1/3] arm64/mm: Refactor PMD_PRESENT_INVALID and PTE_PROT_NONE bits
Date: Mon, 29 Apr 2024 15:02:05 +0100 [thread overview]
Message-ID: <20240429140208.238056-2-ryan.roberts@arm.com> (raw)
In-Reply-To: <20240429140208.238056-1-ryan.roberts@arm.com>
Currently the PMD_PRESENT_INVALID and PTE_PROT_NONE functionality
explicitly occupy 2 bits in the PTE when PTE_VALID/PMD_SECT_VALID is
clear. This has 2 significant consequences:
- PTE_PROT_NONE consumes a precious SW PTE bit that could be used for
other things.
- The swap pte layout must reserve those same 2 bits and ensure they
are both always zero for a swap pte. It would be nice to reclaim at
least one of those bits.
Note that while PMD_PRESENT_INVALID technically only applies to pmds,
the swap pte layout is common to ptes and pmds so we are currently
effectively reserving that bit at both levels.
Let's replace PMD_PRESENT_INVALID with a more generic PTE_INVALID bit,
which occupies the same position (bit 59) but applies uniformly to
page/block descriptors at any level. This bit is only interpretted when
PTE_VALID is clear. If it is set, then the pte is still considered
present; pte_present() returns true and all the fields in the pte follow
the HW interpretation (e.g. SW can safely call pte_pfn(), etc). But
crucially, the HW treats the pte as invalid and will fault if it hits.
With this in place, we can remove PTE_PROT_NONE entirely and instead
represent PROT_NONE as a present but invalid pte (PTE_VALID=0,
PTE_INVALID=1) with PTE_USER=0 and PTE_UXN=1. This is a unique
combination that is not used anywhere else.
The net result is a clearer, simpler, more generic encoding scheme that
applies uniformly to all levels. Additionally we free up a PTE SW bit a
swap pte bit (bit 58 in both cases).
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---
arch/arm64/include/asm/pgtable-prot.h | 11 ++------
arch/arm64/include/asm/pgtable.h | 38 ++++++++++++---------------
2 files changed, 19 insertions(+), 30 deletions(-)
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index dd9ee67d1d87..de62e6881154 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -18,14 +18,7 @@
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
#define PTE_DEVMAP (_AT(pteval_t, 1) << 57)
-#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
-
-/*
- * This bit indicates that the entry is present i.e. pmd_page()
- * still points to a valid huge page in memory even if the pmd
- * has been invalidated.
- */
-#define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */
+#define PTE_INVALID (_AT(pteval_t, 1) << 59) /* only when !PTE_VALID */
#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
@@ -103,7 +96,7 @@ static inline bool __pure lpa2_is_enabled(void)
__val; \
})
-#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_INVALID | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */
#define PAGE_SHARED __pgprot(_PAGE_SHARED)
#define PAGE_SHARED_EXEC __pgprot(_PAGE_SHARED_EXEC)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index afdd56d26ad7..8dd4637d6b56 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -105,7 +105,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
/*
* The following only work if pte_present(). Undefined behaviour otherwise.
*/
-#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)))
+#define pte_present(pte) (pte_valid(pte) || pte_invalid(pte))
#define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
@@ -132,6 +132,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
+#define pte_invalid(pte) ((pte_val(pte) & (PTE_VALID | PTE_INVALID)) == PTE_INVALID)
/*
* Execute-only user mappings do not have the PTE_USER bit set. All valid
* kernel mappings have the PTE_UXN bit set.
@@ -261,6 +262,13 @@ static inline pte_t pte_mkpresent(pte_t pte)
return set_pte_bit(pte, __pgprot(PTE_VALID));
}
+static inline pte_t pte_mkinvalid(pte_t pte)
+{
+ pte = set_pte_bit(pte, __pgprot(PTE_INVALID));
+ pte = clear_pte_bit(pte, __pgprot(PTE_VALID));
+ return pte;
+}
+
static inline pmd_t pmd_mkcont(pmd_t pmd)
{
return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
@@ -469,7 +477,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
*/
static inline int pte_protnone(pte_t pte)
{
- return (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE;
+ return pte_invalid(pte) && !pte_user(pte) && !pte_user_exec(pte);
}
static inline int pmd_protnone(pmd_t pmd)
@@ -478,12 +486,7 @@ static inline int pmd_protnone(pmd_t pmd)
}
#endif
-#define pmd_present_invalid(pmd) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID))
-
-static inline int pmd_present(pmd_t pmd)
-{
- return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd);
-}
+#define pmd_present(pmd) pte_present(pmd_pte(pmd))
/*
* THP definitions.
@@ -508,14 +511,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-
-static inline pmd_t pmd_mkinvalid(pmd_t pmd)
-{
- pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID));
- pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID));
-
- return pmd;
-}
+#define pmd_mkinvalid(pmd) pte_pmd(pte_mkinvalid(pmd_pte(pmd)))
#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
@@ -1027,7 +1023,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
* in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK.
*/
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
- PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP |
+ PTE_INVALID | PTE_VALID | PTE_WRITE | PTE_GP |
PTE_ATTRINDX_MASK;
/* preserve the hardware dirty information */
if (pte_hw_dirty(pte))
@@ -1076,17 +1072,17 @@ static inline int pgd_devmap(pgd_t pgd)
#ifdef CONFIG_PAGE_TABLE_CHECK
static inline bool pte_user_accessible_page(pte_t pte)
{
- return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte));
+ return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte));
}
static inline bool pmd_user_accessible_page(pmd_t pmd)
{
- return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
+ return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
}
static inline bool pud_user_accessible_page(pud_t pud)
{
- return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud));
+ return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud));
}
#endif
@@ -1250,7 +1246,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
* bits 2: remember PG_anon_exclusive
* bits 3-7: swap type
* bits 8-57: swap offset
- * bit 58: PTE_PROT_NONE (must be zero)
+ * bit 59: PTE_INVALID (must be zero)
*/
#define __SWP_TYPE_SHIFT 3
#define __SWP_TYPE_BITS 5
--
2.25.1
next prev parent reply other threads:[~2024-04-29 14:02 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-29 14:02 [PATCH v2 0/3] arm64/mm: Enable userfaultfd write-protect Ryan Roberts
2024-04-29 14:02 ` Ryan Roberts [this message]
2024-04-29 16:12 ` [PATCH v2 1/3] arm64/mm: Refactor PMD_PRESENT_INVALID and PTE_PROT_NONE bits David Hildenbrand
2024-04-29 16:20 ` Catalin Marinas
2024-04-29 17:15 ` Ryan Roberts
2024-04-30 11:11 ` Catalin Marinas
2024-04-30 11:35 ` Ryan Roberts
2024-04-30 13:28 ` Catalin Marinas
2024-04-30 13:34 ` Ryan Roberts
2024-04-30 11:37 ` David Hildenbrand
2024-04-30 12:53 ` Ryan Roberts
2024-04-30 12:58 ` David Hildenbrand
2024-04-30 13:30 ` Will Deacon
2024-04-30 14:02 ` Ryan Roberts
2024-04-30 15:04 ` Will Deacon
2024-04-30 15:39 ` Ryan Roberts
2024-04-29 14:02 ` [PATCH v2 2/3] arm64/mm: Move PTE_INVALID to overlay PTE_NS Ryan Roberts
2024-04-29 16:34 ` Catalin Marinas
2024-04-29 14:02 ` [PATCH v2 3/3] arm64/mm: Add uffd write-protect support Ryan Roberts
2024-04-29 16:08 ` David Hildenbrand
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240429140208.238056-2-ryan.roberts@arm.com \
--to=ryan.roberts@arm.com \
--cc=anshuman.khandual@arm.com \
--cc=ardb@kernel.org \
--cc=catalin.marinas@arm.com \
--cc=david@redhat.com \
--cc=joey.gouly@arm.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=peterx@redhat.com \
--cc=rppt@linux.ibm.com \
--cc=shivanshvij@outlook.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox