* [PATCH 1/8] x86/mm: Always allocate a whole page for PAE PGDs
2025-04-14 17:32 [PATCH 0/8] x86/mm: Simplify PAE page table handling Dave Hansen
@ 2025-04-14 17:32 ` Dave Hansen
2025-04-14 17:32 ` [PATCH 2/8] x86/mm: Always "broadcast" PMD setting operations Dave Hansen
` (6 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2025-04-14 17:32 UTC (permalink / raw)
To: linux-kernel
Cc: x86, tglx, bp, joro, luto, peterz, kirill.shutemov,
rick.p.edgecombe, jgross, Dave Hansen
From: Dave Hansen <dave.hansen@linux.intel.com>
A hardware PAE PGD is only 32 bytes. A PGD is PAGE_SIZE in the other
paging modes. But for reasons*, the kernel _sometimes_ allocates a
whole page even though it only ever uses 32 bytes.
Make PAE less weird. Just allocate a page like the other paging modes.
This was already being done for PTI (and Xen in the past) and nobody
screamed that loudly about it so it can't be that bad.
* The original reason for PAGE_SIZE allocations for the PAE PGDs was
Xen's need to detect page table writes. But 32-bit PTI forced it too
for reasons I'm unclear about.
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
---
b/arch/x86/mm/pgtable.c | 62 +++---------------------------------------------
1 file changed, 4 insertions(+), 58 deletions(-)
diff -puN arch/x86/mm/pgtable.c~no-pae-kmem_cache arch/x86/mm/pgtable.c
--- a/arch/x86/mm/pgtable.c~no-pae-kmem_cache 2025-04-09 11:49:39.531879317 -0700
+++ b/arch/x86/mm/pgtable.c 2025-04-09 11:49:39.534879427 -0700
@@ -318,68 +318,15 @@ static void pgd_prepopulate_user_pmd(str
{
}
#endif
-/*
- * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also
- * assumes that pgd should be in one page.
- *
- * But kernel with PAE paging that is not running as a Xen domain
- * only needs to allocate 32 bytes for pgd instead of one page.
- */
-#ifdef CONFIG_X86_PAE
-
-#include <linux/slab.h>
-
-#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
-#define PGD_ALIGN 32
-
-static struct kmem_cache *pgd_cache;
-
-void __init pgtable_cache_init(void)
-{
- /*
- * When PAE kernel is running as a Xen domain, it does not use
- * shared kernel pmd. And this requires a whole page for pgd.
- */
- if (!SHARED_KERNEL_PMD)
- return;
-
- /*
- * when PAE kernel is not running as a Xen domain, it uses
- * shared kernel pmd. Shared kernel pmd does not require a whole
- * page for pgd. We are able to just allocate a 32-byte for pgd.
- * During boot time, we create a 32-byte slab for pgd table allocation.
- */
- pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
- SLAB_PANIC, NULL);
-}
static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
{
/*
- * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain.
- * We allocate one page for pgd.
+ * PTI and Xen need a whole page for the PAE PGD
+ * even though the hardware only needs 32 bytes.
+ *
+ * For simplicity, allocate a page for all users.
*/
- if (!SHARED_KERNEL_PMD)
- return __pgd_alloc(mm, PGD_ALLOCATION_ORDER);
-
- /*
- * Now PAE kernel is not running as a Xen domain. We can allocate
- * a 32-byte slab for pgd to save memory space.
- */
- return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
-}
-
-static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- if (!SHARED_KERNEL_PMD)
- __pgd_free(mm, pgd);
- else
- kmem_cache_free(pgd_cache, pgd);
-}
-#else
-
-static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
-{
return __pgd_alloc(mm, PGD_ALLOCATION_ORDER);
}
@@ -387,7 +334,6 @@ static inline void _pgd_free(struct mm_s
{
__pgd_free(mm, pgd);
}
-#endif /* CONFIG_X86_PAE */
pgd_t *pgd_alloc(struct mm_struct *mm)
{
_
^ permalink raw reply [flat|nested] 13+ messages in thread* [PATCH 2/8] x86/mm: Always "broadcast" PMD setting operations
2025-04-14 17:32 [PATCH 0/8] x86/mm: Simplify PAE page table handling Dave Hansen
2025-04-14 17:32 ` [PATCH 1/8] x86/mm: Always allocate a whole page for PAE PGDs Dave Hansen
@ 2025-04-14 17:32 ` Dave Hansen
2025-04-15 8:25 ` Kirill A. Shutemov
2025-04-14 17:32 ` [PATCH 3/8] x86/mm: Always tell core mm to sync kernel mappings Dave Hansen
` (5 subsequent siblings)
7 siblings, 1 reply; 13+ messages in thread
From: Dave Hansen @ 2025-04-14 17:32 UTC (permalink / raw)
To: linux-kernel
Cc: x86, tglx, bp, joro, luto, peterz, kirill.shutemov,
rick.p.edgecombe, jgross, Dave Hansen
From: Dave Hansen <dave.hansen@linux.intel.com>
Kernel PMDs can either be shared across processes or private to a
process. On 64-bit, they are always shared. 32-bit non-PAE hardware
does not have PMDs, but the kernel logically squishes them into the
PGD and treats them as private. Here are the four cases:
64-bit: Shared
32-bit: non-PAE: Private
32-bit: PAE+ PTI: Private
32-bit: PAE+noPTI: Shared
Note that 32-bit is all "Private" except for PAE+noPTI being an
oddball. The 32-bit+PAE+noPTI case will be made like the rest of
32-bit shortly.
But until that can be done, temporarily treat the 32-bit+PAE+noPTI
case as Private. This will do unnecessary walks across pgd_list and
unnecessary PTE setting but should be otherwise harmless.
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
---
b/arch/x86/mm/pat/set_memory.c | 4 ++--
b/arch/x86/mm/pgtable.c | 11 +++--------
2 files changed, 5 insertions(+), 10 deletions(-)
diff -puN arch/x86/mm/pat/set_memory.c~always-sync-kernel-mapping-updates arch/x86/mm/pat/set_memory.c
--- a/arch/x86/mm/pat/set_memory.c~always-sync-kernel-mapping-updates 2025-04-09 12:00:17.126319212 -0700
+++ b/arch/x86/mm/pat/set_memory.c 2025-04-09 12:53:28.082212490 -0700
@@ -889,7 +889,7 @@ static void __set_pmd_pte(pte_t *kpte, u
/* change init_mm */
set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
- if (!SHARED_KERNEL_PMD) {
+ {
struct page *page;
list_for_each_entry(page, &pgd_list, lru) {
@@ -1293,7 +1293,7 @@ static int collapse_pmd_page(pmd_t *pmd,
/* Queue the page table to be freed after TLB flush */
list_add(&page_ptdesc(pmd_page(old_pmd))->pt_list, pgtables);
- if (IS_ENABLED(CONFIG_X86_32) && !SHARED_KERNEL_PMD) {
+ if (IS_ENABLED(CONFIG_X86_32)) {
struct page *page;
/* Update all PGD tables to use the same large page */
diff -puN arch/x86/mm/pgtable.c~always-sync-kernel-mapping-updates arch/x86/mm/pgtable.c
--- a/arch/x86/mm/pgtable.c~always-sync-kernel-mapping-updates 2025-04-09 12:00:17.128319285 -0700
+++ b/arch/x86/mm/pgtable.c 2025-04-09 12:53:09.217519767 -0700
@@ -97,18 +97,13 @@ static void pgd_ctor(struct mm_struct *m
KERNEL_PGD_PTRS);
}
- /* list required to sync kernel mapping updates */
- if (!SHARED_KERNEL_PMD) {
- pgd_set_mm(pgd, mm);
- pgd_list_add(pgd);
- }
+ /* List used to sync kernel mapping updates */
+ pgd_set_mm(pgd, mm);
+ pgd_list_add(pgd);
}
static void pgd_dtor(pgd_t *pgd)
{
- if (SHARED_KERNEL_PMD)
- return;
-
spin_lock(&pgd_lock);
pgd_list_del(pgd);
spin_unlock(&pgd_lock);
_
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH 2/8] x86/mm: Always "broadcast" PMD setting operations
2025-04-14 17:32 ` [PATCH 2/8] x86/mm: Always "broadcast" PMD setting operations Dave Hansen
@ 2025-04-15 8:25 ` Kirill A. Shutemov
2025-04-15 14:10 ` Dave Hansen
0 siblings, 1 reply; 13+ messages in thread
From: Kirill A. Shutemov @ 2025-04-15 8:25 UTC (permalink / raw)
To: Dave Hansen
Cc: linux-kernel, x86, tglx, bp, joro, luto, peterz, rick.p.edgecombe,
jgross
On Mon, Apr 14, 2025 at 10:32:35AM -0700, Dave Hansen wrote:
>
> From: Dave Hansen <dave.hansen@linux.intel.com>
>
> Kernel PMDs can either be shared across processes or private to a
> process. On 64-bit, they are always shared. 32-bit non-PAE hardware
> does not have PMDs, but the kernel logically squishes them into the
> PGD and treats them as private. Here are the four cases:
>
> 64-bit: Shared
> 32-bit: non-PAE: Private
> 32-bit: PAE+ PTI: Private
> 32-bit: PAE+noPTI: Shared
>
> Note that 32-bit is all "Private" except for PAE+noPTI being an
> oddball. The 32-bit+PAE+noPTI case will be made like the rest of
> 32-bit shortly.
>
> But until that can be done, temporarily treat the 32-bit+PAE+noPTI
> case as Private. This will do unnecessary walks across pgd_list and
> unnecessary PTE setting but should be otherwise harmless.
>
> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
> ---
>
> b/arch/x86/mm/pat/set_memory.c | 4 ++--
> b/arch/x86/mm/pgtable.c | 11 +++--------
> 2 files changed, 5 insertions(+), 10 deletions(-)
>
> diff -puN arch/x86/mm/pat/set_memory.c~always-sync-kernel-mapping-updates arch/x86/mm/pat/set_memory.c
> --- a/arch/x86/mm/pat/set_memory.c~always-sync-kernel-mapping-updates 2025-04-09 12:00:17.126319212 -0700
> +++ b/arch/x86/mm/pat/set_memory.c 2025-04-09 12:53:28.082212490 -0700
> @@ -889,7 +889,7 @@ static void __set_pmd_pte(pte_t *kpte, u
> /* change init_mm */
> set_pte_atomic(kpte, pte);
> #ifdef CONFIG_X86_32
> - if (!SHARED_KERNEL_PMD) {
> + {
> struct page *page;
>
> list_for_each_entry(page, &pgd_list, lru) {
Removing the condition, but leaving the block looks sloppy.
Maybe convert #ifdef to IS_ENABLED() while you are there, so it would
justify the block?
--
Kiryl Shutsemau / Kirill A. Shutemov
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH 2/8] x86/mm: Always "broadcast" PMD setting operations
2025-04-15 8:25 ` Kirill A. Shutemov
@ 2025-04-15 14:10 ` Dave Hansen
0 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2025-04-15 14:10 UTC (permalink / raw)
To: Kirill A. Shutemov, Dave Hansen
Cc: linux-kernel, x86, tglx, bp, joro, luto, peterz, rick.p.edgecombe,
jgross
[-- Attachment #1: Type: text/plain, Size: 559 bytes --]
On 4/15/25 01:25, Kirill A. Shutemov wrote:
>> #ifdef CONFIG_X86_32
>> - if (!SHARED_KERNEL_PMD) {
>> + {
>> struct page *page;
>>
>> list_for_each_entry(page, &pgd_list, lru) {
> Removing the condition, but leaving the block looks sloppy.
>
> Maybe convert #ifdef to IS_ENABLED() while you are there, so it would
> justify the block?
It does, and it's right at the beginning of the function. Simplifying
the code here also made it _less_ self-documenting so it needs a better
comment too.
I'll tack the attached patch on to the end of the series.
[-- Attachment #2: kill-CONFIG_X86_32-ifdef.patch --]
[-- Type: text/x-patch, Size: 2073 bytes --]
This block of code used to be:
if (!SHARED_KERNEL_PMD)
But it was zapped when 32-bit kernels transitioned to private
(non-shared) PMDs. It also made it rather unclear what the block
of code is doing in the first place.
Remove the #ifdef and replace it with IS_ENABLED(). Unindent the
code block and add an actually useful comment about what it is
doing.
Suggested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
b/arch/x86/mm/pat/set_memory.c | 41 +++++++++++++++++++++--------------------
1 file changed, 21 insertions(+), 20 deletions(-)
diff -puN arch/x86/mm/pat/set_memory.c~kill-CONFIG_X86_32-ifdef arch/x86/mm/pat/set_memory.c
--- a/arch/x86/mm/pat/set_memory.c~kill-CONFIG_X86_32-ifdef 2025-04-15 06:45:17.579717047 -0700
+++ b/arch/x86/mm/pat/set_memory.c 2025-04-15 06:53:27.890709422 -0700
@@ -881,31 +881,32 @@ phys_addr_t slow_virt_to_phys(void *__vi
}
EXPORT_SYMBOL_GPL(slow_virt_to_phys);
-/*
- * Set the new pmd in all the pgds we know about:
- */
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
+ struct page *page;
+
/* change init_mm */
set_pte_atomic(kpte, pte);
-#ifdef CONFIG_X86_32
- {
- struct page *page;
-
- list_for_each_entry(page, &pgd_list, lru) {
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
- p4d = p4d_offset(pgd, address);
- pud = pud_offset(p4d, address);
- pmd = pmd_offset(pud, address);
- set_pte_atomic((pte_t *)pmd, pte);
- }
+
+ if (IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ /*
+ * 32-bit mm_structs don't share kernel PMD pages.
+ * Propagate the change to each relevant PMD entry:
+ */
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ p4d = p4d_offset(pgd, address);
+ pud = pud_offset(p4d, address);
+ pmd = pmd_offset(pud, address);
+ set_pte_atomic((pte_t *)pmd, pte);
}
-#endif
}
static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot)
_
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 3/8] x86/mm: Always tell core mm to sync kernel mappings
2025-04-14 17:32 [PATCH 0/8] x86/mm: Simplify PAE page table handling Dave Hansen
2025-04-14 17:32 ` [PATCH 1/8] x86/mm: Always allocate a whole page for PAE PGDs Dave Hansen
2025-04-14 17:32 ` [PATCH 2/8] x86/mm: Always "broadcast" PMD setting operations Dave Hansen
@ 2025-04-14 17:32 ` Dave Hansen
2025-04-15 8:28 ` Kirill A. Shutemov
2025-04-14 17:32 ` [PATCH 4/8] x86/mm: Simplify PAE PGD sharing macros Dave Hansen
` (4 subsequent siblings)
7 siblings, 1 reply; 13+ messages in thread
From: Dave Hansen @ 2025-04-14 17:32 UTC (permalink / raw)
To: linux-kernel
Cc: x86, tglx, bp, joro, luto, peterz, kirill.shutemov,
rick.p.edgecombe, jgross, Dave Hansen
From: Dave Hansen <dave.hansen@linux.intel.com>
Each mm_struct has its own copy of the page tables. When core mm code
makes changes to a copy of the page tables those changes sometimes
need to be synchronized with other mms' copies of the page tables. But
when this synchronization actually needs to happen is highly
architecture and configuration specific.
In cases where kernel PMDs are shared across processes
(SHARED_KERNEL_PMD) the core mm does not itself need to do that
synchronization for kernel PMD changes. The x86 code communicates
this by clearing the PGTBL_PMD_MODIFIED bit in those
configs to avoid expensive synchronization.
The kernel is moving toward never sharing kernel PMDs on 32-bit.
Prepare for that and make 32-bit PAE always set PGTBL_PMD_MODIFIED,
even if there is no modification to synchronize. This obviously adds
some synchronization overhead in cases where the kernel page tables
are being changed.
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
---
b/arch/x86/include/asm/pgtable-3level_types.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff -puN arch/x86/include/asm/pgtable-3level_types.h~always-set-ARCH_PAGE_TABLE_SYNC_MASK arch/x86/include/asm/pgtable-3level_types.h
--- a/arch/x86/include/asm/pgtable-3level_types.h~always-set-ARCH_PAGE_TABLE_SYNC_MASK 2025-04-09 11:49:40.552916845 -0700
+++ b/arch/x86/include/asm/pgtable-3level_types.h 2025-04-09 11:49:40.555916955 -0700
@@ -29,7 +29,7 @@ typedef union {
#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
-#define ARCH_PAGE_TABLE_SYNC_MASK (SHARED_KERNEL_PMD ? 0 : PGTBL_PMD_MODIFIED)
+#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
_
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH 3/8] x86/mm: Always tell core mm to sync kernel mappings
2025-04-14 17:32 ` [PATCH 3/8] x86/mm: Always tell core mm to sync kernel mappings Dave Hansen
@ 2025-04-15 8:28 ` Kirill A. Shutemov
2025-04-15 14:12 ` Dave Hansen
0 siblings, 1 reply; 13+ messages in thread
From: Kirill A. Shutemov @ 2025-04-15 8:28 UTC (permalink / raw)
To: Dave Hansen
Cc: linux-kernel, x86, tglx, bp, joro, luto, peterz, rick.p.edgecombe,
jgross
On Mon, Apr 14, 2025 at 10:32:37AM -0700, Dave Hansen wrote:
>
> From: Dave Hansen <dave.hansen@linux.intel.com>
>
> Each mm_struct has its own copy of the page tables. When core mm code
> makes changes to a copy of the page tables those changes sometimes
> need to be synchronized with other mms' copies of the page tables. But
> when this synchronization actually needs to happen is highly
> architecture and configuration specific.
>
> In cases where kernel PMDs are shared across processes
> (SHARED_KERNEL_PMD) the core mm does not itself need to do that
> synchronization for kernel PMD changes. The x86 code communicates
> this by clearing the PGTBL_PMD_MODIFIED bit cleared in those
> configs to avoid expensive synchronization.
>
> The kernel is moving toward never sharing kernel PMDs on 32-bit.
> Prepare for that and make 32-bit PAE always set PGTBL_PMD_MODIFIED,
> even if there is no modification to synchronize. This obviously adds
> some synchronization overhead in cases where the kernel page tables
> are being changed.
>
> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
> ---
>
> b/arch/x86/include/asm/pgtable-3level_types.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff -puN arch/x86/include/asm/pgtable-3level_types.h~always-set-ARCH_PAGE_TABLE_SYNC_MASK arch/x86/include/asm/pgtable-3level_types.h
> --- a/arch/x86/include/asm/pgtable-3level_types.h~always-set-ARCH_PAGE_TABLE_SYNC_MASK 2025-04-09 11:49:40.552916845 -0700
> +++ b/arch/x86/include/asm/pgtable-3level_types.h 2025-04-09 11:49:40.555916955 -0700
> @@ -29,7 +29,7 @@ typedef union {
>
> #define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
>
> -#define ARCH_PAGE_TABLE_SYNC_MASK (SHARED_KERNEL_PMD ? 0 : PGTBL_PMD_MODIFIED)
> +#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
The new definition is the same between pgtable-2level_types.h and
pgtable-3level_types.h.
Move it to the common pgtable_32_types.h.
--
Kiryl Shutsemau / Kirill A. Shutemov
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH 3/8] x86/mm: Always tell core mm to sync kernel mappings
2025-04-15 8:28 ` Kirill A. Shutemov
@ 2025-04-15 14:12 ` Dave Hansen
0 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2025-04-15 14:12 UTC (permalink / raw)
To: Kirill A. Shutemov, Dave Hansen
Cc: linux-kernel, x86, tglx, bp, joro, luto, peterz, rick.p.edgecombe,
jgross
On 4/15/25 01:28, Kirill A. Shutemov wrote:
>> #define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
>>
>> -#define ARCH_PAGE_TABLE_SYNC_MASK (SHARED_KERNEL_PMD ? 0 : PGTBL_PMD_MODIFIED)
>> +#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
> The new definition is the same between pgtable-2level_types.h and
> pgtable-3level_types.h.
>
> Move it to the common pgtable_32_types.h.
Good catch, thanks. I'll add another patch to clean this up.
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 4/8] x86/mm: Simplify PAE PGD sharing macros
2025-04-14 17:32 [PATCH 0/8] x86/mm: Simplify PAE page table handling Dave Hansen
` (2 preceding siblings ...)
2025-04-14 17:32 ` [PATCH 3/8] x86/mm: Always tell core mm to sync kernel mappings Dave Hansen
@ 2025-04-14 17:32 ` Dave Hansen
2025-04-14 17:32 ` [PATCH 5/8] x86/mm: Fix up comments around PMD preallocation Dave Hansen
` (3 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2025-04-14 17:32 UTC (permalink / raw)
To: linux-kernel
Cc: x86, tglx, bp, joro, luto, peterz, kirill.shutemov,
rick.p.edgecombe, jgross, Dave Hansen
From: Dave Hansen <dave.hansen@linux.intel.com>
There are a few too many levels of abstraction here.
First, just expand the PREALLOCATED_PMDS macro in place to make it
clear that it is only conditional on PTI.
Second, MAX_PREALLOCATED_PMDS is only used in one spot for an
on-stack allocation. It has a *maximum* value of 4. Do not bother
with the macro MAX() magic. Just set it to 4.
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
---
b/arch/x86/mm/pgtable.c | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff -puN arch/x86/mm/pgtable.c~simplify-PREALLOCATED_PMDS arch/x86/mm/pgtable.c
--- a/arch/x86/mm/pgtable.c~simplify-PREALLOCATED_PMDS 2025-04-09 11:49:41.053935260 -0700
+++ b/arch/x86/mm/pgtable.c 2025-04-09 11:49:41.056935370 -0700
@@ -68,12 +68,6 @@ static inline void pgd_list_del(pgd_t *p
list_del(&ptdesc->pt_list);
}
-#define UNSHARED_PTRS_PER_PGD \
- (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
-#define MAX_UNSHARED_PTRS_PER_PGD \
- MAX_T(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD)
-
-
static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
{
virt_to_ptdesc(pgd)->pt_mm = mm;
@@ -132,8 +126,9 @@ static void pgd_dtor(pgd_t *pgd)
* not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
* and initialize the kernel pmds here.
*/
-#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD
-#define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD
+#define PREALLOCATED_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \
+ PTRS_PER_PGD : KERNEL_PGD_BOUNDARY)
+#define MAX_PREALLOCATED_PMDS PTRS_PER_PGD
/*
* We allocate separate PMDs for the kernel part of the user page-table
_
^ permalink raw reply [flat|nested] 13+ messages in thread* [PATCH 5/8] x86/mm: Fix up comments around PMD preallocation
2025-04-14 17:32 [PATCH 0/8] x86/mm: Simplify PAE page table handling Dave Hansen
` (3 preceding siblings ...)
2025-04-14 17:32 ` [PATCH 4/8] x86/mm: Simplify PAE PGD sharing macros Dave Hansen
@ 2025-04-14 17:32 ` Dave Hansen
2025-04-14 17:32 ` [PATCH 6/8] x86/mm: Preallocate all PAE page tables Dave Hansen
` (2 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2025-04-14 17:32 UTC (permalink / raw)
To: linux-kernel
Cc: x86, tglx, bp, joro, luto, peterz, kirill.shutemov,
rick.p.edgecombe, jgross, Dave Hansen
From: Dave Hansen <dave.hansen@linux.intel.com>
The "paravirt environment" is no longer in the tree. Axe that part of the
comment. Also add a blurb to remind readers that "USER_PMDS" refer to
the PTI user *copy* of the page tables, not the user *portion*.
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
---
b/arch/x86/mm/pgtable.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff -puN arch/x86/mm/pgtable.c~simplify-PREALLOCATED_PMDS-2 arch/x86/mm/pgtable.c
--- a/arch/x86/mm/pgtable.c~simplify-PREALLOCATED_PMDS-2 2025-04-09 11:49:41.550953527 -0700
+++ b/arch/x86/mm/pgtable.c 2025-04-09 11:49:41.553953638 -0700
@@ -121,16 +121,17 @@ static void pgd_dtor(pgd_t *pgd)
* processor notices the update. Since this is expensive, and
* all 4 top-level entries are used almost immediately in a
* new process's life, we just pre-populate them here.
- *
- * Also, if we're in a paravirt environment where the kernel pmd is
- * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
- * and initialize the kernel pmds here.
*/
#define PREALLOCATED_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \
PTRS_PER_PGD : KERNEL_PGD_BOUNDARY)
#define MAX_PREALLOCATED_PMDS PTRS_PER_PGD
/*
+ * "USER_PMDS" are the PMDs for the user copy of the page tables when
+ * PTI is enabled. They do not exist when PTI is disabled. Note that
+ * this is distinct from the user _portion_ of the kernel page tables
+ * which always exists.
+ *
* We allocate separate PMDs for the kernel part of the user page-table
* when PTI is enabled. We need them to map the per-process LDT into the
* user-space page-table.
_
^ permalink raw reply [flat|nested] 13+ messages in thread* [PATCH 6/8] x86/mm: Preallocate all PAE page tables
2025-04-14 17:32 [PATCH 0/8] x86/mm: Simplify PAE page table handling Dave Hansen
` (4 preceding siblings ...)
2025-04-14 17:32 ` [PATCH 5/8] x86/mm: Fix up comments around PMD preallocation Dave Hansen
@ 2025-04-14 17:32 ` Dave Hansen
2025-04-14 17:32 ` [PATCH 7/8] x86/mm: Remove duplicated PMD preallocation macro Dave Hansen
2025-04-14 17:32 ` [PATCH 8/8] x86/mm: Remove now unused SHARED_KERNEL_PMD Dave Hansen
7 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2025-04-14 17:32 UTC (permalink / raw)
To: linux-kernel
Cc: x86, tglx, bp, joro, luto, peterz, kirill.shutemov,
rick.p.edgecombe, jgross, Dave Hansen
From: Dave Hansen <dave.hansen@linux.intel.com>
Finally, move away from having PAE kernels share any PMDs across
processes.
This was already the default on PTI kernels which are the common
case.
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
---
b/arch/x86/mm/pgtable.c | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)
diff -puN arch/x86/mm/pgtable.c~simplify-PREALLOCATED_PMDS-3 arch/x86/mm/pgtable.c
--- a/arch/x86/mm/pgtable.c~simplify-PREALLOCATED_PMDS-3 2025-04-09 11:49:42.035971354 -0700
+++ b/arch/x86/mm/pgtable.c 2025-04-09 11:49:42.038971465 -0700
@@ -80,16 +80,11 @@ struct mm_struct *pgd_page_get_mm(struct
static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
{
- /* If the pgd points to a shared pagetable level (either the
- ptes in non-PAE, or shared PMD in PAE), then just copy the
- references from swapper_pg_dir. */
- if (CONFIG_PGTABLE_LEVELS == 2 ||
- (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
- CONFIG_PGTABLE_LEVELS >= 4) {
+ /* PAE preallocates all its PMDs. No cloning needed. */
+ if (!IS_ENABLED(CONFIG_X86_PAE))
clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
- }
/* List used to sync kernel mapping updates */
pgd_set_mm(pgd, mm);
@@ -122,8 +117,7 @@ static void pgd_dtor(pgd_t *pgd)
* all 4 top-level entries are used almost immediately in a
* new process's life, we just pre-populate them here.
*/
-#define PREALLOCATED_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \
- PTRS_PER_PGD : KERNEL_PGD_BOUNDARY)
+#define PREALLOCATED_PMDS PTRS_PER_PGD
#define MAX_PREALLOCATED_PMDS PTRS_PER_PGD
/*
_
^ permalink raw reply [flat|nested] 13+ messages in thread* [PATCH 7/8] x86/mm: Remove duplicated PMD preallocation macro
2025-04-14 17:32 [PATCH 0/8] x86/mm: Simplify PAE page table handling Dave Hansen
` (5 preceding siblings ...)
2025-04-14 17:32 ` [PATCH 6/8] x86/mm: Preallocate all PAE page tables Dave Hansen
@ 2025-04-14 17:32 ` Dave Hansen
2025-04-14 17:32 ` [PATCH 8/8] x86/mm: Remove now unused SHARED_KERNEL_PMD Dave Hansen
7 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2025-04-14 17:32 UTC (permalink / raw)
To: linux-kernel
Cc: x86, tglx, bp, joro, luto, peterz, kirill.shutemov,
rick.p.edgecombe, jgross, Dave Hansen
From: Dave Hansen <dave.hansen@linux.intel.com>
MAX_PREALLOCATED_PMDS and PREALLOCATED_PMDS are now identical. Just
use PREALLOCATED_PMDS and remove "MAX".
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
---
b/arch/x86/mm/pgtable.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff -puN arch/x86/mm/pgtable.c~simplify-PREALLOCATED_PMDS-4 arch/x86/mm/pgtable.c
--- a/arch/x86/mm/pgtable.c~simplify-PREALLOCATED_PMDS-4 2025-04-09 11:49:42.535989732 -0700
+++ b/arch/x86/mm/pgtable.c 2025-04-09 11:49:42.538989843 -0700
@@ -118,7 +118,6 @@ static void pgd_dtor(pgd_t *pgd)
* new process's life, we just pre-populate them here.
*/
#define PREALLOCATED_PMDS PTRS_PER_PGD
-#define MAX_PREALLOCATED_PMDS PTRS_PER_PGD
/*
* "USER_PMDS" are the PMDs for the user copy of the page tables when
@@ -154,7 +153,6 @@ void pud_populate(struct mm_struct *mm,
/* No need to prepopulate any pagetable entries in non-PAE modes. */
#define PREALLOCATED_PMDS 0
-#define MAX_PREALLOCATED_PMDS 0
#define PREALLOCATED_USER_PMDS 0
#define MAX_PREALLOCATED_USER_PMDS 0
#endif /* CONFIG_X86_PAE */
@@ -324,7 +322,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
- pmd_t *pmds[MAX_PREALLOCATED_PMDS];
+ pmd_t *pmds[PREALLOCATED_PMDS];
pgd = _pgd_alloc(mm);
_
^ permalink raw reply [flat|nested] 13+ messages in thread* [PATCH 8/8] x86/mm: Remove now unused SHARED_KERNEL_PMD
2025-04-14 17:32 [PATCH 0/8] x86/mm: Simplify PAE page table handling Dave Hansen
` (6 preceding siblings ...)
2025-04-14 17:32 ` [PATCH 7/8] x86/mm: Remove duplicated PMD preallocation macro Dave Hansen
@ 2025-04-14 17:32 ` Dave Hansen
7 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2025-04-14 17:32 UTC (permalink / raw)
To: linux-kernel
Cc: x86, tglx, bp, joro, luto, peterz, kirill.shutemov,
rick.p.edgecombe, jgross, Dave Hansen
From: Dave Hansen <dave.hansen@linux.intel.com>
All the users of SHARED_KERNEL_PMD are gone. Zap it.
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
---
b/arch/x86/include/asm/pgtable-2level_types.h | 2 --
b/arch/x86/include/asm/pgtable-3level_types.h | 2 --
b/arch/x86/include/asm/pgtable_64_types.h | 2 --
3 files changed, 6 deletions(-)
diff -puN arch/x86/include/asm/pgtable-2level_types.h~zap-SHARED_KERNEL_PMD arch/x86/include/asm/pgtable-2level_types.h
--- a/arch/x86/include/asm/pgtable-2level_types.h~zap-SHARED_KERNEL_PMD 2025-04-14 09:00:36.905759325 -0700
+++ b/arch/x86/include/asm/pgtable-2level_types.h 2025-04-14 09:00:36.912759582 -0700
@@ -18,8 +18,6 @@ typedef union {
} pte_t;
#endif /* !__ASSEMBLER__ */
-#define SHARED_KERNEL_PMD 0
-
#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
/*
diff -puN arch/x86/include/asm/pgtable-3level_types.h~zap-SHARED_KERNEL_PMD arch/x86/include/asm/pgtable-3level_types.h
--- a/arch/x86/include/asm/pgtable-3level_types.h~zap-SHARED_KERNEL_PMD 2025-04-14 09:00:36.908759435 -0700
+++ b/arch/x86/include/asm/pgtable-3level_types.h 2025-04-14 09:00:36.913759618 -0700
@@ -27,8 +27,6 @@ typedef union {
} pmd_t;
#endif /* !__ASSEMBLER__ */
-#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
-
#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
/*
diff -puN arch/x86/include/asm/pgtable_64_types.h~zap-SHARED_KERNEL_PMD arch/x86/include/asm/pgtable_64_types.h
--- a/arch/x86/include/asm/pgtable_64_types.h~zap-SHARED_KERNEL_PMD 2025-04-14 09:00:36.910759508 -0700
+++ b/arch/x86/include/asm/pgtable_64_types.h 2025-04-14 09:00:36.913759618 -0700
@@ -46,8 +46,6 @@ extern unsigned int ptrs_per_p4d;
#endif /* !__ASSEMBLER__ */
-#define SHARED_KERNEL_PMD 0
-
#ifdef CONFIG_X86_5LEVEL
/*
_
^ permalink raw reply [flat|nested] 13+ messages in thread