* [PATCH v2 01/16] arm64: provide cpu_replace_ttbr1_phys()
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
@ 2023-07-27 15:30 ` Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 02/16] arm64: make clean_dcache_range_nopatch() visible Russell King (Oracle)
` (14 subsequent siblings)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:30 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
Provide a version of cpu_replace_ttbr1_phys() which operates using a
physical address rather than the virtual address of the page tables.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/include/asm/mmu_context.h | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a6fb325424e7..42085b2b1d92 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -152,7 +152,7 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
* avoiding the possibility of conflicting TLB entries being allocated.
*/
-static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
+static inline void cpu_replace_ttbr1_phys(phys_addr_t pgd_phys, pgd_t *idmap)
{
typedef void (ttbr_replace_func)(phys_addr_t);
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
@@ -160,9 +160,10 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
unsigned long daif;
/* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
- phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
+ phys_addr_t ttbr1 = phys_to_ttbr(pgd_phys);
- if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) {
+ if (system_supports_cnp() &&
+ !WARN_ON(pgd_phys != virt_to_phys(lm_alias(swapper_pg_dir)))) {
/*
* cpu_replace_ttbr1() is used when there's a boot CPU
* up (i.e. cpufeature framework is not up yet) and
@@ -189,6 +190,11 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
cpu_uninstall_idmap();
}
+static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
+{
+ cpu_replace_ttbr1_phys(virt_to_phys(pgdp), idmap);
+}
+
/*
* It would be nice to return ASIDs back to the allocator, but unfortunately
* that introduces a race with a generation rollover where we could erroneously
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 02/16] arm64: make clean_dcache_range_nopatch() visible
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 01/16] arm64: provide cpu_replace_ttbr1_phys() Russell King (Oracle)
@ 2023-07-27 15:30 ` Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 03/16] arm64: place kernel in its own L0 page table entry Russell King (Oracle)
` (13 subsequent siblings)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:30 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
When we hook into the kernel text patching code, we will need to call
clean_dcache_range_nopatch() to ensure that the patching of the
replicated kernel text is properly visible to other CPUs. Make this
function available to the replication code.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/include/asm/cacheflush.h | 2 ++
arch/arm64/kernel/alternative.c | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 37185e978aeb..ac9ad56d5212 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -104,6 +104,8 @@ static inline void flush_icache_range(unsigned long start, unsigned long end)
}
#define flush_icache_range flush_icache_range
+void clean_dcache_range_nopatch(u64 start, u64 end);
+
/*
* Copy user data from/to a page which is mapped into a different
* processes address space. Really, we want to allow our "user
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 8ff6610af496..ea3f4104771d 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -121,7 +121,7 @@ static noinstr void patch_alternative(struct alt_instr *alt,
* accidentally call into the cache.S code, which is patched by us at
* runtime.
*/
-static noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
+noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
{
u64 cur, d_size, ctr_el0;
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 03/16] arm64: place kernel in its own L0 page table entry
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 01/16] arm64: provide cpu_replace_ttbr1_phys() Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 02/16] arm64: make clean_dcache_range_nopatch() visible Russell King (Oracle)
@ 2023-07-27 15:30 ` Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 04/16] arm64: text replication: add init function Russell King (Oracle)
` (12 subsequent siblings)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:30 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
Kernel text replication needs to maintain separate per-node page
tables for the kernel text. In order to do this without affecting
other kernel memory mappings, placing the kernel such that it does
not share a L0 page table entry with any other mapping is desirable.
Prior to this commit, the layout without KASLR was:
+----------+
| vmalloc |
+----------+
| Kernel |
+----------+ MODULES_END, VMALLOC_START, KIMAGE_VADDR =
| Modules | MODULES_VADDR + MODULES_VSIZE
+----------+ MODULES_VADDR = _PAGE_END(VA_BITS_MIN)
| VA space |
+----------+ 0
This becomes:
+----------+
| vmalloc |
+----------+ VMALLOC_START = MODULES_END + PGDIR_SIZE
| Kernel |
+----------+ MODULES_END, KIMAGE_VADDR = _PAGE_END(VA_BITS_MIN) +
| Modules | max(PGDIR_SIZE, MODULES_VSIZE)
+----------+ MODULES_VADDR = MODULES_END - MODULES_VSIZE
| VA space |
+----------+ 0
This assumes MODULES_VSIZE (128M) <= PGDIR_SIZE.
One side effect of this change is that KIMAGE_VADDR's definition now
includes PGDIR_SIZE (to leave room for the modules) but this is not
defined when asm/memory.h is included. This means KIMAGE_VADDR can
not be used in inline functions within this file, so we convert
kaslr_offset() and kaslr_enabled() to be macros instead.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/include/asm/memory.h | 28 +++++++++++++++++++++-------
arch/arm64/include/asm/pgtable.h | 2 +-
arch/arm64/kernel/kaslr.c | 1 +
3 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index fde4186cc387..9410ec4e4207 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -43,9 +43,26 @@
#define VA_BITS (CONFIG_ARM64_VA_BITS)
#define _PAGE_OFFSET(va) (-(UL(1) << (va)))
#define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS))
-#define KIMAGE_VADDR (MODULES_END)
-#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
-#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
+
+/*
+ * Setting KIMAGE_VADDR has got a lot harder, ideally we'd like to use
+ * max(PGDIR_SIZE, MODULES_VSIZE) but this can't work because this is used
+ * both in assembly and C, where it causes problems. max_t() solves the
+ * C problems but can't be used in assembly.
+ * CONFIG_ARM64_4K_PAGES, PGDIR_SIZE is 2M, 1G, 512G
+ * CONFIG_ARM64_16K_PAGES, PGDIR_SIZE is 32M, 64G or 128T
+ * CONFIG_ARM64_64K_PAGES, PGDIR_SIZE is 512M or 4T
+ */
+#if (CONFIG_ARM64_4K_PAGES && CONFIG_PGTABLE_LEVELS < 4) || \
+ (CONFIG_ARM64_16K_PAGES && CONFIG_PGTABLE_LEVELS < 3) || \
+ (CONFIG_ARM64_64K_PAGES && CONFIG_PGTABLE_LEVELS < 2)
+#define KIMAGE_OFFSET MODULES_VSIZE
+#else
+#define KIMAGE_OFFSET PGDIR_SIZE
+#endif
+#define KIMAGE_VADDR (_PAGE_END(VA_BITS_MIN) + KIMAGE_OFFSET)
+#define MODULES_END (KIMAGE_VADDR)
+#define MODULES_VADDR (MODULES_END - MODULES_VSIZE)
#define MODULES_VSIZE (SZ_2G)
#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
@@ -199,10 +216,7 @@ extern u64 kimage_vaddr;
/* the offset between the kernel virtual and physical mappings */
extern u64 kimage_voffset;
-static inline unsigned long kaslr_offset(void)
-{
- return kimage_vaddr - KIMAGE_VADDR;
-}
+#define kaslr_offset() ((unsigned long)(kimage_vaddr - KIMAGE_VADDR))
#ifdef CONFIG_RANDOMIZE_BASE
void kaslr_init(void);
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0bd18de9fd97..cb526e69299d 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -21,7 +21,7 @@
* VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
* and fixed mappings
*/
-#define VMALLOC_START (MODULES_END)
+#define VMALLOC_START (MODULES_END + PGDIR_SIZE)
#define VMALLOC_END (VMEMMAP_START - SZ_256M)
#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 94a269cd1f07..6ffea2ce1a11 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -9,6 +9,7 @@
#include <asm/cpufeature.h>
#include <asm/memory.h>
+#include <asm/pgtable.h>
u16 __initdata memstart_offset_seed;
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 04/16] arm64: text replication: add init function
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
` (2 preceding siblings ...)
2023-07-27 15:30 ` [PATCH v2 03/16] arm64: place kernel in its own L0 page table entry Russell King (Oracle)
@ 2023-07-27 15:30 ` Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 05/16] arm64: text replication: add sanity checks Russell King (Oracle)
` (11 subsequent siblings)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:30 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
A simple patch that adds an empty function for kernel text replication
initialisation and hooks it into the initialisation path.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/include/asm/ktext.h | 20 ++++++++++++++++++++
arch/arm64/mm/Makefile | 2 ++
arch/arm64/mm/init.c | 3 +++
arch/arm64/mm/ktext.c | 8 ++++++++
4 files changed, 33 insertions(+)
create mode 100644 arch/arm64/include/asm/ktext.h
create mode 100644 arch/arm64/mm/ktext.c
diff --git a/arch/arm64/include/asm/ktext.h b/arch/arm64/include/asm/ktext.h
new file mode 100644
index 000000000000..1a5f7452a3bf
--- /dev/null
+++ b/arch/arm64/include/asm/ktext.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022, Oracle and/or its affiliates.
+ */
+#ifndef ASM_KTEXT_H
+#define ASM_KTEXT_H
+
+#ifdef CONFIG_REPLICATE_KTEXT
+
+void ktext_replication_init(void);
+
+#else
+
+static inline void ktext_replication_init(void)
+{
+}
+
+#endif
+
+#endif
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index dbd1bc95967d..41e705027c57 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -14,3 +14,5 @@ KASAN_SANITIZE_physaddr.o := n
obj-$(CONFIG_KASAN) += kasan_init.o
KASAN_SANITIZE_kasan_init.o := n
+
+obj-$(CONFIG_REPLICATE_KTEXT) += ktext.o
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index d31c3a9290c5..df90738fd6bb 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -36,6 +36,7 @@
#include <asm/fixmap.h>
#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
+#include <asm/ktext.h>
#include <asm/kvm_host.h>
#include <asm/memory.h>
#include <asm/numa.h>
@@ -425,6 +426,8 @@ void __init bootmem_init(void)
arch_numa_init();
+ ktext_replication_init();
+
/*
* must be done after arch_numa_init() which calls numa_init() to
* initialize node_online_map that gets used in hugetlb_cma_reserve()
diff --git a/arch/arm64/mm/ktext.c b/arch/arm64/mm/ktext.c
new file mode 100644
index 000000000000..3a8d37c9abc4
--- /dev/null
+++ b/arch/arm64/mm/ktext.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022, Oracle and/or its affiliates.
+ */
+
+void __init ktext_replication_init(void)
+{
+}
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 05/16] arm64: text replication: add sanity checks
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
` (3 preceding siblings ...)
2023-07-27 15:30 ` [PATCH v2 04/16] arm64: text replication: add init function Russell King (Oracle)
@ 2023-07-27 15:30 ` Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 06/16] arm64: text replication: copy initial kernel text Russell King (Oracle)
` (10 subsequent siblings)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:30 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
The kernel text and modules must be in separate L0 page table entries.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/mm/ktext.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/arch/arm64/mm/ktext.c b/arch/arm64/mm/ktext.c
index 3a8d37c9abc4..901f159c65e6 100644
--- a/arch/arm64/mm/ktext.c
+++ b/arch/arm64/mm/ktext.c
@@ -3,6 +3,27 @@
* Copyright (C) 2022, Oracle and/or its affiliates.
*/
+#include <linux/kernel.h>
+#include <linux/pgtable.h>
+
+#include <asm/ktext.h>
+#include <asm/memory.h>
+
void __init ktext_replication_init(void)
{
+ int kidx = pgd_index((phys_addr_t)KERNEL_START);
+
+ /*
+ * If we've messed up and the kernel shares a L0 entry with the
+ * module or vmalloc area, then don't even attempt to use text
+ * replication.
+ */
+ if (pgd_index(MODULES_VADDR) == kidx) {
+ pr_warn("Kernel is located in the same L0 index as modules - text replication disabled\n");
+ return;
+ }
+ if (pgd_index(VMALLOC_START) == kidx) {
+ pr_warn("Kernel is located in the same L0 index as vmalloc - text replication disabled\n");
+ return;
+ }
}
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 06/16] arm64: text replication: copy initial kernel text
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
` (4 preceding siblings ...)
2023-07-27 15:30 ` [PATCH v2 05/16] arm64: text replication: add sanity checks Russell King (Oracle)
@ 2023-07-27 15:30 ` Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 07/16] arm64: text replication: add node text patching Russell King (Oracle)
` (9 subsequent siblings)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:30 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
Allocate memory on the appropriate node for the per-node copies of the
kernel text, and copy the kernel text to that memory. Clean and
invalidate the caches to the point of unification so that the copied
text is correctly visible to the target node.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/mm/ktext.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/arch/arm64/mm/ktext.c b/arch/arm64/mm/ktext.c
index 901f159c65e6..4c803b89fcfe 100644
--- a/arch/arm64/mm/ktext.c
+++ b/arch/arm64/mm/ktext.c
@@ -4,14 +4,23 @@
*/
#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/numa.h>
#include <linux/pgtable.h>
+#include <linux/string.h>
+#include <asm/cacheflush.h>
#include <asm/ktext.h>
#include <asm/memory.h>
+static void *kernel_texts[MAX_NUMNODES];
+
+/* Allocate memory for the replicated kernel texts. */
void __init ktext_replication_init(void)
{
+ size_t size = _etext - _stext;
int kidx = pgd_index((phys_addr_t)KERNEL_START);
+ int nid;
/*
* If we've messed up and the kernel shares a L0 entry with the
@@ -26,4 +35,16 @@ void __init ktext_replication_init(void)
pr_warn("Kernel is located in the same L0 index as vmalloc - text replication disabled\n");
return;
}
+
+ for_each_node(nid) {
+ /* Nothing to do for node 0 */
+ if (!nid)
+ continue;
+
+ /* Allocate and copy initial kernel text for this node */
+ kernel_texts[nid] = memblock_alloc_node(size, PAGE_SIZE, nid);
+ memcpy(kernel_texts[nid], _stext, size);
+ caches_clean_inval_pou((u64)kernel_texts[nid],
+ (u64)kernel_texts[nid] + size);
+ }
}
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 07/16] arm64: text replication: add node text patching
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
` (5 preceding siblings ...)
2023-07-27 15:30 ` [PATCH v2 06/16] arm64: text replication: copy initial kernel text Russell King (Oracle)
@ 2023-07-27 15:30 ` Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 08/16] arm64: text replication: add node 0 page table definitions Russell King (Oracle)
` (8 subsequent siblings)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:30 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
Add support for text patching on our replicated texts.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/include/asm/ktext.h | 12 +++++++
arch/arm64/kernel/alternative.c | 2 ++
arch/arm64/kernel/patching.c | 7 +++-
arch/arm64/mm/ktext.c | 58 +++++++++++++++++++++++++++++++++
4 files changed, 78 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/ktext.h b/arch/arm64/include/asm/ktext.h
index 1a5f7452a3bf..289e11289c06 100644
--- a/arch/arm64/include/asm/ktext.h
+++ b/arch/arm64/include/asm/ktext.h
@@ -5,9 +5,13 @@
#ifndef ASM_KTEXT_H
#define ASM_KTEXT_H
+#include <linux/kprobes.h>
+
#ifdef CONFIG_REPLICATE_KTEXT
void ktext_replication_init(void);
+void __kprobes ktext_replication_patch(u32 *tp, __le32 insn);
+void ktext_replication_patch_alternative(__le32 *src, int nr_inst);
#else
@@ -15,6 +19,14 @@ static inline void ktext_replication_init(void)
{
}
+static inline void __kprobes ktext_replication_patch(u32 *tp, __le32 insn)
+{
+}
+
+static inline void ktext_replication_patch_alternative(__le32 *src, int nr_inst)
+{
+}
+
#endif
#endif
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index ea3f4104771d..6f17e2b4e1c3 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -15,6 +15,7 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/insn.h>
+#include <asm/ktext.h>
#include <asm/module.h>
#include <asm/sections.h>
#include <asm/vdso.h>
@@ -174,6 +175,7 @@ static void __apply_alternatives(const struct alt_region *region,
alt_cb(alt, origptr, updptr, nr_inst);
if (!is_module) {
+ ktext_replication_patch_alternative(updptr, nr_inst);
clean_dcache_range_nopatch((u64)origptr,
(u64)(origptr + nr_inst));
}
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index b4835f6d594b..627fff6ddda2 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -10,6 +10,7 @@
#include <asm/fixmap.h>
#include <asm/insn.h>
#include <asm/kprobes.h>
+#include <asm/ktext.h>
#include <asm/patching.h>
#include <asm/sections.h>
@@ -115,9 +116,13 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
return -EINVAL;
ret = aarch64_insn_write(tp, insn);
- if (ret == 0)
+ if (ret == 0) {
+ /* Also patch the other nodes */
+ ktext_replication_patch(tp, cpu_to_le32(insn));
+
caches_clean_inval_pou((uintptr_t)tp,
(uintptr_t)tp + AARCH64_INSN_SIZE);
+ }
return ret;
}
diff --git a/arch/arm64/mm/ktext.c b/arch/arm64/mm/ktext.c
index 4c803b89fcfe..04b5ceddae4e 100644
--- a/arch/arm64/mm/ktext.c
+++ b/arch/arm64/mm/ktext.c
@@ -3,8 +3,10 @@
* Copyright (C) 2022, Oracle and/or its affiliates.
*/
+#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
+#include <linux/mm.h>
#include <linux/numa.h>
#include <linux/pgtable.h>
#include <linux/string.h>
@@ -15,6 +17,62 @@
static void *kernel_texts[MAX_NUMNODES];
+void __kprobes ktext_replication_patch(u32 *tp, __le32 insn)
+{
+ unsigned long offset;
+ int nid, this_nid;
+ __le32 *p;
+
+ if (!is_kernel_text((unsigned long)tp))
+ return;
+
+ offset = (unsigned long)tp - (unsigned long)_stext;
+
+ this_nid = numa_node_id();
+ if (this_nid) {
+ /* The cache maintenance by aarch64_insn_patch_text_nosync()
+ * will occur on this node. We need it to occur on node 0.
+ */
+ p = (void *)lm_alias(_stext) + offset;
+ caches_clean_inval_pou((u64)p, (u64)p + AARCH64_INSN_SIZE);
+ }
+
+ for_each_node(nid) {
+ if (!kernel_texts[nid])
+ continue;
+
+ p = kernel_texts[nid] + offset;
+ WRITE_ONCE(*p, insn);
+ caches_clean_inval_pou((u64)p, (u64)p + AARCH64_INSN_SIZE);
+ }
+}
+
+/* Copy the patched alternative from the node 0 image to the other
+ * nodes. src is the node 0 linear-mapping address.
+ */
+void ktext_replication_patch_alternative(__le32 *src, int nr_inst)
+{
+ unsigned long offset;
+ size_t size;
+ int nid;
+ __le32 *p;
+
+ offset = (unsigned long)src - (unsigned long)lm_alias(_stext);
+ if (offset >= _etext - _stext)
+ return;
+
+ size = AARCH64_INSN_SIZE * nr_inst;
+
+ for_each_node(nid) {
+ if (!kernel_texts[nid])
+ continue;
+
+ p = kernel_texts[nid] + offset;
+ memcpy(p, src, size);
+ clean_dcache_range_nopatch((u64)p, (u64)p + size);
+ }
+}
+
/* Allocate memory for the replicated kernel texts. */
void __init ktext_replication_init(void)
{
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 08/16] arm64: text replication: add node 0 page table definitions
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
` (6 preceding siblings ...)
2023-07-27 15:30 ` [PATCH v2 07/16] arm64: text replication: add node text patching Russell King (Oracle)
@ 2023-07-27 15:30 ` Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 09/16] arm64: text replication: add swapper page directory helpers Russell King (Oracle)
` (7 subsequent siblings)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:30 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
Add a struct definition for the level zero page table group (the
optional trampoline page tables, reserved page tables, and swapper page
tables).
Add a symbol and extern declaration for the node 0 page table group.
Add an array of pointers to per-node page tables, which will default to
using the node 0 page table group.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/include/asm/pgtable.h | 14 ++++++++++++++
arch/arm64/kernel/vmlinux.lds.S | 3 +++
arch/arm64/mm/ktext.c | 4 ++++
3 files changed, 21 insertions(+)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index cb526e69299d..1e72067d1e9e 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -615,6 +615,20 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
extern pgd_t reserved_pg_dir[PTRS_PER_PGD];
+struct pgtables {
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ pgd_t tramp_pg_dir[PTRS_PER_PGD];
+#endif
+ pgd_t reserved_pg_dir[PTRS_PER_PGD];
+ pgd_t swapper_pg_dir[PTRS_PER_PGD];
+};
+
+extern struct pgtables pgtable_node0;
+
+#ifdef CONFIG_REPLICATE_KTEXT
+extern struct pgtables *pgtables[MAX_NUMNODES];
+#endif
+
extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
static inline bool in_swapper_pgdir(void *addr)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 3cd7e76cc562..d3c7ed76adbf 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -212,6 +212,9 @@ SECTIONS
idmap_pg_dir = .;
. += PAGE_SIZE;
+ /* pgtable struct - covers the tramp, reserved and swapper pgdirs */
+ pgtable_node0 = .;
+
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
tramp_pg_dir = .;
. += PAGE_SIZE;
diff --git a/arch/arm64/mm/ktext.c b/arch/arm64/mm/ktext.c
index 04b5ceddae4e..48d7943d6907 100644
--- a/arch/arm64/mm/ktext.c
+++ b/arch/arm64/mm/ktext.c
@@ -15,6 +15,10 @@
#include <asm/ktext.h>
#include <asm/memory.h>
+struct pgtables *pgtables[MAX_NUMNODES] = {
+ [0 ... MAX_NUMNODES - 1] = &pgtable_node0,
+};
+
static void *kernel_texts[MAX_NUMNODES];
void __kprobes ktext_replication_patch(u32 *tp, __le32 insn)
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 09/16] arm64: text replication: add swapper page directory helpers
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
` (7 preceding siblings ...)
2023-07-27 15:30 ` [PATCH v2 08/16] arm64: text replication: add node 0 page table definitions Russell King (Oracle)
@ 2023-07-27 15:30 ` Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 10/16] arm64: text replication: create per-node kernel page tables Russell King (Oracle)
` (6 subsequent siblings)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:30 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
Add a series of helpers for the swapper page directories - a set which
return those for the calling CPU, and those which take the NUMA node
number.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/include/asm/pgtable.h | 19 +++++++++++++++++++
arch/arm64/kernel/hibernate.c | 2 +-
arch/arm64/mm/ktext.c | 20 ++++++++++++++++++++
3 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 1e72067d1e9e..5cfff64e4944 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -627,6 +627,25 @@ extern struct pgtables pgtable_node0;
#ifdef CONFIG_REPLICATE_KTEXT
extern struct pgtables *pgtables[MAX_NUMNODES];
+
+pgd_t *swapper_pg_dir_node(void);
+phys_addr_t __swapper_pg_dir_node_phys(int nid);
+phys_addr_t swapper_pg_dir_node_phys(void);
+#else
+static inline pgd_t *swapper_pg_dir_node(void)
+{
+ return swapper_pg_dir;
+}
+
+static inline phys_addr_t __swapper_pg_dir_node_phys(int nid)
+{
+ return __pa_symbol(swapper_pg_dir);
+}
+
+static inline phys_addr_t swapper_pg_dir_node_phys(void)
+{
+ return __pa_symbol(swapper_pg_dir);
+}
#endif
extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 02870beb271e..be69515da802 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -113,7 +113,7 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
return -EOVERFLOW;
arch_hdr_invariants(&hdr->invariants);
- hdr->ttbr1_el1 = __pa_symbol(swapper_pg_dir);
+ hdr->ttbr1_el1 = swapper_pg_dir_node_phys();
hdr->reenter_kernel = _cpu_resume;
/* We can't use __hyp_get_vectors() because kvm may still be loaded */
diff --git a/arch/arm64/mm/ktext.c b/arch/arm64/mm/ktext.c
index 48d7943d6907..7b9a1f1b12a1 100644
--- a/arch/arm64/mm/ktext.c
+++ b/arch/arm64/mm/ktext.c
@@ -21,6 +21,26 @@ struct pgtables *pgtables[MAX_NUMNODES] = {
static void *kernel_texts[MAX_NUMNODES];
+static pgd_t *__swapper_pg_dir_node(int nid)
+{
+ return pgtables[nid]->swapper_pg_dir;
+}
+
+pgd_t *swapper_pg_dir_node(void)
+{
+ return __swapper_pg_dir_node(numa_node_id());
+}
+
+phys_addr_t __swapper_pg_dir_node_phys(int nid)
+{
+ return __pa(__swapper_pg_dir_node(nid));
+}
+
+phys_addr_t swapper_pg_dir_node_phys(void)
+{
+ return __swapper_pg_dir_node_phys(numa_node_id());
+}
+
void __kprobes ktext_replication_patch(u32 *tp, __le32 insn)
{
unsigned long offset;
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 10/16] arm64: text replication: create per-node kernel page tables
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
` (8 preceding siblings ...)
2023-07-27 15:30 ` [PATCH v2 09/16] arm64: text replication: add swapper page directory helpers Russell King (Oracle)
@ 2023-07-27 15:30 ` Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 11/16] arm64: text replication: boot secondary CPUs with appropriate TTBR1 Russell King (Oracle)
` (5 subsequent siblings)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:30 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
Allocate the level 0 page tables for the per-node kernel text
replication, but copy all level 0 table entries from the NUMA node 0
table. Therefore, for the time being, each node's level 0 page tables
will contain identical entries, and thus other nodes will continue
to use the node 0 kernel text.
Since the level 0 page tables can be updated at runtime to add entries
for vmalloc and module space, propagate these updates to the other
swapper page tables. The exception is if we see an update for the
level 0 entry which points to the kernel mapping.
We also need to setup a copy of the trampoline page tables as well, as
the assembly code relies on the two page tables being a fixed offset
apart.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/include/asm/ktext.h | 12 ++++++++++
arch/arm64/mm/ktext.c | 42 +++++++++++++++++++++++++++++++++-
arch/arm64/mm/mmu.c | 5 ++++
3 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/ktext.h b/arch/arm64/include/asm/ktext.h
index 289e11289c06..386f9812d3c1 100644
--- a/arch/arm64/include/asm/ktext.h
+++ b/arch/arm64/include/asm/ktext.h
@@ -7,11 +7,15 @@
#include <linux/kprobes.h>
+#include <asm/pgtable-types.h>
+
#ifdef CONFIG_REPLICATE_KTEXT
void ktext_replication_init(void);
void __kprobes ktext_replication_patch(u32 *tp, __le32 insn);
void ktext_replication_patch_alternative(__le32 *src, int nr_inst);
+void ktext_replication_set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
+void ktext_replication_init_tramp(void);
#else
@@ -27,6 +31,14 @@ static inline void ktext_replication_patch_alternative(__le32 *src, int nr_inst)
{
}
+static inline void ktext_replication_set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+}
+
+static inline void ktext_replication_init_tramp(void)
+{
+}
+
#endif
#endif
diff --git a/arch/arm64/mm/ktext.c b/arch/arm64/mm/ktext.c
index 7b9a1f1b12a1..9efd21eb3308 100644
--- a/arch/arm64/mm/ktext.c
+++ b/arch/arm64/mm/ktext.c
@@ -14,6 +14,7 @@
#include <asm/cacheflush.h>
#include <asm/ktext.h>
#include <asm/memory.h>
+#include <asm/pgalloc.h>
struct pgtables *pgtables[MAX_NUMNODES] = {
[0 ... MAX_NUMNODES - 1] = &pgtable_node0,
@@ -97,7 +98,7 @@ void ktext_replication_patch_alternative(__le32 *src, int nr_inst)
}
}
-/* Allocate memory for the replicated kernel texts. */
+/* Allocate page tables and memory for the replicated kernel texts. */
void __init ktext_replication_init(void)
{
size_t size = _etext - _stext;
@@ -128,5 +129,44 @@ void __init ktext_replication_init(void)
memcpy(kernel_texts[nid], _stext, size);
caches_clean_inval_pou((u64)kernel_texts[nid],
(u64)kernel_texts[nid] + size);
+
+ /* Allocate the pagetables for this node */
+ pgtables[nid] = memblock_alloc_node(sizeof(*pgtables[0]),
+ PGD_SIZE, nid);
+
+ /* Copy initial swapper page directory */
+ memcpy(pgtables[nid]->swapper_pg_dir, swapper_pg_dir, PGD_SIZE);
+ }
+}
+
+void ktext_replication_set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ unsigned long idx = pgdp - swapper_pg_dir;
+ int nid;
+
+ if (WARN_ON_ONCE(idx >= PTRS_PER_PGD) ||
+ WARN_ON_ONCE(idx == pgd_index((phys_addr_t)KERNEL_START)))
+ return;
+
+ for_each_node(nid) {
+ if (pgtables[nid]->swapper_pg_dir == swapper_pg_dir)
+ continue;
+
+ WRITE_ONCE(pgtables[nid]->swapper_pg_dir[idx], pgd);
+ }
+}
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+void __init ktext_replication_init_tramp(void)
+{
+ int nid;
+
+ for_each_node(nid) {
+ /* Nothing to do for node 0 */
+ if (pgtables[nid]->tramp_pg_dir == tramp_pg_dir)
+ continue;
+
+ memcpy(pgtables[nid]->tramp_pg_dir, tramp_pg_dir, PGD_SIZE);
}
}
+#endif
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 95d360805f8a..4ffa2650afd6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -31,6 +31,7 @@
#include <asm/fixmap.h>
#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
+#include <asm/ktext.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <linux/sizes.h>
@@ -81,6 +82,7 @@ void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
pgd_t *fixmap_pgdp;
spin_lock(&swapper_pgdir_lock);
+ ktext_replication_set_swapper_pgd(pgdp, pgd);
fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp));
WRITE_ONCE(*fixmap_pgdp, pgd);
/*
@@ -694,6 +696,9 @@ static int __init map_entry_trampoline(void)
__set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO);
+ /* Copy trampoline page tables to other numa nodes */
+ ktext_replication_init_tramp();
+
return 0;
}
core_initcall(map_entry_trampoline);
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 11/16] arm64: text replication: boot secondary CPUs with appropriate TTBR1
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
` (9 preceding siblings ...)
2023-07-27 15:30 ` [PATCH v2 10/16] arm64: text replication: create per-node kernel page tables Russell King (Oracle)
@ 2023-07-27 15:30 ` Russell King (Oracle)
2023-07-27 15:30 ` [PATCH v2 12/16] arm64: text replication: update cnp support Russell King (Oracle)
` (4 subsequent siblings)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:30 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
Arrange for secondary CPUs to boot with TTBR1 pointing at the
appropriate per-node copy of the kernel page tables for the CPU's NUMA
node.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/include/asm/smp.h | 1 +
arch/arm64/kernel/asm-offsets.c | 1 +
arch/arm64/kernel/head.S | 3 ++-
arch/arm64/kernel/smp.c | 3 +++
4 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 9b31e6d0da17..9635e473969b 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -79,6 +79,7 @@ asmlinkage void secondary_start_kernel(void);
struct secondary_data {
struct task_struct *task;
long status;
+ phys_addr_t ttbr1;
};
extern struct secondary_data secondary_data;
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 5ff1942b04fc..ce9d265bc099 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -121,6 +121,7 @@ int main(void)
DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
BLANK();
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
+ DEFINE(CPU_BOOT_TTBR1, offsetof(struct secondary_data, ttbr1));
BLANK();
DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val));
DEFINE(FTR_OVR_MASK_OFFSET, offsetof(struct arm64_ftr_override, mask));
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 757a0de07f91..52643080a2c2 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -648,7 +648,8 @@ SYM_FUNC_START_LOCAL(secondary_startup)
ldr_l x0, vabits_actual
#endif
bl __cpu_setup // initialise processor
- adrp x1, swapper_pg_dir
+ adr_l x1, secondary_data
+ ldr x1, [x1, #CPU_BOOT_TTBR1]
adrp x2, idmap_pg_dir
bl __enable_mmu
ldr x8, =__secondary_switched
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index edd63894d61e..95b649eb5999 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -119,6 +119,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* page tables.
*/
secondary_data.task = idle;
+ secondary_data.ttbr1 = __swapper_pg_dir_node_phys(cpu_to_node(cpu));
+ dcache_clean_poc((uintptr_t)&secondary_data,
+ (uintptr_t)&secondary_data + sizeof(secondary_data));
update_cpu_boot_status(CPU_MMU_OFF);
/* Now bring the CPU into our world */
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 12/16] arm64: text replication: update cnp support
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
` (10 preceding siblings ...)
2023-07-27 15:30 ` [PATCH v2 11/16] arm64: text replication: boot secondary CPUs with appropriate TTBR1 Russell King (Oracle)
@ 2023-07-27 15:30 ` Russell King (Oracle)
2023-07-27 15:31 ` [PATCH v2 13/16] arm64: text replication: setup page tables for copied kernel Russell King (Oracle)
` (3 subsequent siblings)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:30 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
Add changes for CNP (Common Not Private) support of kernel text
replication. Although text replication has only been tested on
dual-socket Ampere A1 systems, provided the different NUMA nodes
are not part of the same inner shareable domain, CNP should not
be a problem.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/include/asm/mmu_context.h | 2 +-
arch/arm64/kernel/cpufeature.c | 2 +-
arch/arm64/kernel/suspend.c | 3 ++-
3 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 42085b2b1d92..36f6b1b65ae5 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -163,7 +163,7 @@ static inline void cpu_replace_ttbr1_phys(phys_addr_t pgd_phys, pgd_t *idmap)
phys_addr_t ttbr1 = phys_to_ttbr(pgd_phys);
if (system_supports_cnp() &&
- !WARN_ON(pgd_phys != virt_to_phys(lm_alias(swapper_pg_dir)))) {
+ !WARN_ON(pgd_phys != swapper_pg_dir_node_phys())) {
/*
* cpu_replace_ttbr1() is used when there's a boot CPU
* up (i.e. cpufeature framework is not up yet) and
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index f9d456fe132d..e7dc7ec849ab 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -3417,7 +3417,7 @@ subsys_initcall_sync(init_32bit_el0_mask);
static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
{
- cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
+ cpu_replace_ttbr1_phys(swapper_pg_dir_node_phys(), idmap_pg_dir);
}
/*
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 0fbdf5fe64d8..49fa80bafd6d 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -55,7 +55,8 @@ void notrace __cpu_suspend_exit(void)
/* Restore CnP bit in TTBR1_EL1 */
if (system_supports_cnp())
- cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
+ cpu_replace_ttbr1_phys(swapper_pg_dir_node_phys(),
+ idmap_pg_dir);
/*
* PSTATE was not saved over suspend/resume, re-enable any detected
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 13/16] arm64: text replication: setup page tables for copied kernel
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
` (11 preceding siblings ...)
2023-07-27 15:30 ` [PATCH v2 12/16] arm64: text replication: update cnp support Russell King (Oracle)
@ 2023-07-27 15:31 ` Russell King (Oracle)
2023-07-27 15:31 ` [PATCH v2 14/16] arm64: text replication: include most of read-only data as well Russell King (Oracle)
` (2 subsequent siblings)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:31 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
Setup page table entries in each non-boot NUMA node page table to
point at each node's own copy of the kernel text. This switches
each node to use its own unique copy of the kernel text.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/include/asm/ktext.h | 1 +
arch/arm64/mm/ktext.c | 8 +++++
arch/arm64/mm/mmu.c | 53 ++++++++++++++++++++++++++++------
3 files changed, 53 insertions(+), 9 deletions(-)
diff --git a/arch/arm64/include/asm/ktext.h b/arch/arm64/include/asm/ktext.h
index 386f9812d3c1..6ece59ca90a2 100644
--- a/arch/arm64/include/asm/ktext.h
+++ b/arch/arm64/include/asm/ktext.h
@@ -16,6 +16,7 @@ void __kprobes ktext_replication_patch(u32 *tp, __le32 insn);
void ktext_replication_patch_alternative(__le32 *src, int nr_inst);
void ktext_replication_set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
void ktext_replication_init_tramp(void);
+void create_kernel_nid_map(pgd_t *pgdp, void *ktext);
#else
diff --git a/arch/arm64/mm/ktext.c b/arch/arm64/mm/ktext.c
index 9efd21eb3308..6692759e78a8 100644
--- a/arch/arm64/mm/ktext.c
+++ b/arch/arm64/mm/ktext.c
@@ -136,6 +136,14 @@ void __init ktext_replication_init(void)
/* Copy initial swapper page directory */
memcpy(pgtables[nid]->swapper_pg_dir, swapper_pg_dir, PGD_SIZE);
+
+ /* Clear the kernel mapping */
+ memset(&pgtables[nid]->swapper_pg_dir[kidx], 0,
+ sizeof(pgtables[nid]->swapper_pg_dir[kidx]));
+
+ /* Create kernel mapping pointing at our local copy */
+ create_kernel_nid_map(pgtables[nid]->swapper_pg_dir,
+ kernel_texts[nid]);
}
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 4ffa2650afd6..fb9c476605d1 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -640,6 +640,16 @@ void mark_rodata_ro(void)
debug_checkwx();
}
+static void __init create_kernel_mapping(pgd_t *pgdp, phys_addr_t pa_start,
+ void *va_start, void *va_end,
+ pgprot_t prot, int flags)
+{
+ size_t size = va_end - va_start;
+
+ __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size,
+ prot, early_pgtable_alloc, flags);
+}
+
static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
pgprot_t prot, struct vm_struct *vma,
int flags, unsigned long vm_flags)
@@ -650,8 +660,7 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
BUG_ON(!PAGE_ALIGNED(pa_start));
BUG_ON(!PAGE_ALIGNED(size));
- __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
- early_pgtable_alloc, flags);
+ create_kernel_mapping(pgdp, pa_start, va_start, va_end, prot, flags);
if (!(vm_flags & VM_NO_GUARD))
size += PAGE_SIZE;
@@ -720,14 +729,8 @@ static bool arm64_early_this_cpu_has_bti(void)
ID_AA64PFR1_EL1_BT_SHIFT);
}
-/*
- * Create fine-grained mappings for the kernel.
- */
-static void __init map_kernel(pgd_t *pgdp)
+static pgprot_t __init kernel_text_pgprot(void)
{
- static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
- vmlinux_initdata, vmlinux_data;
-
/*
* External debuggers may need to write directly to the text
* mapping to install SW breakpoints. Allow this (only) when
@@ -743,6 +746,38 @@ static void __init map_kernel(pgd_t *pgdp)
if (arm64_early_this_cpu_has_bti())
text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
+ return text_prot;
+}
+
+#ifdef CONFIG_REPLICATE_KTEXT
+void __init create_kernel_nid_map(pgd_t *pgdp, void *ktext)
+{
+ pgprot_t text_prot = kernel_text_pgprot();
+
+ create_kernel_mapping(pgdp, __pa(ktext), _stext, _etext, text_prot, 0);
+ create_kernel_mapping(pgdp, __pa_symbol(__start_rodata),
+ __start_rodata, __inittext_begin,
+ PAGE_KERNEL, NO_CONT_MAPPINGS);
+ create_kernel_mapping(pgdp, __pa_symbol(__inittext_begin),
+ __inittext_begin, __inittext_end,
+ text_prot, 0);
+ create_kernel_mapping(pgdp, __pa_symbol(__initdata_begin),
+ __initdata_begin, __initdata_end,
+ PAGE_KERNEL, 0);
+ create_kernel_mapping(pgdp, __pa_symbol(_data), _data, _end,
+ PAGE_KERNEL, 0);
+}
+#endif
+
+/*
+ * Create fine-grained mappings for the kernel.
+ */
+static void __init map_kernel(pgd_t *pgdp)
+{
+ static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
+ vmlinux_initdata, vmlinux_data;
+ pgprot_t text_prot = kernel_text_pgprot();
+
/*
* Only rodata will be remapped with different permissions later on,
* all other segments are allowed to use contiguous mappings.
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 14/16] arm64: text replication: include most of read-only data as well
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
` (12 preceding siblings ...)
2023-07-27 15:31 ` [PATCH v2 13/16] arm64: text replication: setup page tables for copied kernel Russell King (Oracle)
@ 2023-07-27 15:31 ` Russell King (Oracle)
2023-07-27 15:31 ` [PATCH v2 15/16] arm64: text replication: early kernel option to enable replication Russell King (Oracle)
2023-07-27 15:31 ` [PATCH v2 16/16] arm64: text replication: add Kconfig Russell King (Oracle)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:31 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
Include as much of the read-only data in the replication as we can
without needing to move away from the generic RO_DATA() macro in
the linker script.
Unfortunately, the read-only data section is immediately followed
by the read-only after init data with no page alignment, which
means we can't have separate mappings for the read-only data
section and everything else. Changing that would mean replacing
the generic RO_DATA() macro which increases the maintenance burden.
However, this is likely not worth the effort as the majority of
read-only data will be covered.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/mm/ktext.c | 2 +-
arch/arm64/mm/mmu.c | 21 ++++++++++++++++++---
2 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/mm/ktext.c b/arch/arm64/mm/ktext.c
index 6692759e78a8..6265a2db449b 100644
--- a/arch/arm64/mm/ktext.c
+++ b/arch/arm64/mm/ktext.c
@@ -101,7 +101,7 @@ void ktext_replication_patch_alternative(__le32 *src, int nr_inst)
/* Allocate page tables and memory for the replicated kernel texts. */
void __init ktext_replication_init(void)
{
- size_t size = _etext - _stext;
+ size_t size = __end_rodata - _stext;
int kidx = pgd_index((phys_addr_t)KERNEL_START);
int nid;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index fb9c476605d1..bf674bdaf336 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -752,11 +752,26 @@ static pgprot_t __init kernel_text_pgprot(void)
#ifdef CONFIG_REPLICATE_KTEXT
void __init create_kernel_nid_map(pgd_t *pgdp, void *ktext)
{
+ phys_addr_t pa_ktext;
+ size_t ro_offset;
+ void *ro_end;
pgprot_t text_prot = kernel_text_pgprot();
- create_kernel_mapping(pgdp, __pa(ktext), _stext, _etext, text_prot, 0);
- create_kernel_mapping(pgdp, __pa_symbol(__start_rodata),
- __start_rodata, __inittext_begin,
+ pa_ktext = __pa(ktext);
+ ro_offset = __pa_symbol(__start_rodata) - __pa_symbol(_stext);
+ /*
+ * We must not cover the read-only data after init, since this
+ * is written to during boot, and thus must be shared between
+ * the NUMA nodes.
+ */
+ ro_end = PTR_ALIGN_DOWN((void *)__start_ro_after_init, PAGE_SIZE);
+
+ create_kernel_mapping(pgdp, pa_ktext, _stext, _etext, text_prot, 0);
+ create_kernel_mapping(pgdp, pa_ktext + ro_offset,
+ __start_rodata, ro_end,
+ PAGE_KERNEL, NO_CONT_MAPPINGS);
+ create_kernel_mapping(pgdp, __pa_symbol(ro_end),
+ ro_end, __inittext_begin,
PAGE_KERNEL, NO_CONT_MAPPINGS);
create_kernel_mapping(pgdp, __pa_symbol(__inittext_begin),
__inittext_begin, __inittext_end,
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 15/16] arm64: text replication: early kernel option to enable replication
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
` (13 preceding siblings ...)
2023-07-27 15:31 ` [PATCH v2 14/16] arm64: text replication: include most of read-only data as well Russell King (Oracle)
@ 2023-07-27 15:31 ` Russell King (Oracle)
2023-07-27 15:31 ` [PATCH v2 16/16] arm64: text replication: add Kconfig Russell King (Oracle)
15 siblings, 0 replies; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:31 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
Provide an early kernel option "ktext=" which allows the kernel text
replication to be enabled. This takes a boolean argument.
The way this has been implemented means that we take all the same paths
through the kernel at runtime whether kernel text replication has been
enabled or not; this allows the performance effects of the code changes
to be evaluated separately from the effect of actually running with a
replicated kernel text.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
.../admin-guide/kernel-parameters.txt | 5 +++++
arch/arm64/mm/ktext.c | 18 ++++++++++++++++++
2 files changed, 23 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1457995fd41..ceb351a6ff2a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2494,6 +2494,11 @@
0: force disabled
1: force enabled
+ ktext= [ARM64] Control kernel text replication on NUMA
+ machines. Default: disabled.
+ 0: disable kernel text replication
+ 1: enable kernel text replication
+
kunit.enable= [KUNIT] Enable executing KUnit tests. Requires
CONFIG_KUNIT to be set to be fully enabled. The
default value can be overridden via
diff --git a/arch/arm64/mm/ktext.c b/arch/arm64/mm/ktext.c
index 6265a2db449b..f5580ae14ca4 100644
--- a/arch/arm64/mm/ktext.c
+++ b/arch/arm64/mm/ktext.c
@@ -98,6 +98,21 @@ void ktext_replication_patch_alternative(__le32 *src, int nr_inst)
}
}
+static bool ktext_enabled;
+
+static int __init parse_ktext(char *str)
+{
+ bool enabled;
+ int ret = strtobool(str, &enabled);
+
+ if (ret)
+ return ret;
+
+ ktext_enabled = enabled;
+ return 0;
+}
+early_param("ktext", parse_ktext);
+
/* Allocate page tables and memory for the replicated kernel texts. */
void __init ktext_replication_init(void)
{
@@ -119,6 +134,9 @@ void __init ktext_replication_init(void)
return;
}
+ if (!ktext_enabled)
+ return;
+
for_each_node(nid) {
/* Nothing to do for node 0 */
if (!nid)
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 16/16] arm64: text replication: add Kconfig
2023-07-27 15:29 [PATCH v2 00/17] arm64 kernel text replication Russell King (Oracle)
` (14 preceding siblings ...)
2023-07-27 15:31 ` [PATCH v2 15/16] arm64: text replication: early kernel option to enable replication Russell King (Oracle)
@ 2023-07-27 15:31 ` Russell King (Oracle)
2023-07-27 15:36 ` Randy Dunlap
15 siblings, 1 reply; 19+ messages in thread
From: Russell King (Oracle) @ 2023-07-27 15:31 UTC (permalink / raw)
To: Catalin Marinas, Jonathan Corbet, Will Deacon; +Cc: linux-arm-kernel, linux-doc
Add the Kconfig symbol for kernel text replication. This unfortunately
requires KASAN and kernel text randomisation options to be disabled at
the moment.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm64/Kconfig | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7856c3a3e35a..53143324c392 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -160,7 +160,7 @@ config ARM64
select HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
- select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
+ select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48 && !REPLICATE_KTEXT)
select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
@@ -1426,6 +1426,13 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
+config REPLICATE_KTEXT
+ bool "Replicate kernel text across numa nodes"
+ depends on NUMA
+ help
+ Say Y here to enable replicating the kernel text across multiple
+ nodes in a NUMA cluster. This trades memory for speed.
+
source "kernel/Kconfig.hz"
config ARCH_SPARSEMEM_ENABLE
@@ -2173,6 +2180,7 @@ config RELOCATABLE
config RANDOMIZE_BASE
bool "Randomize the address of the kernel image"
+ depends on !REPLICATE_KTEXT
select RELOCATABLE
help
Randomizes the virtual address at which the kernel image is
--
2.30.2
^ permalink raw reply related [flat|nested] 19+ messages in thread