* [PATCH v3 1/2] x86/e820: Expose API to update e820 kexec and firmware tables externally.
2024-04-26 0:41 [PATCH v3 0/2] Apply RMP table fixups for kexec Ashish Kalra
@ 2024-04-26 0:41 ` Ashish Kalra
2024-04-26 0:43 ` [PATCH v3 2/2] x86/sev: Add callback to apply RMP table fixups for kexec Ashish Kalra
1 sibling, 0 replies; 3+ messages in thread
From: Ashish Kalra @ 2024-04-26 0:41 UTC (permalink / raw)
To: tglx, mingo, bp, dave.hansen, x86, hpa, jbohac, dyoung, luto
Cc: rafael, peterz, adrian.hunter, sathyanarayanan.kuppuswamy,
jun.nakajima, rick.p.edgecombe, thomas.lendacky, michael.roth,
seanjc, kai.huang, bhe, kirill.shutemov, jroedel, ardb,
kevinloughlin, kexec, linux-coco, linux-kernel
From: Ashish Kalra <ashish.kalra@amd.com>
Export a new API helper function e820__range_update_table() to update both
e820_table_kexec and e820_table_firmware. Move all current users of
e820__range_update_kexec() to use this new helper function.
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
arch/x86/include/asm/e820/api.h | 2 ++
arch/x86/kernel/e820.c | 6 +++---
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index e8f58ddd06d9..eeb44e71aa66 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -18,6 +18,8 @@ extern void e820__range_add (u64 start, u64 size, enum e820_type type);
extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type);
+extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
+
extern void e820__print_table(char *who);
extern int e820__update_table(struct e820_table *table);
extern void e820__update_table_print(void);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 6f1b379e3b38..872e133d2718 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -532,9 +532,9 @@ u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum
return __e820__range_update(e820_table, start, size, old_type, new_type);
}
-static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
+u64 __init e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
{
- return __e820__range_update(e820_table_kexec, start, size, old_type, new_type);
+ return __e820__range_update(t, start, size, old_type, new_type);
}
/* Remove a range of memory from the E820 table: */
@@ -806,7 +806,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
addr = memblock_phys_alloc(size, align);
if (addr) {
- e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__range_update_table(e820_table_kexec, addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
e820__update_table_kexec();
}
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread* [PATCH v3 2/2] x86/sev: Add callback to apply RMP table fixups for kexec
2024-04-26 0:41 [PATCH v3 0/2] Apply RMP table fixups for kexec Ashish Kalra
2024-04-26 0:41 ` [PATCH v3 1/2] x86/e820: Expose API to update e820 kexec and firmware tables externally Ashish Kalra
@ 2024-04-26 0:43 ` Ashish Kalra
1 sibling, 0 replies; 3+ messages in thread
From: Ashish Kalra @ 2024-04-26 0:43 UTC (permalink / raw)
To: tglx, mingo, bp, dave.hansen, x86, hpa, jbohac, dyoung, luto
Cc: rafael, peterz, adrian.hunter, sathyanarayanan.kuppuswamy,
jun.nakajima, rick.p.edgecombe, thomas.lendacky, michael.roth,
seanjc, kai.huang, bhe, kirill.shutemov, jroedel, ardb,
kevinloughlin, kexec, linux-coco, linux-kernel
From: Ashish Kalra <ashish.kalra@amd.com>
Handle cases where the RMP table placement in the BIOS is
not 2M aligned: the kexec kernel could then try to allocate
from within that chunk, which causes a fatal RMP fault.
The kexec failure is illustrated below from the kernel logs:
[ 0.000000] SEV-SNP: RMP table physical range [0x0000007ffe800000 - 0x000000807f0fffff]
[ 0.000000] BIOS-provided physical RAM map:
[ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000008efff] usable
[ 0.000000] BIOS-e820: [mem 0x000000000008f000-0x000000000008ffff] ACPI NVS
[ 0.000000] BIOS-e820: [mem 0x0000000000090000-0x000000000009ffff] usable
[ 0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000005a14afff] usable
[ 0.000000] BIOS-e820: [mem 0x000000005a14b000-0x000000005a34afff] reserved
[ 0.000000] BIOS-e820: [mem 0x000000005a34b000-0x0000000067acefff] usable
[ 0.000000] BIOS-e820: [mem 0x0000000067acf000-0x000000006dfcefff] reserved
[ 0.000000] BIOS-e820: [mem 0x000000006dfcf000-0x000000006edfefff] ACPI NVS
[ 0.000000] BIOS-e820: [mem 0x000000006edff000-0x000000006effefff] ACPI data
[ 0.000000] BIOS-e820: [mem 0x000000006efff000-0x000000006effffff] usable
[ 0.000000] BIOS-e820: [mem 0x000000006f000000-0x000000006f00afff] ACPI NVS
[ 0.000000] BIOS-e820: [mem 0x000000006f00b000-0x000000006fffffff] usable
[ 0.000000] BIOS-e820: [mem 0x0000000070000000-0x000000008fffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000aa000000-0x00000000aaffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000c5000000-0x00000000c5ffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000e0000000-0x00000000e0ffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000fd000000-0x00000000ffffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000407fcfffff] usable
[ 0.000000] BIOS-e820: [mem 0x000000407fd00000-0x000000407fffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x0000004080000000-0x0000007ffe7fffff] usable
[ 0.000000] BIOS-e820: [mem 0x0000007ffe800000-0x000000807f0fffff] reserved
[ 0.000000] BIOS-e820: [mem 0x000000807f100000-0x000000807f1fefff] usable
As seen here in the e820 memory map, the end of the RMP table range is not aligned to
2MB, and the memory immediately following it is not reserved but is marked usable as RAM.
Subsequently, kexec -s (KEXEC_FILE_LOAD syscall) loads its purgatory code,
boot_param, command line and other setup data into this RAM region, as seen in the
kexec logs below, which leads to a fatal RMP fault during kexec boot.
[ 173.113085] Loaded purgatory at 0x807f1fa000
[ 173.113099] Loaded boot_param, command line and misc at 0x807f1f8000 bufsz=0x1350 memsz=0x2000
[ 173.113107] Loaded 64bit kernel at 0x7ffae00000 bufsz=0xd06200 memsz=0x3894000
[ 173.113291] Loaded initrd at 0x7ff6c89000 bufsz=0x4176014 memsz=0x4176014
[ 173.113296] E820 memmap:
[ 173.113298] 0000000000000000-000000000008efff (1)
[ 173.113300] 000000000008f000-000000000008ffff (4)
[ 173.113302] 0000000000090000-000000000009ffff (1)
[ 173.113303] 0000000000100000-000000005a14afff (1)
[ 173.113305] 000000005a14b000-000000005a34afff (2)
[ 173.113306] 000000005a34b000-0000000067acefff (1)
[ 173.113308] 0000000067acf000-000000006dfcefff (2)
[ 173.113309] 000000006dfcf000-000000006edfefff (4)
[ 173.113311] 000000006edff000-000000006effefff (3)
[ 173.113312] 000000006efff000-000000006effffff (1)
[ 173.113314] 000000006f000000-000000006f00afff (4)
[ 173.113315] 000000006f00b000-000000006fffffff (1)
[ 173.113317] 0000000070000000-000000008fffffff (2)
[ 173.113318] 00000000aa000000-00000000aaffffff (2)
[ 173.113319] 00000000c5000000-00000000c5ffffff (2)
[ 173.113321] 00000000e0000000-00000000e0ffffff (2)
[ 173.113322] 00000000fd000000-00000000ffffffff (2)
[ 173.113324] 0000000100000000-000000407fcfffff (1)
[ 173.113325] 000000407fd00000-000000407fffffff (2)
[ 173.113327] 0000004080000000-0000007ffe7fffff (1)
[ 173.113328] 0000007ffe800000-000000807f0fffff (2)
[ 173.113330] 000000807f100000-000000807f1fefff (1)
[ 173.113331] 000000807f1ff000-000000807fffffff (2)
[ 173.690528] nr_segments = 4
[ 173.690533] segment[0]: buf=0x00000000e626d1a2 bufsz=0x4000 mem=0x807f1fa000 memsz=0x5000
[ 173.690546] segment[1]: buf=0x0000000029c67bd6 bufsz=0x1350 mem=0x807f1f8000 memsz=0x2000
[ 173.690552] segment[2]: buf=0x0000000045c60183 bufsz=0xd06200 mem=0x7ffae00000 memsz=0x3894000
[ 173.697994] segment[3]: buf=0x000000006e54f08d bufsz=0x4176014 mem=0x7ff6c89000 memsz=0x4177000
[ 173.708672] kexec_file_load: type:0, start:0x807f1fa150 head:0x1184d0002 flags:0x0
Check whether the start and end physical addresses of the RMP table
are aligned to 2MB and, if they are not, mark the enclosing 2MB range
as reserved in all three e820 tables.
Fixes: c3b86e61b756 ("x86/cpufeatures: Enable/unmask SEV-SNP CPU feature")
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
arch/x86/include/asm/sev.h | 2 ++
arch/x86/mm/mem_encrypt.c | 13 ++++++++++++
arch/x86/virt/svm/sev.c | 42 ++++++++++++++++++++++++++++++++++++++
3 files changed, 57 insertions(+)
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 7f57382afee4..24300927a476 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -269,6 +269,7 @@ int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immut
int rmp_make_shared(u64 pfn, enum pg_level level);
void snp_leak_pages(u64 pfn, unsigned int npages);
void kdump_sev_callback(void);
+void snp_rmptable_e820_fixup(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
@@ -282,6 +283,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
static inline void kdump_sev_callback(void) { }
+static inline void snp_rmptable_e820_fixup(void) {}
#endif
#endif
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 6f3b3e028718..d88c942dd311 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -102,6 +102,19 @@ void __init mem_encrypt_setup_arch(void)
phys_addr_t total_mem = memblock_phys_mem_size();
unsigned long size;
+ /*
+ * Invoke callback to do RMP table fixups which needs to be called
+ * during setup_arch() after the e820 tables have been setup
+ * in e820__memory_setup() and this function is appropriate to
+ * invoke the callback to apply any memory encryption platform specific
+ * quirks. The callback to do RMP table fixups cannot be invoked from
+ * snp_init() as snp_init() is called from sme_enable() in
+ * startup_64() which is before setup_arch() and e820 tables
+ * have still not been setup.
+ */
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ snp_rmptable_e820_fixup();
+
if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
return;
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index ab0e8448bb6e..1b4b99b26bec 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -163,6 +163,48 @@ bool snp_probe_rmptable_info(void)
return true;
}
+static void __init __snp_e820_tables_fixup(u64 pa)
+{
+ if (IS_ALIGNED(pa, PMD_SIZE))
+ return;
+
+ /*
+ * Check if RMP table start and end physical range
+ * in e820_tables are not aligned to 2MB and in that case map
+ * this range in all the three e820 tables to be reserved.
+ * The e820_table needs to be updated as it is converted to
+ * kernel memory resources and used by KEXEC_FILE_LOAD syscall
+ * to load kexec segments. The e820_table_firmware needs to be
+ * updated as it is exposed to sysfs and used by KEXEC_LOAD
+ * syscall to load kexec segments and e820_table_kexec needs
+ * to be updated as it is passed to the kexec-ed kernel.
+ */
+ pa = ALIGN_DOWN(pa, PMD_SIZE);
+ if (e820__mapped_any(pa, pa + PMD_SIZE, E820_TYPE_RAM)) {
+ pr_info("Reserving start/end of RMP table on a 2MB boundary [0x%016llx]\n", pa);
+ e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ }
+}
+
+/*
+ * Callback to do any RMP table fixups, needs to be called
+ * after e820__memory_setup(), after the e820 tables are
+ * setup/populated and before e820__reserve_resources(), before
+ * the e820 map has been converted to the standard Linux memory
+ * resources and e820 map is no longer used and modifying it
+ * has no effect. Handle cases where the RMP table placement in
+ * the BIOS is not 2M aligned and then the kexec kernel could
+ * try to allocate from within that chunk and that causes a
+ * fatal RMP fault.
+ */
+void __init snp_rmptable_e820_fixup(void)
+{
+ __snp_e820_tables_fixup(probed_rmp_base);
+ __snp_e820_tables_fixup(probed_rmp_base + probed_rmp_size);
+}
+
/*
* Do the necessary preparations which are verified by the firmware as
* described in the SNP_INIT_EX firmware command description in the SNP
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread