* [PATCH 1/3] arm64: kdump: Allocate crash low memory in the bottom-up direction
2023-07-21 8:17 [PATCH 0/3] arm64: kdump: Restore the write protection for the crashkernel memory region thunder.leizhen
@ 2023-07-21 8:17 ` thunder.leizhen
2023-07-21 21:22 ` kernel test robot
2023-07-21 8:17 ` [PATCH 2/3] arm64: kdump: use page-level mapping for crashkernel region thunder.leizhen
2023-07-21 8:17 ` [PATCH 3/3] arm64: kdump: add access protection for crashkernel region thunder.leizhen
2 siblings, 1 reply; 5+ messages in thread
From: thunder.leizhen @ 2023-07-21 8:17 UTC
To: Dave Young, Baoquan He, Vivek Goyal, Eric W. Biederman, kexec,
linux-kernel, Catalin Marinas, Will Deacon, linux-arm-kernel
Cc: Zhen Lei
From: Zhen Lei <thunder.leizhen@huawei.com>
The boot-time call sequence involved, with this patch applied, is:

arm64_memblock_init()
    reserve_crashkernel()                   (1)
paging_init()
    map_mem()                               (2)
unflatten_device_tree() or ACPI parsing     (3)
bootmem_init()
    zone_sizes_init()
        update arm64_dma_phys_limit         (4)
    late_reserve_crashkernel()              (5)
For most arm64 platforms, DMA-capable devices can access the whole low 4G
of memory without the SMMU enabled, so SZ_4G can be used directly as the
upper limit for the memblock allocation. However, on some platforms the
DMA zone does not cover all of the 32-bit addressable memory (e.g. it is
30-bit on the Raspberry Pi 4), and the upper limit of the DMA zone
(arm64_dma_phys_limit) is only updated after map_mem(), see (3) and (4)
above. Change the allocation direction of the low memory from top-down
to bottom-up. This way, as long as the DMA zone has enough contiguous
free memory, the memory reserved for crash dump will not extend beyond
the DMA zone. Of course, the DMA zone may still be too small, so add
late_reserve_crashkernel() to fall back if needed:
1. For crashkernel=X (no offset specified):
   Fall back to reserving a region above the DMA zone, and additionally
   reserve the default size of low memory in the DMA zone.
2. For crashkernel=X,high:
   Fall back to searching the low memory for the size specified by
   crashkernel=,high.
The command-line forms involved are sketched right after this list.
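For illustration, these are the crashkernel= forms that the fallback
logic distinguishes. The sizes and the offset are arbitrary example
values, not values taken from this patch:

    crashkernel=512M          # case 1: low memory preferred; may fall back
                              #   to high memory plus a default low reserve
    crashkernel=512M,high     # case 2: high memory preferred; may fall back
                              #   to low memory of the same size
    crashkernel=512M,high crashkernel=128M,low
                              # as case 2, with an explicit low reserve
    crashkernel=512M@36M      # fixed base: no fallback, the reservation
                              #   fails if the range is unavailable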
In reserve_crashkernel(), the allocation policy is as follows:

        low                                 high
         |<---DMA---|---------------------->|
         |          |
         |<<<-------------(1)---------------|   top-down
         |----------------(2)------------>>>|   bottom-up

(1) crashkernel=Y,high: the upper limit is known, allocate top-down.
(2) crashkernel=Y,low and crashkernel=X: the upper limit is unknown,
    allocate bottom-up.
(x) crashkernel=X@offset: fixed base, no search direction.
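Condensed from crashkernel_phys_alloc_range() in the diff below, the
direction toggle looks like this; a sketch only, using the existing
memblock helpers memblock_bottom_up(), memblock_set_bottom_up() and
memblock_phys_alloc_range():

    phys_addr_t base;
    bool saved = memblock_bottom_up();

    /* upper limit unknown: search upward so the result stays as low
     * as possible, ideally inside the eventual DMA zone */
    memblock_set_bottom_up(true);
    base = memblock_phys_alloc_range(size, CRASH_ALIGN,
                                     0, CRASH_ADDR_HIGH_MAX);
    memblock_set_bottom_up(saved);  /* restore the previous direction */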
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
---
arch/arm64/mm/init.c | 212 ++++++++++++++++++++++++++++++++-----------
1 file changed, 160 insertions(+), 52 deletions(-)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index d31c3a9290c5524..d2ab377520b2742 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -69,23 +69,168 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit;
#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit
#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1)
-#define CRASH_HIGH_SEARCH_BASE SZ_4G
+#define CRASHKERNEL_TYPE_FIXED_BASE 1
+#define CRASHKERNEL_TYPE_HIGH 2
#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20)
+static int crashkernel_type __initdata;
+
+static phys_addr_t __init crashkernel_phys_alloc_range(phys_addr_t size,
+ phys_addr_t start,
+ phys_addr_t end)
+{
+ phys_addr_t base;
+ bool old_direction;
+
+ old_direction = memblock_bottom_up();
+ if (!end) {
+ /* The upper limit is unknown, allocate bottom-up */
+ end = CRASH_ADDR_HIGH_MAX;
+ memblock_set_bottom_up(true);
+ }
+ base = memblock_phys_alloc_range(size, CRASH_ALIGN, start, end);
+ memblock_set_bottom_up(old_direction);
+
+ return base;
+}
+
+static void __init crashkernel_low_rollback(void)
+{
+ if (crashk_low_res.end) {
+ release_resource(&crashk_low_res);
+ memblock_phys_free(crashk_low_res.start, resource_size(&crashk_low_res));
+ crashk_low_res.start = 0;
+ crashk_low_res.end = 0;
+ }
+}
+
+static void __init crashkernel_rollback(void)
+{
+ release_resource(&crashk_res);
+ memblock_phys_free(crashk_res.start, resource_size(&crashk_res));
+ crashk_res.start = 0;
+ crashk_res.end = 0;
+
+ crashkernel_low_rollback();
+}
+
+static void __init late_reserve_crashkernel(void)
+{
+ struct resource *res;
+ unsigned long long low_base, low_size;
+ unsigned long long crash_base, crash_size;
+
+ res = &crashk_res;
+ if (!res->end)
+ return;
+
+ crash_size = resource_size(res);
+ if (crashkernel_type == CRASHKERNEL_TYPE_HIGH) {
+ /*
+ * CRASH_ADDR_LOW_MAX
+ * |
+ * |<----DMA---->|------------|
+ * |-high-| //case1
+ * |-high-| //case2
+ * |-high-| //case3
+ */
+ if (crashk_res.end < CRASH_ADDR_LOW_MAX) /* case 1 */
+ crashkernel_low_rollback();
+ else if (crashk_res.start >= CRASH_ADDR_LOW_MAX) /* case 3 */
+ res = &crashk_low_res;
+
+ low_size = crashk_low_res.end ? resource_size(&crashk_low_res) : 0;
+ }
+
+ /* All crashkernel memory is reserved as expected */
+ if (res->end < CRASH_ADDR_LOW_MAX)
+ goto ok;
+
+ crashkernel_rollback();
+
+ /* For details, see Documentation/arch/arm64/kdump.rst */
+ if (crashkernel_type == CRASHKERNEL_TYPE_FIXED_BASE) {
+ pr_warn("crashkernel reservation failed - memory range is invalid\n");
+ return;
+ } else if (crashkernel_type == CRASHKERNEL_TYPE_HIGH) {
+ /* Above case 3 (low memory is not enough) */
+ if (res == &crashk_low_res) {
+ pr_warn("cannot allocate crashkernel low memory (size:0x%llx)\n", low_size);
+ return;
+ }
+
+ /*
+ * Above case 2. Fall back to searching the low memory with
+ * the specified size in crashkernel=,high
+ */
+ crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
+ 0, CRASH_ADDR_LOW_MAX);
+ if (!crash_base) {
+ pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size);
+ return;
+ }
+ } else {
+ /*
+ * Fall back to reserve region above DMA zone and allocate default
+ * size of memory in DMA zone.
+ */
+ low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
+ low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
+ if (!low_base) {
+ pr_warn("cannot allocate crashkernel low memory (size:0x%llx)\n", low_size);
+ return;
+ }
+
+ crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
+ CRASH_ADDR_LOW_MAX, CRASH_ADDR_HIGH_MAX);
+ if (!crash_base) {
+ pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size);
+ memblock_phys_free(low_base, low_size);
+ return;
+ }
+
+ crashk_low_res.start = low_base;
+ crashk_low_res.end = low_base + low_size - 1;
+ insert_resource(&iomem_resource, &crashk_low_res);
+ }
+
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+ insert_resource(&iomem_resource, &crashk_res);
+
+ok:
+ crash_base = crashk_res.start;
+ crash_size = resource_size(&crashk_res);
+ pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
+ crash_base, crash_base + crash_size, crash_size >> 20);
+
+ if (crashk_low_res.end) {
+ low_base = crashk_low_res.start;
+ low_size = resource_size(&crashk_low_res);
+ pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
+ low_base, low_base + low_size, low_size >> 20);
+ }
+
+ /*
+ * The crashkernel memory will be removed from the kernel linear
+ * map. Inform kmemleak so that it won't try to access it.
+ */
+ kmemleak_ignore_phys(crash_base);
+ if (crashk_low_res.end)
+ kmemleak_ignore_phys(crashk_low_res.start);
+}
+
static int __init reserve_crashkernel_low(unsigned long long low_size)
{
unsigned long long low_base;
- low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
+ low_base = crashkernel_phys_alloc_range(low_size, 0, CRASH_ADDR_LOW_MAX);
if (!low_base) {
pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
return -ENOMEM;
}
- pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
- low_base, low_base + low_size, low_size >> 20);
-
crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1;
insert_resource(&iomem_resource, &crashk_low_res);
@@ -102,12 +247,10 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
*/
static void __init reserve_crashkernel(void)
{
- unsigned long long crash_low_size = 0, search_base = 0;
+ unsigned long long crash_low_size = 0;
unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
unsigned long long crash_base, crash_size;
char *cmdline = boot_command_line;
- bool fixed_base = false;
- bool high = false;
int ret;
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
@@ -131,9 +274,8 @@ static void __init reserve_crashkernel(void)
else if (ret)
return;
- search_base = CRASH_HIGH_SEARCH_BASE;
crash_max = CRASH_ADDR_HIGH_MAX;
- high = true;
+ crashkernel_type = CRASHKERNEL_TYPE_HIGH;
} else if (ret || !crash_size) {
/* The specified value is invalid */
return;
@@ -143,67 +285,31 @@ static void __init reserve_crashkernel(void)
/* User specifies base address explicitly. */
if (crash_base) {
- fixed_base = true;
- search_base = crash_base;
+ crashkernel_type = CRASHKERNEL_TYPE_FIXED_BASE;
crash_max = crash_base + crash_size;
}
-retry:
- crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
- search_base, crash_max);
+ crash_base = crashkernel_phys_alloc_range(crash_size, crash_base, crash_max);
if (!crash_base) {
/*
* For crashkernel=size[KMG]@offset[KMG], print out failure
* message if can't reserve the specified region.
*/
- if (fixed_base) {
+ if (crashkernel_type == CRASHKERNEL_TYPE_FIXED_BASE) {
pr_warn("crashkernel reservation failed - memory is in use.\n");
return;
}
- /*
- * For crashkernel=size[KMG], if the first attempt was for
- * low memory, fall back to high memory, the minimum required
- * low memory will be reserved later.
- */
- if (!high && crash_max == CRASH_ADDR_LOW_MAX) {
- crash_max = CRASH_ADDR_HIGH_MAX;
- search_base = CRASH_ADDR_LOW_MAX;
- crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
- goto retry;
- }
+ pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size);
- /*
- * For crashkernel=size[KMG],high, if the first attempt was
- * for high memory, fall back to low memory.
- */
- if (high && crash_max == CRASH_ADDR_HIGH_MAX) {
- crash_max = CRASH_ADDR_LOW_MAX;
- search_base = 0;
- goto retry;
- }
- pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
- crash_size);
return;
}
- if ((crash_base >= CRASH_ADDR_LOW_MAX) && crash_low_size &&
- reserve_crashkernel_low(crash_low_size)) {
+ if (crash_low_size && reserve_crashkernel_low(crash_low_size)) {
memblock_phys_free(crash_base, crash_size);
return;
}
- pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
- crash_base, crash_base + crash_size, crash_size >> 20);
-
- /*
- * The crashkernel memory will be removed from the kernel linear
- * map. Inform kmemleak so that it won't try to access it.
- */
- kmemleak_ignore_phys(crash_base);
- if (crashk_low_res.end)
- kmemleak_ignore_phys(crashk_low_res.start);
-
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
insert_resource(&iomem_resource, &crashk_res);
@@ -408,6 +514,8 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
+ reserve_crashkernel();
+
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
}
@@ -454,7 +562,7 @@ void __init bootmem_init(void)
* request_standard_resources() depends on crashkernel's memory being
* reserved, so do it here.
*/
- reserve_crashkernel();
+ late_reserve_crashkernel();
memblock_dump_all();
}
--
2.25.1
* [PATCH 2/3] arm64: kdump: use page-level mapping for crashkernel region
From: thunder.leizhen @ 2023-07-21 8:17 UTC
To: Dave Young, Baoquan He, Vivek Goyal, Eric W. Biederman, kexec,
linux-kernel, Catalin Marinas, Will Deacon, linux-arm-kernel
Cc: Zhen Lei
From: Zhen Lei <thunder.leizhen@huawei.com>
Use page-level mappings for the crashkernel region so that
set_memory_valid() can be used to toggle access protection for it.
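For context, this is what the page-level mapping enables, sketched with
the existing arm64 helper set_memory_valid(addr, nr_pages, enable);
"addr" and "nr_pages" are illustrative names for the region's
linear-map address and page count, not identifiers from this patch:

    /*
     * With the region mapped at page granularity, the valid bit can be
     * toggled per page. With block (section) mappings, this would first
     * require splitting the mapping, which is not safe to do live.
     */
    set_memory_valid(addr, nr_pages, 0);    /* revoke access */
    set_memory_valid(addr, nr_pages, 1);    /* restore access */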
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
---
arch/arm64/mm/mmu.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 95d360805f8aeb3..e0a197ebe14837d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -594,6 +594,11 @@ static void __init map_mem(pgd_t *pgdp)
*/
memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+#ifdef CONFIG_KEXEC_CORE
+ if (crashk_res.end)
+ memblock_mark_nomap(crashk_res.start, resource_size(&crashk_res));
+#endif
+
/* map all the memory banks */
for_each_mem_range(i, &start, &end) {
if (start >= end)
@@ -621,6 +626,22 @@ static void __init map_mem(pgd_t *pgdp)
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
arm64_kfence_map_pool(early_kfence_pool, pgdp);
+
+ /*
+ * Use page-level mappings here so that we can shrink the region
+ * at page granularity and return unused memory to the buddy
+ * system through the /sys/kernel/kexec_crash_size interface.
+ */
+#ifdef CONFIG_KEXEC_CORE
+ if (crashk_res.end) {
+ __map_memblock(pgdp, crashk_res.start,
+ crashk_res.end + 1,
+ PAGE_KERNEL,
+ NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+ memblock_clear_nomap(crashk_res.start,
+ resource_size(&crashk_res));
+ }
+#endif
}
void mark_rodata_ro(void)
--
2.25.1
* [PATCH 3/3] arm64: kdump: add access protection for crashkernel region
From: thunder.leizhen @ 2023-07-21 8:17 UTC
To: Dave Young, Baoquan He, Vivek Goyal, Eric W. Biederman, kexec,
linux-kernel, Catalin Marinas, Will Deacon, linux-arm-kernel
Cc: Zhen Lei
From: Zhen Lei <thunder.leizhen@huawei.com>
arch_kexec_protect_crashkres() and arch_kexec_unprotect_crashkres()
are meant to be called by kexec_load() in order to protect the memory
allocated for crash dump kernel once the image is loaded.
This basically reverts commit 0d124e96051b
("arm64: kdump : take off the protection on crashkernel memory region"),
except when the crashkernel region has been reserved via the fallback
path, because no page-level mapping was created for the newly allocated
region in that case.
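For reference, the generic kexec path invokes these hooks roughly as
follows; a simplified sketch of the caller side (see do_kexec_load() in
kernel/kexec.c), not part of this patch:

    /* unprotect an already-loaded crash image before replacing it ... */
    if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
        arch_kexec_unprotect_crashkres();

    /* ... load the new image segments into the crashkernel region ... */

    /* ... then protect the region again once the image is in place */
    if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
        arch_kexec_protect_crashkres();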
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
---
arch/arm64/include/asm/kexec.h | 8 ++++++++
arch/arm64/kernel/machine_kexec.c | 26 ++++++++++++++++++++++++++
arch/arm64/mm/init.c | 4 ++++
3 files changed, 38 insertions(+)
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 9ac9572a3bbee2c..a55388ff045e980 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -102,6 +102,14 @@ void cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
int machine_kexec_post_load(struct kimage *image);
#define machine_kexec_post_load machine_kexec_post_load
+
+extern bool crash_fallback;
+
+void arch_kexec_protect_crashkres(void);
+#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres
+
+void arch_kexec_unprotect_crashkres(void);
+#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres
#endif
#define ARCH_HAS_KIMAGE_ARCH
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 078910db77a41b6..00392b48501d35c 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -269,6 +269,32 @@ void machine_crash_shutdown(struct pt_regs *regs)
pr_info("Starting crashdump kernel...\n");
}
+void arch_kexec_protect_crashkres(void)
+{
+ int i;
+
+ if (crash_fallback)
+ return;
+
+ for (i = 0; i < kexec_crash_image->nr_segments; i++)
+ set_memory_valid(
+ __phys_to_virt(kexec_crash_image->segment[i].mem),
+ kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
+}
+
+void arch_kexec_unprotect_crashkres(void)
+{
+ int i;
+
+ if (crash_fallback)
+ return;
+
+ for (i = 0; i < kexec_crash_image->nr_segments; i++)
+ set_memory_valid(
+ __phys_to_virt(kexec_crash_image->segment[i].mem),
+ kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
+}
+
#ifdef CONFIG_HIBERNATION
/*
* To preserve the crash dump kernel image, the relevant memory segments
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index d2ab377520b2742..b544ed0ab04193d 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -74,6 +74,7 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit;
#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20)
+bool crash_fallback;
static int crashkernel_type __initdata;
static phys_addr_t __init crashkernel_phys_alloc_range(phys_addr_t size,
@@ -199,6 +200,9 @@ static void __init late_reserve_crashkernel(void)
crashk_res.end = crash_base + crash_size - 1;
insert_resource(&iomem_resource, &crashk_res);
+ crash_fallback = true;
+ pr_info("cannot allocate all crashkernel memory as expected, fallen back.\n");
+
ok:
crash_base = crashk_res.start;
crash_size = resource_size(&crashk_res);
--
2.25.1