* [kvm-unit-tests RFC PATCH 2/6] configure: Make arch_libdir a first-class entity
From: Chinmay Rath @ 2026-06-02 6:48 UTC (permalink / raw)
To: thuth
Cc: npiggin, harshpb, lvivier, linuxppc-dev, kvm, andrew.jones, sbhat,
Chinmay Rath
In-Reply-To: <20260602064806.3101025-1-rathc@linux.ibm.com>
From: Nicholas Piggin <npiggin@gmail.com>
arch_libdir was brought in to improve the heuristic determination of
the lib/ directory based on arch and testdir names, but it did not
entirely clean that mess up.
Remove the arch_libdir->arch->testdir heuristic and just require that
every architecture sets arch_libdir correctly. Fail if the lib/$arch_libdir
or lib/$arch_libdir/asm directory cannot be found.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
Makefile | 2 +-
configure | 20 ++++++++++++--------
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/Makefile b/Makefile
index 42ef5826..8e002043 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ vpath %.s $(SRCDIR)
vpath %.S $(SRCDIR)
libdirs-get = $(shell [ -d "lib/$(1)" ] && echo "lib/$(1) lib/$(1)/asm")
-ARCH_LIBDIRS := $(call libdirs-get,$(ARCH_LIBDIR)) $(call libdirs-get,$(TEST_DIR))
+ARCH_LIBDIRS := $(call libdirs-get,$(ARCH_LIBDIR))
OBJDIRS := $(ARCH_LIBDIRS)
DESTDIR := $(PREFIX)/share/kvm-unit-tests/
diff --git a/configure b/configure
index 6d549d1e..aeb5570c 100755
--- a/configure
+++ b/configure
@@ -274,7 +274,6 @@ fi
arch_name=$arch
[ "$arch" = "aarch64" ] && arch="arm64"
[ "$arch_name" = "arm64" ] && arch_name="aarch64"
-arch_libdir=$arch
if [ "$arch" = "riscv" ]; then
echo "riscv32 or riscv64 must be specified"
@@ -373,8 +372,10 @@ fi
if [ "$arch" = "i386" ] || [ "$arch" = "x86_64" ]; then
testdir=x86
+ arch_libdir=x86
elif [ "$arch" = "arm" ] || [ "$arch" = "arm64" ]; then
testdir=arm
+ arch_libdir=$arch
if [ "$target" = "qemu" ]; then
: "${uart_early_addr:=0x9000000}"
elif [ "$target" = "kvmtool" ]; then
@@ -385,6 +386,7 @@ elif [ "$arch" = "arm" ] || [ "$arch" = "arm64" ]; then
fi
elif [ "$arch" = "ppc64" ]; then
testdir=powerpc
+ arch_libdir=ppc64
firmware="$testdir/boot_rom.bin"
if [ "$endian" != "little" ] && [ "$endian" != "big" ]; then
echo "You must provide endianness (big or little)!"
@@ -400,6 +402,7 @@ elif [ "$arch" = "riscv32" ] || [ "$arch" = "riscv64" ]; then
fi
elif [ "$arch" = "s390x" ]; then
testdir=s390x
+ arch_libdir=s390x
else
echo "arch $arch is not supported!"
arch=
@@ -409,6 +412,10 @@ if [ ! -d "$srcdir/$testdir" ]; then
echo "$srcdir/$testdir does not exist!"
exit 1
fi
+if [ ! -d "$srcdir/lib/$arch_libdir" ]; then
+ echo "$srcdir/lib/$arch_libdir does not exist!"
+ exit 1
+fi
if [ "$efi" = "y" ] && [ -f "$srcdir/$testdir/efi/run" ]; then
ln -fs "$srcdir/$testdir/efi/run" $testdir-run
@@ -471,15 +478,12 @@ fi
# link lib/asm for the architecture
rm -f lib/asm
asm="asm-generic"
-if [ -d "$srcdir/lib/$arch/asm" ]; then
- asm="$srcdir/lib/$arch/asm"
- mkdir -p "lib/$arch"
-elif [ -d "$srcdir/lib/$arch_libdir/asm" ]; then
+if [ -d "$srcdir/lib/$arch_libdir/asm" ]; then
asm="$srcdir/lib/$arch_libdir/asm"
mkdir -p "lib/$arch_libdir"
-elif [ -d "$srcdir/lib/$testdir/asm" ]; then
- asm="$srcdir/lib/$testdir/asm"
- mkdir -p "lib/$testdir"
+else
+ echo "$srcdir/lib/$arch_libdir/asm does not exist"
+ exit 1
fi
ln -sf "$asm" lib/asm
mkdir -p lib/generated lib/libfdt
--
2.53.0
^ permalink raw reply related
* [kvm-unit-tests RFC PATCH 1/6] powerpc: add pmu tests
From: Chinmay Rath @ 2026-06-02 6:48 UTC (permalink / raw)
To: thuth
Cc: npiggin, harshpb, lvivier, linuxppc-dev, kvm, andrew.jones, sbhat,
Chinmay Rath
In-Reply-To: <20260602064806.3101025-1-rathc@linux.ibm.com>
From: Nicholas Piggin <npiggin@gmail.com>
Add some initial PMU testing.
- PMC5/6 tests
- PMAE / PMI test
- BHRB basic tests
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
lib/powerpc/asm/processor.h | 2 +
lib/powerpc/asm/reg.h | 9 +
lib/powerpc/asm/setup.h | 1 +
lib/powerpc/setup.c | 20 ++
powerpc/Makefile.common | 3 +-
powerpc/pmu.c | 567 ++++++++++++++++++++++++++++++++++++
powerpc/unittests.cfg | 3 +
7 files changed, 604 insertions(+), 1 deletion(-)
create mode 100644 powerpc/pmu.c
diff --git a/lib/powerpc/asm/processor.h b/lib/powerpc/asm/processor.h
index 153126fe..08506438 100644
--- a/lib/powerpc/asm/processor.h
+++ b/lib/powerpc/asm/processor.h
@@ -17,6 +17,8 @@ extern bool cpu_has_hv;
extern bool cpu_has_power_mce;
extern bool cpu_has_siar;
extern bool cpu_has_heai;
+extern bool cpu_has_bhrb;
+extern bool cpu_has_p10_bhrb;
extern bool cpu_has_radix;
extern bool cpu_has_prefix;
extern bool cpu_has_sc_lev;
diff --git a/lib/powerpc/asm/reg.h b/lib/powerpc/asm/reg.h
index 69ef21ad..602fba1b 100644
--- a/lib/powerpc/asm/reg.h
+++ b/lib/powerpc/asm/reg.h
@@ -40,10 +40,19 @@
#define SPR_LPIDR 0x13f
#define SPR_HEIR 0x153
#define SPR_PTCR 0x1d0
+#define SPR_MMCRA 0x312
+#define MMCRA_BHRBRD UL(0x0000002000000000)
+#define MMCRA_IFM_MASK UL(0x00000000c0000000)
+#define SPR_PMC5 0x317
+#define SPR_PMC6 0x318
#define SPR_MMCR0 0x31b
#define MMCR0_FC UL(0x80000000)
+#define MMCR0_FCP UL(0x20000000)
#define MMCR0_PMAE UL(0x04000000)
+#define MMCR0_BHRBA UL(0x00200000)
+#define MMCR0_FCPC UL(0x00001000)
#define MMCR0_PMAO UL(0x00000080)
+#define MMCR0_FC56 UL(0x00000010)
#define SPR_SIAR 0x31c
/* Machine State Register definitions: */
diff --git a/lib/powerpc/asm/setup.h b/lib/powerpc/asm/setup.h
index 9ca318ce..8f0b58ed 100644
--- a/lib/powerpc/asm/setup.h
+++ b/lib/powerpc/asm/setup.h
@@ -10,6 +10,7 @@
#define NR_CPUS 8 /* arbitrarily set for now */
extern uint64_t tb_hz;
+extern uint64_t cpu_hz;
#define NR_MEM_REGIONS 8
#define MR_F_PRIMARY (1U << 0)
diff --git a/lib/powerpc/setup.c b/lib/powerpc/setup.c
index c1f0f9ad..ef4ebdbc 100644
--- a/lib/powerpc/setup.c
+++ b/lib/powerpc/setup.c
@@ -33,6 +33,7 @@ u32 initrd_size;
u32 cpu_to_hwid[NR_CPUS] = { [0 ... NR_CPUS-1] = (~0U) };
int nr_cpus_present;
uint64_t tb_hz;
+uint64_t cpu_hz;
struct mem_region mem_regions[NR_MEM_REGIONS];
phys_addr_t __physical_start, __physical_end;
@@ -42,6 +43,7 @@ struct cpu_set_params {
unsigned icache_bytes;
unsigned dcache_bytes;
uint64_t tb_hz;
+ uint64_t cpu_hz;
};
static void cpu_set(int fdtnode, u64 regval, void *info)
@@ -95,6 +97,19 @@ static void cpu_set(int fdtnode, u64 regval, void *info)
data = (u32 *)prop->data;
params->tb_hz = fdt32_to_cpu(*data);
+ prop = fdt_get_property(dt_fdt(), fdtnode,
+ "ibm,extended-clock-frequency", NULL);
+ if (prop) {
+ u64 *data64 = (u64 *)prop->data;
+ params->cpu_hz = fdt64_to_cpu(*data64);
+ } else {
+ prop = fdt_get_property(dt_fdt(), fdtnode,
+ "clock-frequency", NULL);
+ assert(prop != NULL);
+ data = (u32 *)prop->data;
+ params->cpu_hz = fdt32_to_cpu(*data);
+ }
+
read_common_info = true;
}
}
@@ -103,6 +118,8 @@ bool cpu_has_hv;
bool cpu_has_power_mce; /* POWER CPU machine checks */
bool cpu_has_siar;
bool cpu_has_heai;
+bool cpu_has_bhrb;
+bool cpu_has_p10_bhrb;
bool cpu_has_radix;
bool cpu_has_prefix;
bool cpu_has_sc_lev; /* sc interrupt has LEV field in SRR1 */
@@ -119,12 +136,14 @@ static void cpu_init_params(void)
__icache_bytes = params.icache_bytes;
__dcache_bytes = params.dcache_bytes;
tb_hz = params.tb_hz;
+ cpu_hz = params.cpu_hz;
switch (mfspr(SPR_PVR) & PVR_VERSION_MASK) {
case PVR_VER_POWER10:
cpu_has_prefix = true;
cpu_has_sc_lev = true;
cpu_has_pause_short = true;
+ cpu_has_p10_bhrb = true;
case PVR_VER_POWER9:
cpu_has_radix = true;
case PVR_VER_POWER8E:
@@ -133,6 +152,7 @@ static void cpu_init_params(void)
cpu_has_power_mce = true;
cpu_has_heai = true;
cpu_has_siar = true;
+ cpu_has_bhrb = true;
break;
default:
break;
diff --git a/powerpc/Makefile.common b/powerpc/Makefile.common
index db4a34f2..3b357982 100644
--- a/powerpc/Makefile.common
+++ b/powerpc/Makefile.common
@@ -18,7 +18,8 @@ tests-common = \
$(TEST_DIR)/sprs.elf \
$(TEST_DIR)/timebase.elf \
$(TEST_DIR)/interrupts.elf \
- $(TEST_DIR)/mmu.elf
+ $(TEST_DIR)/mmu.elf \
+ $(TEST_DIR)/pmu.elf
tests-all = $(tests-common) $(tests)
all: directories $(TEST_DIR)/boot_rom.bin $(tests-all)
diff --git a/powerpc/pmu.c b/powerpc/pmu.c
new file mode 100644
index 00000000..402ce569
--- /dev/null
+++ b/powerpc/pmu.c
@@ -0,0 +1,567 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test PMU
+ *
+ * Copyright 2024 Nicholas Piggin, IBM Corp.
+ */
+#include <libcflat.h>
+#include <util.h>
+#include <migrate.h>
+#include <alloc.h>
+#include <asm/setup.h>
+#include <asm/handlers.h>
+#include <asm/hcall.h>
+#include <asm/processor.h>
+#include <asm/time.h>
+#include <asm/barrier.h>
+#include <asm/mmu.h>
+#include <asm/smp.h>
+#include "alloc_phys.h"
+#include "vmalloc.h"
+
+static volatile bool got_interrupt;
+static volatile struct pt_regs recorded_regs;
+static volatile unsigned long recorded_mmcr0;
+
+static void illegal_handler(struct pt_regs *regs, void *data)
+{
+ got_interrupt = true;
+ regs_advance_insn(regs);
+}
+
+static void fault_handler(struct pt_regs *regs, void *data)
+{
+ got_interrupt = true;
+ regs_advance_insn(regs);
+}
+
+static void sc_handler(struct pt_regs *regs, void *data)
+{
+ got_interrupt = true;
+}
+
+static void reset_mmcr0(void)
+{
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_PMAE | MMCR0_PMAO));
+}
+
+static __attribute__((__noinline__)) unsigned long pmc5_count_nr_insns(unsigned long nr)
+{
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile("mtctr %0 ; 1: bdnz 1b" :: "r"(nr) : "ctr");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+
+ return mfspr(SPR_PMC5);
+}
+
+static void test_pmc5(void)
+{
+ unsigned long pmc5;
+ unsigned long mmcr;
+
+ reset_mmcr0();
+ mmcr = mfspr(SPR_MMCR0);
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mmcr & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile(".rep 20 ; nop ; .endr" ::: "memory");
+ mtspr(SPR_MMCR0, mmcr);
+ pmc5 = mfspr(SPR_PMC5);
+
+ report_kfail(true, pmc5 == 21, "PMC5 counts instructions exactly %ld", pmc5);
+}
+
+static void test_pmc5_with_branch(void)
+{
+ unsigned long pmc5;
+ unsigned long mmcr;
+
+ reset_mmcr0();
+ mmcr = mfspr(SPR_MMCR0);
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mmcr & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile(".rep 20 ; b $+4 ; .endr" ::: "memory");
+ mtspr(SPR_MMCR0, mmcr);
+ pmc5 = mfspr(SPR_PMC5);
+
+ /* TCG and POWER9 do not count instructions around faults correctly */
+ report_kfail(true, pmc5 == 21, "PMC5 counts instructions with branch %ld", pmc5);
+}
+
+static void test_pmc5_with_cond_branch(void)
+{
+ unsigned long pmc5;
+ unsigned long mmcr;
+
+ reset_mmcr0();
+ mmcr = mfspr(SPR_MMCR0);
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mmcr & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile(".rep 10 ; nop ; .endr ; cmpdi %0,1 ; beq 1f ; .rep 10 ; nop ; .endr ; 1:" : : "r"(0) : "memory", "cr0");
+ mtspr(SPR_MMCR0, mmcr);
+ pmc5 = mfspr(SPR_PMC5);
+
+ /* TCG and POWER9 do not count instructions around faults correctly */
+ report_kfail(true, pmc5 == 24,
+ "PMC5 counts instructions wth conditional branch %ld", pmc5);
+}
+
+static void test_pmc5_with_ill(void)
+{
+ unsigned long pmc5_1, pmc5_2;
+
+ handle_exception(0x700, &illegal_handler, NULL);
+ handle_exception(0xe40, &illegal_handler, NULL);
+
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile(".long 0x0" ::: "memory");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ assert(got_interrupt);
+ got_interrupt = false;
+ pmc5_1 = mfspr(SPR_PMC5);
+
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile(".rep 10 ; nop ; .endr ; .long 0x0 ; .rep 10 ; nop ; .endr " ::: "memory");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ assert(got_interrupt);
+ got_interrupt = false;
+ pmc5_2 = mfspr(SPR_PMC5);
+
+ /* TCG and POWER9 do not count instructions around faults correctly */
+ report_kfail(true, pmc5_1 + 20 == pmc5_2,
+ "PMC5 counts instructions with illegal instruction");
+
+ handle_exception(0x700, NULL, NULL);
+ handle_exception(0xe40, NULL, NULL);
+}
+
+static void test_pmc5_with_fault(void)
+{
+ unsigned long pmc5_1, pmc5_2;
+ unsigned long tmp;
+
+ setup_vm();
+
+ handle_exception(0x300, &fault_handler, NULL);
+ handle_exception(0x380, &fault_handler, NULL);
+
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile("ld %0,0(%1)" : "=r"(tmp) : "r"(NULL) : "memory");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ assert(got_interrupt);
+ got_interrupt = false;
+ pmc5_1 = mfspr(SPR_PMC5);
+
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile(".rep 10 ; nop ; .endr ; ld %0,0(%1) ; .rep 10 ; nop ; .endr " : "=r"(tmp) : "r"(NULL) : "memory");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ assert(got_interrupt);
+ got_interrupt = false;
+ pmc5_2 = mfspr(SPR_PMC5);
+
+ /* TCG and POWER9 do not count instructions around faults correctly */
+ report_kfail(true, pmc5_1 + 20 == pmc5_2, "PMC5 counts instructions with fault");
+
+ handle_exception(0x300, NULL, NULL);
+ handle_exception(0x380, NULL, NULL);
+}
+
+static void test_pmc5_with_sc(void)
+{
+ unsigned long pmc5_1, pmc5_2;
+
+ handle_exception(0xc00, &sc_handler, NULL);
+
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile("sc 0" ::: "memory");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ assert(got_interrupt);
+ got_interrupt = false;
+ pmc5_1 = mfspr(SPR_PMC5);
+
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile(".rep 10 ; nop ; .endr ; sc 0 ; .rep 10 ; nop ; .endr" ::: "memory");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ assert(got_interrupt);
+ got_interrupt = false;
+ pmc5_2 = mfspr(SPR_PMC5);
+
+ /* TCG does not count instructions around syscalls correctly */
+ report_kfail(host_is_tcg, pmc5_1 + 20 == pmc5_2,
+ "PMC5 counts instructions with syscall");
+
+ handle_exception(0xc00, NULL, NULL);
+}
+
+extern char next_insn[];
+
+static void test_pmc5_with_rfid(void)
+{
+ unsigned long pmc5;
+ unsigned long mmcr;
+
+ mtspr(SPR_SRR0, (unsigned long)next_insn);
+ mtspr(SPR_SRR1, mfmsr());
+ reset_mmcr0();
+ mmcr = mfspr(SPR_MMCR0);
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mmcr & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile("rfid ; trap ; .global next_insn ; next_insn: " ::: "memory");
+ mtspr(SPR_MMCR0, mmcr);
+ pmc5 = mfspr(SPR_PMC5);
+
+ /* TCG does not count instructions around syscalls correctly */
+ report_kfail(host_is_tcg, pmc5 == 2,
+ "PMC5 counts instructions with rfid %ld", pmc5);
+}
+
+static void test_pmc5_with_ldat(void)
+{
+ unsigned long pmc5_1, pmc5_2;
+ register unsigned long r4 asm("r4");
+ register unsigned long r5 asm("r5");
+ register unsigned long r6 asm("r6");
+ uint64_t val;
+
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile(".rep 20 ; nop ; .endr" ::: "memory");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ pmc5_1 = mfspr(SPR_PMC5);
+
+ val = 0xdeadbeef;
+ r4 = 0;
+ r5 = 0xdeadbeef;
+ r6 = 100;
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile(".rep 10 ; nop ; .endr ; "
+ "ldat %0,%3,0x10 ; "
+ ".rep 10 ; nop ; .endr"
+ : "=r"(r4), "+r"(r5), "+r"(r6)
+ : "r"(&val)
+ : "memory");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ pmc5_2 = mfspr(SPR_PMC5);
+ assert(r4 == 0xdeadbeef);
+ assert(val == 0xdeadbeef);
+
+ /* TCG does not count instructions around syscalls correctly */
+ report_kfail(host_is_tcg, pmc5_1 != pmc5_2 + 1,
+ "PMC5 counts instructions with ldat");
+}
+
+static void test_pmc56(void)
+{
+ unsigned long tmp;
+
+ report_prefix_push("pmc56");
+
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_PMC6, 0);
+ report(mfspr(SPR_PMC5) == 0, "PMC5 zeroed");
+ report(mfspr(SPR_PMC6) == 0, "PMC6 zeroed");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_FC);
+ msleep(100);
+ report(mfspr(SPR_PMC5) == 0, "PMC5 frozen");
+ report(mfspr(SPR_PMC6) == 0, "PMC6 frozen");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_FC56);
+ mdelay(100);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ report(mfspr(SPR_PMC5) != 0, "PMC5 counting");
+ report(mfspr(SPR_PMC6) != 0, "PMC6 counting");
+
+ /* Dynamic frequency scaling could cause the ratio to be off, so don't fail. */
+ tmp = mfspr(SPR_PMC6);
+ report(true, "PMC6 ratio to reported clock frequency is %ld%%",
+ tmp * 1000 / cpu_hz);
+
+ tmp = pmc5_count_nr_insns(100);
+ tmp = pmc5_count_nr_insns(1000) - tmp;
+ report(tmp == 900, "PMC5 counts instructions precisely %ld", tmp);
+
+ test_pmc5();
+ test_pmc5_with_branch();
+ test_pmc5_with_cond_branch();
+ test_pmc5_with_ill();
+ test_pmc5_with_fault();
+ test_pmc5_with_sc();
+ test_pmc5_with_rfid();
+ test_pmc5_with_ldat();
+
+ report_prefix_pop();
+}
+
+static void dec_ignore_handler(struct pt_regs *regs, void *data)
+{
+ mtspr(SPR_DEC, 0x7fffffff);
+}
+
+static void pmi_handler(struct pt_regs *regs, void *data)
+{
+ got_interrupt = true;
+ memcpy((void *)&recorded_regs, regs, sizeof(struct pt_regs));
+ recorded_mmcr0 = mfspr(SPR_MMCR0);
+ if (mfspr(SPR_MMCR0) & MMCR0_PMAO) {
+ /* This may cause infinite interrupts, so clear it. */
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_PMAO);
+ }
+}
+
+static void test_pmi(void)
+{
+ report_prefix_push("pmi");
+ handle_exception(0x900, &dec_ignore_handler, NULL);
+ handle_exception(0xf00, &pmi_handler, NULL);
+ reset_mmcr0();
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | MMCR0_PMAO);
+ mtmsr(mfmsr() | MSR_EE);
+ mtmsr(mfmsr() & ~MSR_EE);
+ report(got_interrupt, "PMAO caused interrupt");
+ got_interrupt = false;
+ handle_exception(0xf00, NULL, NULL);
+ handle_exception(0x900, NULL, NULL);
+ report_prefix_pop();
+}
+
+static void clrbhrb(void)
+{
+ asm volatile("clrbhrb" ::: "memory");
+}
+
+static inline unsigned long mfbhrbe(int nr)
+{
+ unsigned long e;
+
+ asm volatile("mfbhrbe %0,%1" : "=r"(e) : "i"(nr) : "memory");
+
+ return e;
+}
+
+extern unsigned char dummy_branch_1[];
+extern unsigned char dummy_branch_2[];
+
+static __attribute__((__noinline__)) void bhrb_dummy(int i)
+{
+ asm volatile(
+ " cmpdi %0,1 \n\t"
+ " beq 1f \n\t"
+ ".global dummy_branch_1 \n\t"
+ "dummy_branch_1: \n\t"
+ " b 2f \n\t"
+ "1: trap \n\t"
+ ".global dummy_branch_2 \n\t"
+ "dummy_branch_2: \n\t"
+ "2: bne 3f \n\t"
+ " trap \n\t"
+ "3: nop \n\t"
+ : : "r"(i));
+}
+
+#define NR_BHRBE 16
+static unsigned long bhrbe[NR_BHRBE];
+static int nr_bhrbe;
+
+static void run_and_load_bhrb(void)
+{
+ int i;
+
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_PMAE);
+ clrbhrb();
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | MMCR0_BHRBA);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FCP | MMCR0_FCPC));
+ mtspr(SPR_MMCRA, mfspr(SPR_MMCRA) & ~(MMCRA_BHRBRD | MMCRA_IFM_MASK));
+
+ if (cpu_has_p10_bhrb) {
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | MMCR0_PMAE);
+ asm volatile("isync" ::: "memory");
+ enter_usermode();
+ bhrb_dummy(0);
+ exit_usermode();
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_PMAE);
+ asm volatile("isync" ::: "memory");
+ } else {
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | MMCR0_PMAE);
+ asm volatile("isync" ::: "memory");
+ mtmsr(mfmsr());
+ asm volatile(".rept 100 ; nop ; .endr");
+ bhrb_dummy(0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_PMAE);
+ asm volatile("isync" ::: "memory");
+ }
+
+ bhrbe[0] = mfbhrbe(0);
+ bhrbe[1] = mfbhrbe(1);
+ bhrbe[2] = mfbhrbe(2);
+ bhrbe[3] = mfbhrbe(3);
+ bhrbe[4] = mfbhrbe(4);
+ bhrbe[5] = mfbhrbe(5);
+ bhrbe[6] = mfbhrbe(6);
+ bhrbe[7] = mfbhrbe(7);
+ bhrbe[8] = mfbhrbe(8);
+ bhrbe[9] = mfbhrbe(9);
+ bhrbe[10] = mfbhrbe(10);
+ bhrbe[11] = mfbhrbe(11);
+ bhrbe[12] = mfbhrbe(12);
+ bhrbe[13] = mfbhrbe(13);
+ bhrbe[14] = mfbhrbe(14);
+ bhrbe[15] = mfbhrbe(15);
+
+ for (i = 0; i < NR_BHRBE; i++) {
+ bhrbe[i] &= ~0x1UL; /* remove prediction bit */
+ if (!bhrbe[i])
+ break;
+ }
+ nr_bhrbe = i;
+}
+
+static void test_bhrb(void)
+{
+ int i;
+
+ if (cpu_has_p10_bhrb && !vm_available())
+ return;
+
+ report_prefix_push("bhrb");
+
+ /* TCG doesn't implement BHRB yet */
+ handle_exception(0x700, &illegal_handler, NULL);
+ handle_exception(0xe40, &illegal_handler, NULL);
+ clrbhrb();
+ handle_exception(0x700, NULL, NULL);
+ handle_exception(0xe40, NULL, NULL);
+ if (got_interrupt) {
+ got_interrupt = false;
+ report_skip("BHRB support missing");
+ report_prefix_pop();
+ return;
+ }
+
+ if (vm_available()) {
+ handle_exception(0x900, &dec_ignore_handler, NULL);
+ setup_vm();
+ }
+ reset_mmcr0();
+ clrbhrb();
+ if (cpu_has_p10_bhrb) {
+ enter_usermode();
+ bhrb_dummy(0);
+ exit_usermode();
+ } else {
+ bhrb_dummy(0);
+ }
+ report(mfbhrbe(0) == 0, "BHRB is frozen");
+
+ /*
+ * BHRB may be cleared at any time (e.g., by OS or hypervisor)
+ * so this test could be occasionally incorrect. Try several
+ * times before giving up...
+ */
+
+ if (cpu_has_p10_bhrb) {
+ /*
+ * BHRB should have 8 entries:
+ * 1. enter_usermode blr
+ * 2. enter_usermode blr target
+ * 3. bl dummy
+ * 4. dummy unconditional
+ * 5. dummy conditional
+ * 6. dummy blr
+ * 7. dummy blr target
+ * 8. exit_usermode bl
+ *
+ * POWER10 often gives 4 entries, if other threads are
+ * running on the core, it seems to struggle.
+ */
+ for (i = 0; i < 200; i++) {
+ run_and_load_bhrb();
+ if (nr_bhrbe == 8)
+ break;
+ if (i > 100 && nr_bhrbe == 4)
+ break;
+ }
+ report(nr_bhrbe, "BHRB has been written");
+ report_kfail(!host_is_tcg, nr_bhrbe == 8,
+ "BHRB has written 8 entries");
+ if (nr_bhrbe == 8) {
+ report(bhrbe[4] == (unsigned long)dummy_branch_1,
+ "correct unconditional branch address");
+ report(bhrbe[3] == (unsigned long)dummy_branch_2,
+ "correct conditional branch address");
+ } else if (nr_bhrbe == 4) {
+ /* POWER10 workaround */
+ report(nr_bhrbe == 4, "BHRB has written 4 entries");
+ report(bhrbe[3] == (unsigned long)dummy_branch_2,
+ "correct conditional branch address");
+ }
+ } else {
+ /*
+ * BHRB should have 6 entries:
+ * 1. bl dummy
+ * 2. dummy unconditional
+ * 3. dummy conditional
+ * 4. dummy blr
+ * 5. dummy blr target
+ * 6. Final b loop before disabled.
+ *
+ * POWER9 often gives 4 entries, if other threads are
+ * running on the core, it seems to struggle.
+ */
+ for (i = 0; i < 200; i++) {
+ run_and_load_bhrb();
+ if (nr_bhrbe == 6)
+ break;
+ if (i > 100 && nr_bhrbe == 4)
+ break;
+ }
+ report(nr_bhrbe, "BHRB has been written");
+ report_kfail(!host_is_tcg, nr_bhrbe == 6,
+ "BHRB has written 6 entries");
+ if (nr_bhrbe == 6) {
+ report(bhrbe[4] == (unsigned long)dummy_branch_1,
+ "correct unconditional branch address");
+ report(bhrbe[3] == (unsigned long)dummy_branch_2,
+ "correct conditional branch address");
+ } else if (nr_bhrbe == 4) {
+ /* POWER9 workaround */
+ report(nr_bhrbe == 4, "BHRB has written 4 entries");
+ report(bhrbe[3] == (unsigned long)dummy_branch_2,
+ "correct conditional branch address");
+ }
+ }
+
+ handle_exception(0x900, NULL, NULL);
+
+ report_prefix_pop();
+}
+
+int main(int argc, char **argv)
+{
+ report_prefix_push("pmu");
+
+ test_pmc56();
+ test_pmi();
+ if (cpu_has_bhrb)
+ test_bhrb();
+
+ report_prefix_pop();
+
+ return report_summary();
+}
diff --git a/powerpc/unittests.cfg b/powerpc/unittests.cfg
index 2dd32edf..60c73086 100644
--- a/powerpc/unittests.cfg
+++ b/powerpc/unittests.cfg
@@ -75,6 +75,9 @@ file = interrupts.elf
file = mmu.elf
smp = 2
+[pmu]
+file = pmu.elf
+
[smp]
file = smp.elf
smp = 2
--
2.53.0
^ permalink raw reply related
* [kvm-unit-tests RFC PATCH 5/6] scripts/arch-run.bash: Fix run_panic() success exit status
From: Chinmay Rath @ 2026-06-02 6:48 UTC (permalink / raw)
To: thuth
Cc: npiggin, harshpb, lvivier, linuxppc-dev, kvm, andrew.jones, sbhat,
Chinmay Rath
In-Reply-To: <20260602064806.3101025-1-rathc@linux.ibm.com>
From: Nicholas Piggin <npiggin@gmail.com>
run_qemu_status() looks for "EXIT: STATUS=%d" if the harness command
returned 1, to determine the final status of the test. In the case of
panic tests, QEMU should terminate before successful exit status is
known, so the run_panic() command must produce the "EXIT: STATUS" line.
With this change, running a panic test returns 0 on success (panic),
and the run_test.sh unit test correctly displays it as PASS rather than
FAIL.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Acked-by: Andrew Jones <andrew.jones@linux.dev>
Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
scripts/arch-run.bash | 1 +
1 file changed, 1 insertion(+)
diff --git a/scripts/arch-run.bash b/scripts/arch-run.bash
index 01cc1ff2..41a903e4 100644
--- a/scripts/arch-run.bash
+++ b/scripts/arch-run.bash
@@ -313,6 +313,7 @@ run_panic ()
else
# some QEMU versions report multiple panic events
echo "PASS: guest panicked"
+ echo "EXIT: STATUS=1"
ret=1
fi
--
2.53.0
^ permalink raw reply related
* [kvm-unit-tests RFC PATCH 3/6] powerpc: Remove remnants of ppc64 directory and build structure
From: Chinmay Rath @ 2026-06-02 6:48 UTC (permalink / raw)
To: thuth
Cc: npiggin, harshpb, lvivier, linuxppc-dev, kvm, andrew.jones, sbhat,
Chinmay Rath
In-Reply-To: <20260602064806.3101025-1-rathc@linux.ibm.com>
From: Nicholas Piggin <npiggin@gmail.com>
This merges the ppc64 directories and files into powerpc, and
merges the three makefiles into one.
The configure --arch=powerpc option is aliased to ppc64 for
good measure.
Acked-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
MAINTAINERS | 1 -
configure | 3 +-
lib/{ppc64 => powerpc}/asm-offsets.c | 0
lib/{ppc64 => powerpc}/asm/asm-offsets.h | 0
lib/{ppc64 => powerpc}/asm/atomic.h | 0
lib/{ppc64 => powerpc}/asm/barrier.h | 4 +-
lib/{ppc64 => powerpc}/asm/bitops.h | 4 +-
lib/{ppc64 => powerpc}/asm/io.h | 4 +-
lib/{ppc64 => powerpc}/asm/mmu.h | 0
lib/{ppc64 => powerpc}/asm/opal.h | 4 +-
lib/{ppc64 => powerpc}/asm/page.h | 6 +-
lib/{ppc64 => powerpc}/asm/pgtable-hwdef.h | 6 +-
lib/{ppc64 => powerpc}/asm/pgtable.h | 2 +-
lib/{ppc64 => powerpc}/asm/ptrace.h | 6 +-
lib/powerpc/asm/spinlock.h | 6 ++
lib/powerpc/asm/stack.h | 3 +
lib/{ppc64 => powerpc}/asm/vpa.h | 0
lib/{ppc64 => powerpc}/mmu.c | 0
lib/{ppc64 => powerpc}/opal-calls.S | 0
lib/{ppc64 => powerpc}/opal.c | 0
lib/{ppc64 => powerpc}/stack.c | 0
lib/ppc64/.gitignore | 1 -
lib/ppc64/asm/handlers.h | 1 -
lib/ppc64/asm/hcall.h | 1 -
lib/ppc64/asm/memory_areas.h | 6 --
lib/ppc64/asm/ppc_asm.h | 1 -
lib/ppc64/asm/processor.h | 1 -
lib/ppc64/asm/reg.h | 1 -
lib/ppc64/asm/rtas.h | 1 -
lib/ppc64/asm/setup.h | 1 -
lib/ppc64/asm/smp.h | 1 -
lib/ppc64/asm/spinlock.h | 6 --
lib/ppc64/asm/stack.h | 11 --
lib/ppc64/asm/time.h | 1 -
powerpc/Makefile | 111 ++++++++++++++++++++-
powerpc/Makefile.common | 95 ------------------
powerpc/Makefile.ppc64 | 31 ------
37 files changed, 139 insertions(+), 180 deletions(-)
rename lib/{ppc64 => powerpc}/asm-offsets.c (100%)
rename lib/{ppc64 => powerpc}/asm/asm-offsets.h (100%)
rename lib/{ppc64 => powerpc}/asm/atomic.h (100%)
rename lib/{ppc64 => powerpc}/asm/barrier.h (83%)
rename lib/{ppc64 => powerpc}/asm/bitops.h (69%)
rename lib/{ppc64 => powerpc}/asm/io.h (50%)
rename lib/{ppc64 => powerpc}/asm/mmu.h (100%)
rename lib/{ppc64 => powerpc}/asm/opal.h (90%)
rename lib/{ppc64 => powerpc}/asm/page.h (94%)
rename lib/{ppc64 => powerpc}/asm/pgtable-hwdef.h (93%)
rename lib/{ppc64 => powerpc}/asm/pgtable.h (99%)
rename lib/{ppc64 => powerpc}/asm/ptrace.h (89%)
create mode 100644 lib/powerpc/asm/spinlock.h
rename lib/{ppc64 => powerpc}/asm/vpa.h (100%)
rename lib/{ppc64 => powerpc}/mmu.c (100%)
rename lib/{ppc64 => powerpc}/opal-calls.S (100%)
rename lib/{ppc64 => powerpc}/opal.c (100%)
rename lib/{ppc64 => powerpc}/stack.c (100%)
delete mode 100644 lib/ppc64/.gitignore
delete mode 100644 lib/ppc64/asm/handlers.h
delete mode 100644 lib/ppc64/asm/hcall.h
delete mode 100644 lib/ppc64/asm/memory_areas.h
delete mode 100644 lib/ppc64/asm/ppc_asm.h
delete mode 100644 lib/ppc64/asm/processor.h
delete mode 100644 lib/ppc64/asm/reg.h
delete mode 100644 lib/ppc64/asm/rtas.h
delete mode 100644 lib/ppc64/asm/setup.h
delete mode 100644 lib/ppc64/asm/smp.h
delete mode 100644 lib/ppc64/asm/spinlock.h
delete mode 100644 lib/ppc64/asm/stack.h
delete mode 100644 lib/ppc64/asm/time.h
delete mode 100644 powerpc/Makefile.common
delete mode 100644 powerpc/Makefile.ppc64
diff --git a/MAINTAINERS b/MAINTAINERS
index b5562e99..00d7d90b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -92,7 +92,6 @@ S: Maintained
L: linuxppc-dev@lists.ozlabs.org
F: powerpc/
F: lib/powerpc/
-F: lib/ppc64/
RISCV
M: Andrew Jones <andrew.jones@linux.dev>
diff --git a/configure b/configure
index aeb5570c..974ef4b4 100755
--- a/configure
+++ b/configure
@@ -273,6 +273,7 @@ fi
arch_name=$arch
[ "$arch" = "aarch64" ] && arch="arm64"
+[ "$arch" = "powerpc" ] && arch="ppc64"
[ "$arch_name" = "arm64" ] && arch_name="aarch64"
if [ "$arch" = "riscv" ]; then
@@ -386,7 +387,7 @@ elif [ "$arch" = "arm" ] || [ "$arch" = "arm64" ]; then
fi
elif [ "$arch" = "ppc64" ]; then
testdir=powerpc
- arch_libdir=ppc64
+ arch_libdir=powerpc
firmware="$testdir/boot_rom.bin"
if [ "$endian" != "little" ] && [ "$endian" != "big" ]; then
echo "You must provide endianness (big or little)!"
diff --git a/lib/ppc64/asm-offsets.c b/lib/powerpc/asm-offsets.c
similarity index 100%
rename from lib/ppc64/asm-offsets.c
rename to lib/powerpc/asm-offsets.c
diff --git a/lib/ppc64/asm/asm-offsets.h b/lib/powerpc/asm/asm-offsets.h
similarity index 100%
rename from lib/ppc64/asm/asm-offsets.h
rename to lib/powerpc/asm/asm-offsets.h
diff --git a/lib/ppc64/asm/atomic.h b/lib/powerpc/asm/atomic.h
similarity index 100%
rename from lib/ppc64/asm/atomic.h
rename to lib/powerpc/asm/atomic.h
diff --git a/lib/ppc64/asm/barrier.h b/lib/powerpc/asm/barrier.h
similarity index 83%
rename from lib/ppc64/asm/barrier.h
rename to lib/powerpc/asm/barrier.h
index 475434b6..22349d69 100644
--- a/lib/ppc64/asm/barrier.h
+++ b/lib/powerpc/asm/barrier.h
@@ -1,5 +1,5 @@
-#ifndef _ASMPPC64_BARRIER_H_
-#define _ASMPPC64_BARRIER_H_
+#ifndef _ASMPOWERPC_BARRIER_H_
+#define _ASMPOWERPC_BARRIER_H_
#define cpu_relax() asm volatile("or 1,1,1 ; or 2,2,2" ::: "memory")
#define pause_short() asm volatile(".long 0x7c40003c" ::: "memory")
diff --git a/lib/ppc64/asm/bitops.h b/lib/powerpc/asm/bitops.h
similarity index 69%
rename from lib/ppc64/asm/bitops.h
rename to lib/powerpc/asm/bitops.h
index c93d64bb..dc1b8cd3 100644
--- a/lib/ppc64/asm/bitops.h
+++ b/lib/powerpc/asm/bitops.h
@@ -1,5 +1,5 @@
-#ifndef _ASMPPC64_BITOPS_H_
-#define _ASMPPC64_BITOPS_H_
+#ifndef _ASMPOWERPC_BITOPS_H_
+#define _ASMPOWERPC_BITOPS_H_
#ifndef _BITOPS_H_
#error only <bitops.h> can be included directly
diff --git a/lib/ppc64/asm/io.h b/lib/powerpc/asm/io.h
similarity index 50%
rename from lib/ppc64/asm/io.h
rename to lib/powerpc/asm/io.h
index 08d7297c..cfe099f0 100644
--- a/lib/ppc64/asm/io.h
+++ b/lib/powerpc/asm/io.h
@@ -1,5 +1,5 @@
-#ifndef _ASMPPC64_IO_H_
-#define _ASMPPC64_IO_H_
+#ifndef _ASMPOWERPC_IO_H_
+#define _ASMPOWERPC_IO_H_
#define __iomem
diff --git a/lib/ppc64/asm/mmu.h b/lib/powerpc/asm/mmu.h
similarity index 100%
rename from lib/ppc64/asm/mmu.h
rename to lib/powerpc/asm/mmu.h
diff --git a/lib/ppc64/asm/opal.h b/lib/powerpc/asm/opal.h
similarity index 90%
rename from lib/ppc64/asm/opal.h
rename to lib/powerpc/asm/opal.h
index 6c3e9ffe..44e62d80 100644
--- a/lib/ppc64/asm/opal.h
+++ b/lib/powerpc/asm/opal.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _ASMPPC64_OPAL_H_
-#define _ASMPPC64_OPAL_H_
+#ifndef _ASMPOWERPC_OPAL_H_
+#define _ASMPOWERPC_OPAL_H_
#include <stdint.h>
diff --git a/lib/ppc64/asm/page.h b/lib/powerpc/asm/page.h
similarity index 94%
rename from lib/ppc64/asm/page.h
rename to lib/powerpc/asm/page.h
index 4a7ac9ec..21886c32 100644
--- a/lib/ppc64/asm/page.h
+++ b/lib/powerpc/asm/page.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _ASMPPC64_PAGE_H_
-#define _ASMPPC64_PAGE_H_
+#ifndef _ASMPOWERPC_PAGE_H_
+#define _ASMPOWERPC_PAGE_H_
/*
* Adapted from
* lib/arm64/asm/page.h and Linux kernel defines.
@@ -62,4 +62,4 @@ extern unsigned long __phys_to_virt(phys_addr_t addr);
extern void *__ioremap(phys_addr_t phys_addr, size_t size);
#endif /* !__ASSEMBLER__ */
-#endif /* _ASMPPC64_PAGE_H_ */
+#endif /* _ASMPOWERPC_PAGE_H_ */
diff --git a/lib/ppc64/asm/pgtable-hwdef.h b/lib/powerpc/asm/pgtable-hwdef.h
similarity index 93%
rename from lib/ppc64/asm/pgtable-hwdef.h
rename to lib/powerpc/asm/pgtable-hwdef.h
index 0f4b1068..3f8c6fe3 100644
--- a/lib/ppc64/asm/pgtable-hwdef.h
+++ b/lib/powerpc/asm/pgtable-hwdef.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _ASMPPC64_PGTABLE_HWDEF_H_
-#define _ASMPPC64_PGTABLE_HWDEF_H_
+#ifndef _ASMPOWERPC_PGTABLE_HWDEF_H_
+#define _ASMPOWERPC_PGTABLE_HWDEF_H_
/*
* Copyright (C) 2024, IBM Inc, Nicholas Piggin <npiggin@gmail.com>
*
@@ -63,4 +63,4 @@
#define PHYS_MASK_SHIFT (48)
#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
-#endif /* _ASMPPC64_PGTABLE_HWDEF_H_ */
+#endif /* _ASMPOWERPC_PGTABLE_HWDEF_H_ */
diff --git a/lib/ppc64/asm/pgtable.h b/lib/powerpc/asm/pgtable.h
similarity index 99%
rename from lib/ppc64/asm/pgtable.h
rename to lib/powerpc/asm/pgtable.h
index a6ee0d4c..d4f2c826 100644
--- a/lib/ppc64/asm/pgtable.h
+++ b/lib/powerpc/asm/pgtable.h
@@ -122,4 +122,4 @@ static inline pte_t *pte_alloc(pmd_t *pmd, unsigned long addr)
return pte_offset(pmd, addr);
}
-#endif /* _ASMPPC64_PGTABLE_H_ */
+#endif /* _ASMPOWERPC_PGTABLE_H_ */
diff --git a/lib/ppc64/asm/ptrace.h b/lib/powerpc/asm/ptrace.h
similarity index 89%
rename from lib/ppc64/asm/ptrace.h
rename to lib/powerpc/asm/ptrace.h
index 133ad2f9..4d5fc59c 100644
--- a/lib/ppc64/asm/ptrace.h
+++ b/lib/powerpc/asm/ptrace.h
@@ -1,5 +1,5 @@
-#ifndef _ASMPPC64_PTRACE_H_
-#define _ASMPPC64_PTRACE_H_
+#ifndef _ASMPOWERPC_PTRACE_H_
+#define _ASMPOWERPC_PTRACE_H_
#define KERNEL_REDZONE_SIZE 288
#define STACK_FRAME_OVERHEAD 112 /* size of minimum stack frame */
@@ -38,4 +38,4 @@ static inline void regs_advance_insn(struct pt_regs *regs)
#endif /* __ASSEMBLER__ */
-#endif /* _ASMPPC64_PTRACE_H_ */
+#endif /* _ASMPOWERPC_PTRACE_H_ */
diff --git a/lib/powerpc/asm/spinlock.h b/lib/powerpc/asm/spinlock.h
new file mode 100644
index 00000000..da259ff4
--- /dev/null
+++ b/lib/powerpc/asm/spinlock.h
@@ -0,0 +1,6 @@
+#ifndef _ASMPOWERPC_SPINLOCK_H_
+#define _ASMPOWERPC_SPINLOCK_H_
+
+#include <asm-generic/spinlock.h>
+
+#endif /* _ASMPOWERPC_SPINLOCK_H_ */
diff --git a/lib/powerpc/asm/stack.h b/lib/powerpc/asm/stack.h
index e1c46ee0..eea139a4 100644
--- a/lib/powerpc/asm/stack.h
+++ b/lib/powerpc/asm/stack.h
@@ -5,4 +5,7 @@
#error Do not directly include <asm/stack.h>. Just use <stack.h>.
#endif
+#define HAVE_ARCH_BACKTRACE
+#define HAVE_ARCH_BACKTRACE_FRAME
+
#endif
diff --git a/lib/ppc64/asm/vpa.h b/lib/powerpc/asm/vpa.h
similarity index 100%
rename from lib/ppc64/asm/vpa.h
rename to lib/powerpc/asm/vpa.h
diff --git a/lib/ppc64/mmu.c b/lib/powerpc/mmu.c
similarity index 100%
rename from lib/ppc64/mmu.c
rename to lib/powerpc/mmu.c
diff --git a/lib/ppc64/opal-calls.S b/lib/powerpc/opal-calls.S
similarity index 100%
rename from lib/ppc64/opal-calls.S
rename to lib/powerpc/opal-calls.S
diff --git a/lib/ppc64/opal.c b/lib/powerpc/opal.c
similarity index 100%
rename from lib/ppc64/opal.c
rename to lib/powerpc/opal.c
diff --git a/lib/ppc64/stack.c b/lib/powerpc/stack.c
similarity index 100%
rename from lib/ppc64/stack.c
rename to lib/powerpc/stack.c
diff --git a/lib/ppc64/.gitignore b/lib/ppc64/.gitignore
deleted file mode 100644
index 84872bf1..00000000
--- a/lib/ppc64/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-asm-offsets.[hs]
diff --git a/lib/ppc64/asm/handlers.h b/lib/ppc64/asm/handlers.h
deleted file mode 100644
index 92e6fb24..00000000
--- a/lib/ppc64/asm/handlers.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../powerpc/asm/handlers.h"
diff --git a/lib/ppc64/asm/hcall.h b/lib/ppc64/asm/hcall.h
deleted file mode 100644
index daabaca5..00000000
--- a/lib/ppc64/asm/hcall.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../powerpc/asm/hcall.h"
diff --git a/lib/ppc64/asm/memory_areas.h b/lib/ppc64/asm/memory_areas.h
deleted file mode 100644
index b9fd46b9..00000000
--- a/lib/ppc64/asm/memory_areas.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASMPPC64_MEMORY_AREAS_H_
-#define _ASMPPC64_MEMORY_AREAS_H_
-
-#include <asm-generic/memory_areas.h>
-
-#endif
diff --git a/lib/ppc64/asm/ppc_asm.h b/lib/ppc64/asm/ppc_asm.h
deleted file mode 100644
index e3929eee..00000000
--- a/lib/ppc64/asm/ppc_asm.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../powerpc/asm/ppc_asm.h"
diff --git a/lib/ppc64/asm/processor.h b/lib/ppc64/asm/processor.h
deleted file mode 100644
index 066a51a0..00000000
--- a/lib/ppc64/asm/processor.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../powerpc/asm/processor.h"
diff --git a/lib/ppc64/asm/reg.h b/lib/ppc64/asm/reg.h
deleted file mode 100644
index bc407b55..00000000
--- a/lib/ppc64/asm/reg.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../powerpc/asm/reg.h"
diff --git a/lib/ppc64/asm/rtas.h b/lib/ppc64/asm/rtas.h
deleted file mode 100644
index fe77f635..00000000
--- a/lib/ppc64/asm/rtas.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../powerpc/asm/rtas.h"
diff --git a/lib/ppc64/asm/setup.h b/lib/ppc64/asm/setup.h
deleted file mode 100644
index 20192985..00000000
--- a/lib/ppc64/asm/setup.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../powerpc/asm/setup.h"
diff --git a/lib/ppc64/asm/smp.h b/lib/ppc64/asm/smp.h
deleted file mode 100644
index 67ced756..00000000
--- a/lib/ppc64/asm/smp.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../powerpc/asm/smp.h"
diff --git a/lib/ppc64/asm/spinlock.h b/lib/ppc64/asm/spinlock.h
deleted file mode 100644
index f59eed19..00000000
--- a/lib/ppc64/asm/spinlock.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASMPPC64_SPINLOCK_H_
-#define _ASMPPC64_SPINLOCK_H_
-
-#include <asm-generic/spinlock.h>
-
-#endif /* _ASMPPC64_SPINLOCK_H_ */
diff --git a/lib/ppc64/asm/stack.h b/lib/ppc64/asm/stack.h
deleted file mode 100644
index 94fd1021..00000000
--- a/lib/ppc64/asm/stack.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _ASMPPC64_STACK_H_
-#define _ASMPPC64_STACK_H_
-
-#ifndef _STACK_H_
-#error Do not directly include <asm/stack.h>. Just use <stack.h>.
-#endif
-
-#define HAVE_ARCH_BACKTRACE
-#define HAVE_ARCH_BACKTRACE_FRAME
-
-#endif
diff --git a/lib/ppc64/asm/time.h b/lib/ppc64/asm/time.h
deleted file mode 100644
index 326d2887..00000000
--- a/lib/ppc64/asm/time.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../powerpc/asm/time.h"
diff --git a/powerpc/Makefile b/powerpc/Makefile
index 8a007ab5..41e752b0 100644
--- a/powerpc/Makefile
+++ b/powerpc/Makefile
@@ -1 +1,110 @@
-include $(SRCDIR)/$(TEST_DIR)/Makefile.$(ARCH)
+#
+# powerpc makefile
+#
+# Authors: Andrew Jones <drjones@redhat.com>
+#
+tests = \
+ $(TEST_DIR)/selftest.elf \
+ $(TEST_DIR)/selftest-migration.elf \
+ $(TEST_DIR)/memory-verify.elf \
+ $(TEST_DIR)/sieve.elf \
+ $(TEST_DIR)/spapr_vpa.elf \
+ $(TEST_DIR)/spapr_hcall.elf \
+ $(TEST_DIR)/rtas.elf \
+ $(TEST_DIR)/emulator.elf \
+ $(TEST_DIR)/atomics.elf \
+ $(TEST_DIR)/tm.elf \
+ $(TEST_DIR)/smp.elf \
+ $(TEST_DIR)/sprs.elf \
+ $(TEST_DIR)/timebase.elf \
+ $(TEST_DIR)/interrupts.elf \
+ $(TEST_DIR)/mmu.elf \
+ $(TEST_DIR)/pmu.elf
+
+all: directories $(TEST_DIR)/boot_rom.bin $(tests)
+
+cstart.o = $(TEST_DIR)/cstart64.o
+reloc.o = $(TEST_DIR)/reloc64.o
+
+OBJDIRS += lib/powerpc
+cflatobjs += lib/powerpc/stack.o
+cflatobjs += lib/powerpc/mmu.o
+cflatobjs += lib/powerpc/opal.o
+cflatobjs += lib/powerpc/opal-calls.o
+cflatobjs += lib/util.o
+cflatobjs += lib/getchar.o
+cflatobjs += lib/alloc_phys.o
+cflatobjs += lib/alloc.o
+cflatobjs += lib/alloc_page.o
+cflatobjs += lib/vmalloc.o
+cflatobjs += lib/devicetree.o
+cflatobjs += lib/migrate.o
+cflatobjs += lib/powerpc/io.o
+cflatobjs += lib/powerpc/hcall.o
+cflatobjs += lib/powerpc/setup.o
+cflatobjs += lib/powerpc/rtas.o
+cflatobjs += lib/powerpc/processor.o
+cflatobjs += lib/powerpc/handlers.o
+cflatobjs += lib/powerpc/smp.o
+
+##################################################################
+
+bits = 64
+
+ifeq ($(ENDIAN),little)
+ arch_CFLAGS = -mlittle-endian
+ arch_LDFLAGS = -EL
+else
+ arch_CFLAGS = -mbig-endian
+ arch_LDFLAGS = -EB
+endif
+
+mabi_no_altivec := $(call cc-option,-mabi=no-altivec,"")
+
+CFLAGS += -std=gnu99
+CFLAGS += -ffreestanding
+CFLAGS += -O2 -msoft-float -mno-altivec $(mabi_no_altivec)
+CFLAGS += -I $(SRCDIR)/lib -I $(SRCDIR)/lib/libfdt -I lib
+CFLAGS += -Wa,-mregnames
+
+# We want to keep intermediate files
+.PRECIOUS: %.o
+
+asm-offsets = lib/powerpc/asm-offsets.h
+include $(SRCDIR)/scripts/asm-offsets.mak
+
+%.aux.o: $(SRCDIR)/lib/auxinfo.c
+ $(CC) $(CFLAGS) -c -o $@ $< -DPROGNAME=\"$(@:.aux.o=.elf)\"
+
+FLATLIBS = $(libcflat) $(LIBFDT_archive)
+%.elf: CFLAGS += $(arch_CFLAGS)
+%.elf: LDFLAGS += $(arch_LDFLAGS) -pie -n
+%.elf: %.o $(FLATLIBS) $(SRCDIR)/powerpc/flat.lds $(cstart.o) $(reloc.o) %.aux.o
+ $(LD) $(LDFLAGS) -o $@ \
+ -T $(SRCDIR)/powerpc/flat.lds --build-id=none \
+ $(filter %.o, $^) $(FLATLIBS)
+ @chmod a-x $@
+ @echo -n Checking $@ for unsupported reloc types...
+ @if $(OBJDUMP) -R $@ | grep R_ | grep -v R_PPC64_RELATIVE; then \
+ false; \
+ else \
+ echo " looks good."; \
+ fi
+
+$(TEST_DIR)/boot_rom.bin: $(TEST_DIR)/boot_rom.elf
+ dd if=/dev/zero of=$@ bs=256 count=1
+ $(OBJCOPY) -O binary $^ $@.tmp
+ cat $@.tmp >> $@
+ $(RM) $@.tmp
+
+$(TEST_DIR)/boot_rom.elf: CFLAGS = -mbig-endian
+$(TEST_DIR)/boot_rom.elf: $(TEST_DIR)/boot_rom.o
+ $(LD) -EB -nostdlib -Ttext=0x100 --entry=start --build-id=none -o $@ $<
+ @chmod a-x $@
+
+arch_clean: asm_offsets_clean
+ $(RM) $(TEST_DIR)/*.{o,elf} $(TEST_DIR)/boot_rom.bin \
+ $(TEST_DIR)/.*.d lib/powerpc/.*.d
+
+generated-files = $(asm-offsets)
+$(tests:.elf=.o) $(cstart.o) $(cflatobjs): $(generated-files)
diff --git a/powerpc/Makefile.common b/powerpc/Makefile.common
deleted file mode 100644
index 3b357982..00000000
--- a/powerpc/Makefile.common
+++ /dev/null
@@ -1,95 +0,0 @@
-#
-# powerpc common makefile
-#
-# Authors: Andrew Jones <drjones@redhat.com>
-#
-
-tests-common = \
- $(TEST_DIR)/selftest.elf \
- $(TEST_DIR)/selftest-migration.elf \
- $(TEST_DIR)/memory-verify.elf \
- $(TEST_DIR)/sieve.elf \
- $(TEST_DIR)/spapr_hcall.elf \
- $(TEST_DIR)/rtas.elf \
- $(TEST_DIR)/emulator.elf \
- $(TEST_DIR)/atomics.elf \
- $(TEST_DIR)/tm.elf \
- $(TEST_DIR)/smp.elf \
- $(TEST_DIR)/sprs.elf \
- $(TEST_DIR)/timebase.elf \
- $(TEST_DIR)/interrupts.elf \
- $(TEST_DIR)/mmu.elf \
- $(TEST_DIR)/pmu.elf
-
-tests-all = $(tests-common) $(tests)
-all: directories $(TEST_DIR)/boot_rom.bin $(tests-all)
-
-##################################################################
-
-mabi_no_altivec := $(call cc-option,-mabi=no-altivec,"")
-
-CFLAGS += -std=gnu99
-CFLAGS += -ffreestanding
-CFLAGS += -O2 -msoft-float -mno-altivec $(mabi_no_altivec)
-CFLAGS += -I $(SRCDIR)/lib -I $(SRCDIR)/lib/libfdt -I lib
-CFLAGS += -Wa,-mregnames
-
-# We want to keep intermediate files
-.PRECIOUS: %.o
-
-asm-offsets = lib/$(ARCH)/asm-offsets.h
-include $(SRCDIR)/scripts/asm-offsets.mak
-
-cflatobjs += lib/util.o
-cflatobjs += lib/getchar.o
-cflatobjs += lib/alloc_phys.o
-cflatobjs += lib/alloc.o
-cflatobjs += lib/alloc_page.o
-cflatobjs += lib/vmalloc.o
-cflatobjs += lib/devicetree.o
-cflatobjs += lib/migrate.o
-cflatobjs += lib/powerpc/io.o
-cflatobjs += lib/powerpc/hcall.o
-cflatobjs += lib/powerpc/setup.o
-cflatobjs += lib/powerpc/rtas.o
-cflatobjs += lib/powerpc/processor.o
-cflatobjs += lib/powerpc/handlers.o
-cflatobjs += lib/powerpc/smp.o
-
-OBJDIRS += lib/powerpc
-
-%.aux.o: $(SRCDIR)/lib/auxinfo.c
- $(CC) $(CFLAGS) -c -o $@ $< -DPROGNAME=\"$(@:.aux.o=.elf)\"
-
-FLATLIBS = $(libcflat) $(LIBFDT_archive)
-%.elf: CFLAGS += $(arch_CFLAGS)
-%.elf: LDFLAGS += $(arch_LDFLAGS) -pie -n
-%.elf: %.o $(FLATLIBS) $(SRCDIR)/powerpc/flat.lds $(cstart.o) $(reloc.o) %.aux.o
- $(LD) $(LDFLAGS) -o $@ \
- -T $(SRCDIR)/powerpc/flat.lds --build-id=none \
- $(filter %.o, $^) $(FLATLIBS)
- @chmod a-x $@
- @echo -n Checking $@ for unsupported reloc types...
- @if $(OBJDUMP) -R $@ | grep R_ | grep -v R_PPC64_RELATIVE; then \
- false; \
- else \
- echo " looks good."; \
- fi
-
-$(TEST_DIR)/boot_rom.bin: $(TEST_DIR)/boot_rom.elf
- dd if=/dev/zero of=$@ bs=256 count=1
- $(OBJCOPY) -O binary $^ $@.tmp
- cat $@.tmp >> $@
- $(RM) $@.tmp
-
-$(TEST_DIR)/boot_rom.elf: CFLAGS = -mbig-endian
-$(TEST_DIR)/boot_rom.elf: $(TEST_DIR)/boot_rom.o
- $(LD) -EB -nostdlib -Ttext=0x100 --entry=start --build-id=none -o $@ $<
- @chmod a-x $@
-
-powerpc_clean: asm_offsets_clean
- $(RM) $(TEST_DIR)/*.{o,elf} $(TEST_DIR)/boot_rom.bin \
- $(TEST_DIR)/.*.d lib/powerpc/.*.d
-
-generated-files = $(asm-offsets)
-$(tests-all:.elf=.o) $(cstart.o) $(cflatobjs): $(generated-files)
diff --git a/powerpc/Makefile.ppc64 b/powerpc/Makefile.ppc64
deleted file mode 100644
index 2466471f..00000000
--- a/powerpc/Makefile.ppc64
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# ppc64 makefile
-#
-# Authors: Andrew Jones <drjones@redhat.com>
-#
-bits = 64
-
-ifeq ($(ENDIAN),little)
- arch_CFLAGS = -mlittle-endian
- arch_LDFLAGS = -EL
-else
- arch_CFLAGS = -mbig-endian
- arch_LDFLAGS = -EB
-endif
-
-cstart.o = $(TEST_DIR)/cstart64.o
-reloc.o = $(TEST_DIR)/reloc64.o
-
-OBJDIRS += lib/ppc64
-cflatobjs += lib/ppc64/stack.o
-cflatobjs += lib/ppc64/mmu.o
-cflatobjs += lib/ppc64/opal.o
-cflatobjs += lib/ppc64/opal-calls.o
-
-# ppc64 specific tests
-tests = $(TEST_DIR)/spapr_vpa.elf
-
-include $(SRCDIR)/$(TEST_DIR)/Makefile.common
-
-arch_clean: powerpc_clean
- $(RM) lib/ppc64/.*.d
--
2.53.0
^ permalink raw reply related
* [kvm-unit-tests RFC PATCH 4/6] powerpc: gitlab CI update
From: Chinmay Rath @ 2026-06-02 6:48 UTC (permalink / raw)
To: thuth
Cc: npiggin, harshpb, lvivier, linuxppc-dev, kvm, andrew.jones, sbhat,
Chinmay Rath
In-Reply-To: <20260602064806.3101025-1-rathc@linux.ibm.com>
From: Nicholas Piggin <npiggin@gmail.com>
Change to using a gitlab-ci test group instead of specifying all
tests in .gitlab-ci.yml, and add a few additional tests (smp, atomics)
that are known to work in CI.
To control overhead, ppc64be is used to test 64k page size, ppc64le is
used to test 4k page size.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
.gitlab-ci.yml | 53 ++++++++-----------------------------------
powerpc/unittests.cfg | 36 ++++++++++++++++++++++-------
2 files changed, 37 insertions(+), 52 deletions(-)
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index daccdfef..8608219d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -187,32 +187,15 @@ build-arm:
build-ppc64be:
extends: .outoftree_template
script:
- - dnf install -y qemu-system-ppc gcc-powerpc64-linux-gnu nmap-ncat
+ - dnf install -y qemu-system-ppc gcc-powerpc64-linux-gnu
- mkdir build
- cd build
- - ../configure --arch=ppc64 --endian=big --cross-prefix=powerpc64-linux-gnu-
+ - ../configure --arch=ppc64 --endian=big --page-size=64k --cross-prefix=powerpc64-linux-gnu-
- make -j2
- - ACCEL=tcg ./run_tests.sh
- atomics
- emulator
- interrupts
- mmu
- rtas-get-time-of-day
- rtas-get-time-of-day-base
- rtas-set-time-of-day
- selftest-migration
- selftest-migration-skip
- selftest-setup
- sieve
- smp
- smp-smt
- spapr_hcall
- spapr_vpa
- sprs
- timebase
- timebase-icount
- | tee results.txt
- - if grep -q FAIL results.txt ; then exit 1 ; fi
+ - ACCEL=tcg MAX_SMP=8 ./run_tests.sh -g gitlab-ci
+ | tee results.txt
+ - grep -q PASS results.txt && ! grep -q FAIL results.txt
+
build-ppc64le:
extends: .intree_template
@@ -220,27 +203,9 @@ build-ppc64le:
- dnf install -y qemu-system-ppc gcc-powerpc64-linux-gnu nmap-ncat
- ./configure --arch=ppc64 --endian=little --cross-prefix=powerpc64-linux-gnu-
- make -j2
- - ACCEL=tcg ./run_tests.sh
- atomics
- emulator
- interrupts
- mmu
- rtas-get-time-of-day
- rtas-get-time-of-day-base
- rtas-set-time-of-day
- selftest-migration
- selftest-migration-skip
- selftest-setup
- sieve
- smp
- smp-smt
- spapr_hcall
- spapr_vpa
- sprs
- timebase
- timebase-icount
- | tee results.txt
- - if grep -q FAIL results.txt ; then exit 1 ; fi
+ - ACCEL=tcg MAX_SMP=8 ./run_tests.sh -g gitlab-ci
+ | tee results.txt
+ - grep -q PASS results.txt && ! grep -q FAIL results.txt
build-riscv32:
extends: .outoftree_template
diff --git a/powerpc/unittests.cfg b/powerpc/unittests.cfg
index 60c73086..168af206 100644
--- a/powerpc/unittests.cfg
+++ b/powerpc/unittests.cfg
@@ -17,17 +17,25 @@ file = selftest.elf
smp = 2
test_args = 'setup smp=2 mem=1024'
qemu_params = -m 1g
-groups = selftest
+groups = selftest gitlab-ci
[selftest-migration]
file = selftest-migration.elf
machine = pseries
groups = selftest migration
+# make a kvm-only version for CI for now
+# TCG accel support with Qemu and distro release used in gitlab CI needs to be checked.
+[selftest-migration-ci]
+file = selftest-migration.elf
+machine = pseries
+groups = nodefault selftest migration gitlab-ci
+accel = kvm
+
[selftest-migration-skip]
file = selftest-migration.elf
machine = pseries
-groups = selftest migration
+groups = selftest migration gitlab-ci
test_args = "skip"
[migration-memory]
@@ -38,17 +46,19 @@ groups = migration
[spapr_hcall]
file = spapr_hcall.elf
machine = pseries
+groups = gitlab-ci
[spapr_vpa]
file = spapr_vpa.elf
machine = pseries
+groups = gitlab-ci
[rtas-get-time-of-day]
file = rtas.elf
machine = pseries
timeout = 5
test_args = "get-time-of-day date=$(date +%s)"
-groups = rtas
+groups = rtas gitlab-ci
[rtas-get-time-of-day-base]
file = rtas.elf
@@ -56,35 +66,41 @@ machine = pseries
timeout = 5
test_args = "get-time-of-day date=$(date --date="2006-06-17 UTC" +%s)"
qemu_params = -rtc base="2006-06-17"
-groups = rtas
+groups = rtas gitlab-ci
[rtas-set-time-of-day]
file = rtas.elf
machine = pseries
test_args = "set-time-of-day"
timeout = 5
-groups = rtas
+groups = rtas gitlab-ci
[emulator]
file = emulator.elf
+groups = gitlab-ci
[interrupts]
file = interrupts.elf
+groups = gitlab-ci
[mmu]
file = mmu.elf
smp = 2
+groups = gitlab-ci
[pmu]
file = pmu.elf
+groups = gitlab-ci
[smp]
file = smp.elf
smp = 2
+groups = gitlab-ci
[smp-smt]
file = smp.elf
smp = 8,threads=4
+groups = gitlab-ci
# mttcg is the default most places, so add a thread=single test
[smp-thread-single]
@@ -94,21 +110,23 @@ accel = tcg,thread=single
[atomics]
file = atomics.elf
-smp = 2
+groups = gitlab-ci
[atomics-migration]
file = atomics.elf
machine = pseries
test_args = "migration -m"
-groups = migration
+groups = migration gitlab-ci
[timebase]
file = timebase.elf
+groups = gitlab-ci
[timebase-icount]
file = timebase.elf
accel = tcg
qemu_params = -icount shift=5
+groups = gitlab-ci
[h_cede_tm]
file = tm.elf
@@ -121,12 +139,14 @@ groups = h_cede_tm
[sprs]
file = sprs.elf
+groups = gitlab-ci
[sprs-migration]
file = sprs.elf
machine = pseries
test_args = '-w'
-groups = migration
+groups = migration gitlab-ci
[sieve]
file = sieve.elf
+groups = gitlab-ci
--
2.53.0
^ permalink raw reply related
* [kvm-unit-tests RFC PATCH 0/6] powerpc improvements
From: Chinmay Rath @ 2026-06-02 6:48 UTC (permalink / raw)
To: thuth
Cc: npiggin, harshpb, lvivier, linuxppc-dev, kvm, andrew.jones, sbhat,
Chinmay Rath
This series aims to add a couple of new powerpc tests and improve the
powerpc build structure.
This is originally Nick's work, the last version (v10) of which can be seen here:
https://lore.kernel.org/linuxppc-dev/20240612052322.218726-1-npiggin@gmail.com/
Since it has been 2 years since this series was last posted, I am now
posting it as an RFC. A couple of patches from the series were already merged;
this posting contains the rest.
I have rebased the patches to the upstream state.
For the patches which did not require any changes against the existing upstream
code, I have retained the "Reviewed-by" and "Acked-by" tags (patches 3, 5 and 6),
and have removed them from the ones which required changes against the existing
upstream code during the rebase (patch 2). Nick had originally introduced powernv
tests as well; I have removed them for now since they were causing CI failures
on the current upstream.
Link to Gitlab tree with patches :
https://gitlab.com/rathc/kvm-unit-tests/-/tree/ppc64
Link to Gitlab pipeline :
https://gitlab.com/rathc/kvm-unit-tests/-/pipelines/2569132740
Please note that the build-centos8 test is failing due to a recent
upstream change unrelated to this patch series.
Nicholas Piggin (6):
powerpc: add pmu tests
configure: Make arch_libdir a first-class entity
powerpc: Remove remnants of ppc64 directory and build structure
powerpc: gitlab CI update
scripts/arch-run.bash: Fix run_panic() success exit status
powerpc: Add a panic test
.gitlab-ci.yml | 53 +-
MAINTAINERS | 1 -
Makefile | 2 +-
configure | 21 +-
lib/{ppc64 => powerpc}/asm-offsets.c | 0
lib/{ppc64 => powerpc}/asm/asm-offsets.h | 0
lib/{ppc64 => powerpc}/asm/atomic.h | 0
lib/{ppc64 => powerpc}/asm/barrier.h | 4 +-
lib/{ppc64 => powerpc}/asm/bitops.h | 4 +-
lib/{ppc64 => powerpc}/asm/io.h | 4 +-
lib/{ppc64 => powerpc}/asm/mmu.h | 0
lib/{ppc64 => powerpc}/asm/opal.h | 4 +-
lib/{ppc64 => powerpc}/asm/page.h | 6 +-
lib/{ppc64 => powerpc}/asm/pgtable-hwdef.h | 6 +-
lib/{ppc64 => powerpc}/asm/pgtable.h | 2 +-
lib/powerpc/asm/processor.h | 2 +
lib/{ppc64 => powerpc}/asm/ptrace.h | 6 +-
lib/powerpc/asm/reg.h | 9 +
lib/powerpc/asm/rtas.h | 1 +
lib/powerpc/asm/setup.h | 1 +
lib/powerpc/asm/spinlock.h | 6 +
lib/powerpc/asm/stack.h | 3 +
lib/{ppc64 => powerpc}/asm/vpa.h | 0
lib/{ppc64 => powerpc}/mmu.c | 0
lib/{ppc64 => powerpc}/opal-calls.S | 0
lib/{ppc64 => powerpc}/opal.c | 0
lib/powerpc/rtas.c | 16 +
lib/powerpc/setup.c | 20 +
lib/{ppc64 => powerpc}/stack.c | 0
lib/ppc64/.gitignore | 1 -
lib/ppc64/asm/handlers.h | 1 -
lib/ppc64/asm/hcall.h | 1 -
lib/ppc64/asm/memory_areas.h | 6 -
lib/ppc64/asm/ppc_asm.h | 1 -
lib/ppc64/asm/processor.h | 1 -
lib/ppc64/asm/reg.h | 1 -
lib/ppc64/asm/rtas.h | 1 -
lib/ppc64/asm/setup.h | 1 -
lib/ppc64/asm/smp.h | 1 -
lib/ppc64/asm/spinlock.h | 6 -
lib/ppc64/asm/stack.h | 11 -
lib/ppc64/asm/time.h | 1 -
powerpc/Makefile | 111 +++-
powerpc/Makefile.common | 94 ----
powerpc/Makefile.ppc64 | 31 --
powerpc/pmu.c | 567 +++++++++++++++++++++
powerpc/run | 2 +-
powerpc/selftest.c | 17 +-
powerpc/unittests.cfg | 44 +-
scripts/arch-run.bash | 1 +
50 files changed, 830 insertions(+), 241 deletions(-)
rename lib/{ppc64 => powerpc}/asm-offsets.c (100%)
rename lib/{ppc64 => powerpc}/asm/asm-offsets.h (100%)
rename lib/{ppc64 => powerpc}/asm/atomic.h (100%)
rename lib/{ppc64 => powerpc}/asm/barrier.h (83%)
rename lib/{ppc64 => powerpc}/asm/bitops.h (69%)
rename lib/{ppc64 => powerpc}/asm/io.h (50%)
rename lib/{ppc64 => powerpc}/asm/mmu.h (100%)
rename lib/{ppc64 => powerpc}/asm/opal.h (90%)
rename lib/{ppc64 => powerpc}/asm/page.h (94%)
rename lib/{ppc64 => powerpc}/asm/pgtable-hwdef.h (93%)
rename lib/{ppc64 => powerpc}/asm/pgtable.h (99%)
rename lib/{ppc64 => powerpc}/asm/ptrace.h (89%)
create mode 100644 lib/powerpc/asm/spinlock.h
rename lib/{ppc64 => powerpc}/asm/vpa.h (100%)
rename lib/{ppc64 => powerpc}/mmu.c (100%)
rename lib/{ppc64 => powerpc}/opal-calls.S (100%)
rename lib/{ppc64 => powerpc}/opal.c (100%)
rename lib/{ppc64 => powerpc}/stack.c (100%)
delete mode 100644 lib/ppc64/.gitignore
delete mode 100644 lib/ppc64/asm/handlers.h
delete mode 100644 lib/ppc64/asm/hcall.h
delete mode 100644 lib/ppc64/asm/memory_areas.h
delete mode 100644 lib/ppc64/asm/ppc_asm.h
delete mode 100644 lib/ppc64/asm/processor.h
delete mode 100644 lib/ppc64/asm/reg.h
delete mode 100644 lib/ppc64/asm/rtas.h
delete mode 100644 lib/ppc64/asm/setup.h
delete mode 100644 lib/ppc64/asm/smp.h
delete mode 100644 lib/ppc64/asm/spinlock.h
delete mode 100644 lib/ppc64/asm/stack.h
delete mode 100644 lib/ppc64/asm/time.h
delete mode 100644 powerpc/Makefile.common
delete mode 100644 powerpc/Makefile.ppc64
create mode 100644 powerpc/pmu.c
--
2.53.0
^ permalink raw reply
* [kvm-unit-tests RFC PATCH 6/6] powerpc: Add a panic test
From: Chinmay Rath @ 2026-06-02 6:48 UTC (permalink / raw)
To: thuth
Cc: npiggin, harshpb, lvivier, linuxppc-dev, kvm, andrew.jones, sbhat,
Chinmay Rath
In-Reply-To: <20260602064806.3101025-1-rathc@linux.ibm.com>
From: Nicholas Piggin <npiggin@gmail.com>
This adds a simple panic test for pseries that works with
TCG (unlike the s390x panic tests), making it easier to test
this part of the harness code.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
lib/powerpc/asm/rtas.h | 1 +
lib/powerpc/rtas.c | 16 ++++++++++++++++
powerpc/run | 2 +-
powerpc/selftest.c | 17 ++++++++++++++++-
powerpc/unittests.cfg | 5 +++++
5 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/lib/powerpc/asm/rtas.h b/lib/powerpc/asm/rtas.h
index 989b21bd..fdb3c544 100644
--- a/lib/powerpc/asm/rtas.h
+++ b/lib/powerpc/asm/rtas.h
@@ -26,6 +26,7 @@ extern int rtas_call(int token, int nargs, int nret, int *outputs, ...);
extern int rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, int *outputs, ...);
extern void rtas_power_off(void);
+extern void rtas_os_panic(void);
extern void rtas_stop_self(void);
#endif /* __ASSEMBLER__ */
diff --git a/lib/powerpc/rtas.c b/lib/powerpc/rtas.c
index 9c1e0aff..98eee24f 100644
--- a/lib/powerpc/rtas.c
+++ b/lib/powerpc/rtas.c
@@ -182,3 +182,19 @@ void rtas_power_off(void)
ret = rtas_call_unlocked(&args, token, 2, 1, NULL, -1, -1);
printf("RTAS power-off returned %d\n", ret);
}
+
+void rtas_os_panic(void)
+{
+ struct rtas_args args;
+ uint32_t token;
+ int ret;
+
+ ret = rtas_token("ibm,os-term", &token);
+ if (ret) {
+ puts("RTAS ibm,os-term not available\n");
+ return;
+ }
+
+ ret = rtas_call_unlocked(&args, token, 1, 1, NULL, "rtas_os_panic");
+ printf("RTAS ibm,os-term returned %d\n", ret);
+}
diff --git a/powerpc/run b/powerpc/run
index 06657764..718f08cb 100755
--- a/powerpc/run
+++ b/powerpc/run
@@ -57,7 +57,7 @@ fi
command="$qemu -nodefaults $A $M $B $D"
command+=" -display none -serial stdio -kernel"
-command="$(migration_cmd) $(timeout_cmd) $command"
+command="$(panic_cmd) $(migration_cmd) $(timeout_cmd) $command"
# powerpc tests currently exit with rtas-poweroff, which exits with 0.
# run_test treats that as a failure exit and returns 1, so we need
diff --git a/powerpc/selftest.c b/powerpc/selftest.c
index 8d1a2c76..f6f24d6a 100644
--- a/powerpc/selftest.c
+++ b/powerpc/selftest.c
@@ -7,6 +7,7 @@
*/
#include <libcflat.h>
#include <util.h>
+#include <asm/rtas.h>
#include <asm/setup.h>
#include <asm/smp.h>
@@ -47,6 +48,17 @@ static void check_setup(int argc, char **argv)
report_abort("missing input");
}
+static void do_panic(void)
+{
+ if (machine_is_pseries()) {
+ rtas_os_panic();
+ } else {
+ /* Cause a checkstop with MSR[ME] disabled */
+ *((char *)0x10000000000) = 0;
+ }
+ report_fail("survived panic");
+}
+
int main(int argc, char **argv)
{
report_prefix_push("selftest");
@@ -60,7 +72,10 @@ int main(int argc, char **argv)
check_setup(argc-2, &argv[2]);
+ } else if (strcmp(argv[1], "panic") == 0) {
+ do_panic();
+ } else {
+ report_abort("unknown test %s", argv[1]);
}
-
return report_summary();
}
diff --git a/powerpc/unittests.cfg b/powerpc/unittests.cfg
index 168af206..d1395464 100644
--- a/powerpc/unittests.cfg
+++ b/powerpc/unittests.cfg
@@ -19,6 +19,11 @@ test_args = 'setup smp=2 mem=1024'
qemu_params = -m 1g
groups = selftest gitlab-ci
+[selftest-panic]
+file = selftest.elf
+extra_params = -append 'panic'
+groups = selftest panic gitlab-ci
+
[selftest-migration]
file = selftest-migration.elf
machine = pseries
--
2.53.0
^ permalink raw reply related
* [PATCH v3 00/19] mm: Refactor bootmem gigantic hugepage allocation
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
This series is split out from the earlier larger series "mm: Generalize
HVO for HugeTLB and device DAX" [1]. It collects the first 19 patches of
that series as a standalone set of fixes and preparatory cleanups around
bootmem HugeTLB handling, sparse initialization ordering, and related
vmemmap setup.
The first patches fix a few bugs found while reviewing the existing
code, including incorrect bootmem HVO handling, wrong vmemmap
registration arguments, a powerpc compound-vmemmap tracking bug, and
too-late initialization of gigantic bootmem HugeTLB struct pages.
The rest of the series reorders early memory initialization so the
relevant zone state is available before sparse and HugeTLB boot-time
setup runs, then simplifies the remaining bootmem gigantic hugepage
allocation path and removes code made obsolete by that rework.
At a high level:
- patches [1-4] fix boot-time and arch-specific bugs
- patches [5-12] reorder and simplify sparse/mm/hugetlb early init
- patches [13-19] refactor bootmem gigantic hugepage allocation and
remove obsolete helpers and state
Changes since v2:
- patch 1: add a comment explaining why shared tail pages must be
initialized from gather_bootmem_prealloc() before
hugetlb_vmemmap_init() runs
- patch 1: update the stale sparse-vmemmap comment to point to
gather_bootmem_prealloc() as the shared-tail initialization site
- patch 2: collect Acked-by from Oscar Salvador
- patch 19: fold __init_page_from_nid() into __init_deferred_page()
instead of only making it static
[1] https://lore.kernel.org/linux-mm/20260513130542.35604-1-songmuchun@bytedance.com/
Muchun Song (19):
mm/hugetlb: Fix boot panic with CONFIG_DEBUG_VM and HVO bootmem pages
mm/hugetlb_vmemmap: Fix __hugetlb_vmemmap_optimize_folios()
powerpc/mm: Fix wrong addr_pfn tracking in compound vmemmap population
mm/hugetlb: Initialize gigantic bootmem hugepage struct pages earlier
mm/mm_init: Simplify deferred_free_pages() migratetype init
mm/sparse: Panic on memmap and usemap allocation failure
mm/sparse: Move subsection_map_init() into sparse_init()
mm/mm_init: Defer sparse_init() until after zone initialization
mm/mm_init: Defer hugetlb reservation until after zone initialization
mm/mm_init: Remove set_pageblock_order() call from sparse_init()
mm/sparse: Move sparse_vmemmap_init_nid_late() into sparse_init_nid()
mm/hugetlb_cma: Validate hugetlb CMA range by zone at reserve time
mm/hugetlb: Refactor early boot gigantic hugepage allocation
mm/hugetlb: Free cross-zone bootmem gigantic pages after allocation
mm/hugetlb_vmemmap: Move bootmem HVO setup to early init
mm/hugetlb: Remove obsolete bootmem cross-zone checks
mm/sparse-vmemmap: Remove sparse_vmemmap_init_nid_late()
mm/hugetlb: Remove unused bootmem cma field
mm/mm_init: Fold __init_page_from_nid() into __init_deferred_page()
arch/powerpc/mm/book3s64/radix_pgtable.c | 7 +-
arch/powerpc/mm/hugetlbpage.c | 13 +-
include/linux/hugetlb.h | 24 +--
include/linux/mmzone.h | 7 -
mm/cma.c | 3 +-
mm/hugetlb.c | 259 +++++++++++------------
mm/hugetlb_cma.c | 44 ++--
mm/hugetlb_cma.h | 8 +-
mm/hugetlb_vmemmap.c | 94 ++------
mm/hugetlb_vmemmap.h | 5 -
mm/internal.h | 14 +-
mm/mm_init.c | 88 +++-----
mm/sparse-vmemmap.c | 26 ++-
mm/sparse.c | 48 +----
14 files changed, 241 insertions(+), 399 deletions(-)
base-commit: 08484c504b55a98bd100527fbe10a3caf55ff3ff
--
2.54.0
^ permalink raw reply
* [PATCH v3 01/19] mm/hugetlb: Fix boot panic with CONFIG_DEBUG_VM and HVO bootmem pages
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
Commit 622026e87c40 ("mm/hugetlb: remove fake head pages") switched
HVO to reuse per-zone shared tail pages from zone->vmemmap_tails[].
Those shared tail pages were initialized in hugetlb_vmemmap_init(), but
bootmem HugeTLB folios are prepared earlier from gather_bootmem_prealloc().
With hugetlb_free_vmemmap=on, prep_and_add_bootmem_folios() can access
pageblock flags on bootmem HugeTLB pages whose mirrored tail struct pages
already point to the shared tail page. On CONFIG_DEBUG_VM kernels,
get_pfnblock_bitmap_bitidx() then dereferences the still-uninitialized
shared tail page and can panic during boot.
Initialize zone->vmemmap_tails[] from gather_bootmem_prealloc(), before
bootmem HugeTLB folios are processed, and drop the later initialization
from hugetlb_vmemmap_init().
This bug only affects CONFIG_DEBUG_VM kernels, where the relevant
assertion is evaluated.
Fixes: 622026e87c40 ("mm/hugetlb: remove fake head pages")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
---
v2->v3:
- add a comment explaining why shared tail pages must be initialized from
gather_bootmem_prealloc() before hugetlb_vmemmap_init() runs (per Oscar
Salvador)
- update the stale sparse-vmemmap comment to point to gather_bootmem_prealloc()
as the bootmem HugeTLB shared-tail initialization site (reported by Oscar
Salvador)
---
mm/hugetlb.c | 25 +++++++++++++++++++++++++
mm/hugetlb_vmemmap.c | 17 -----------------
mm/sparse-vmemmap.c | 2 +-
3 files changed, 26 insertions(+), 18 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 571212b80835..cd55524c7e30 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3365,6 +3365,31 @@ static void __init gather_bootmem_prealloc(void)
.max_threads = num_node_state(N_MEMORY),
.numa_aware = true,
};
+#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+ struct zone *zone;
+
+ for_each_zone(zone) {
+ for (int i = 0; i < NR_VMEMMAP_TAILS; i++) {
+ struct page *tail, *p;
+ unsigned int order;
+
+ tail = zone->vmemmap_tails[i];
+ if (!tail)
+ continue;
+
+ order = i + VMEMMAP_TAIL_MIN_ORDER;
+ p = page_to_virt(tail);
+ /*
+ * prep_and_add_bootmem_folios() can access pageblock
+ * flags on bootmem HugeTLB pages, so initialize the
+ * shared tail struct pages here before bootmem folios
+ * start using them.
+ */
+ for (int j = 0; j < PAGE_SIZE / sizeof(struct page); j++)
+ init_compound_tail(p + j, NULL, order, zone);
+ }
+ }
+#endif
padata_do_multithreaded(&job);
}
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 133b46dfb09f..c713c0d2593a 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -870,27 +870,10 @@ static const struct ctl_table hugetlb_vmemmap_sysctls[] = {
static int __init hugetlb_vmemmap_init(void)
{
const struct hstate *h;
- struct zone *zone;
/* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */
BUILD_BUG_ON(__NR_USED_SUBPAGE > HUGETLB_VMEMMAP_RESERVE_PAGES);
- for_each_zone(zone) {
- for (int i = 0; i < NR_VMEMMAP_TAILS; i++) {
- struct page *tail, *p;
- unsigned int order;
-
- tail = zone->vmemmap_tails[i];
- if (!tail)
- continue;
-
- order = i + VMEMMAP_TAIL_MIN_ORDER;
- p = page_to_virt(tail);
- for (int j = 0; j < PAGE_SIZE / sizeof(struct page); j++)
- init_compound_tail(p + j, NULL, order, zone);
- }
- }
-
for_each_hstate(h) {
if (hugetlb_vmemmap_optimizable(h)) {
register_sysctl_init("vm", hugetlb_vmemmap_sysctls);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 112ccf9c71ca..8f41b73fb674 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -342,7 +342,7 @@ static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *
*
* Any initialization done here will be overwritten by memmap_init().
*
- * hugetlb_vmemmap_init() will take care of initialization after
+ * gather_bootmem_prealloc() will take care of initialization after
* memmap_init().
*/
--
2.54.0
^ permalink raw reply related
* [PATCH v3 02/19] mm/hugetlb_vmemmap: Fix __hugetlb_vmemmap_optimize_folios()
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
__hugetlb_vmemmap_optimize_folios() uses incorrect arguments when handling
bootmem HugeTLB folios.
The section number passed to register_page_bootmem_memmap() is derived from
the vmemmap virtual address of folio->page instead of the folio PFN, so the
bootmem memmap metadata can be registered against the wrong section. The
helper is also given HUGETLB_VMEMMAP_RESERVE_SIZE even though it expects a
page count, not a size in bytes. In addition, the write-protect range is
based on pages_per_huge_page(h), which does not cover the full HugeTLB
vmemmap area and can leave part of the shared tail vmemmap mapping writable.
Fix the section lookup to use folio_pfn(folio), use
HUGETLB_VMEMMAP_RESERVE_PAGES when registering the reserved memmap pages, and
use hugetlb_vmemmap_size(h) for the write-protect range.
Fixes: 752fe17af693 ("mm/hugetlb: add pre-HVO framework")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
---
v2->v3:
- collect Acked-by from Oscar Salvador
---
mm/hugetlb_vmemmap.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index c713c0d2593a..ea6af85bfec1 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -635,12 +635,12 @@ static void __hugetlb_vmemmap_optimize_folios(struct hstate *h,
* mirrored tail page structs RO.
*/
spfn = (unsigned long)&folio->page;
- epfn = spfn + pages_per_huge_page(h);
+ epfn = spfn + hugetlb_vmemmap_size(h);
vmemmap_wrprotect_hvo(spfn, epfn, folio_nid(folio),
HUGETLB_VMEMMAP_RESERVE_SIZE);
- register_page_bootmem_memmap(pfn_to_section_nr(spfn),
+ register_page_bootmem_memmap(pfn_to_section_nr(folio_pfn(folio)),
&folio->page,
- HUGETLB_VMEMMAP_RESERVE_SIZE);
+ HUGETLB_VMEMMAP_RESERVE_PAGES);
continue;
}
--
2.54.0
^ permalink raw reply related
* [PATCH v3 03/19] powerpc/mm: Fix wrong addr_pfn tracking in compound vmemmap population
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
vmemmap_populate_compound_pages() uses addr_pfn to determine the PFN
offset within a compound page and to decide whether the current
vmemmap slot should be populated as a head page mapping or should reuse
a tail page mapping.
However, addr_pfn is advanced manually in parallel with addr. The loop
itself progresses in vmemmap address space, so each PAGE_SIZE step in
addr covers PAGE_SIZE / sizeof(struct page) struct page slots. Since
addr_pfn is compared against nr_pages in data-PFN units, it should
advance by the same number of PFNs. The existing manual increments do
not match that and therefore do not reliably track the PFN
corresponding to the current addr.
As a result, pfn_offset can be computed from the wrong PFN and the code
can make the head/tail decision for the wrong compound-page position.
Fix this by deriving addr_pfn directly from the current vmemmap address
instead of carrying it as loop state.
Fixes: f2b79c0d7968 ("powerpc/book3s64/radix: add support for vmemmap optimization for radix")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
---
v2->v3:
- collect Acked-by from Oscar Salvador
---
arch/powerpc/mm/book3s64/radix_pgtable.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 10aced261cff..cf692b2b5f7b 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1314,7 +1314,6 @@ int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
* covering out both edges.
*/
unsigned long addr;
- unsigned long addr_pfn = start_pfn;
unsigned long next;
pgd_t *pgd;
p4d_t *p4d;
@@ -1335,7 +1334,6 @@ int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
if (pmd_leaf(READ_ONCE(*pmd))) {
/* existing huge mapping. Skip the range */
- addr_pfn += (PMD_SIZE >> PAGE_SHIFT);
next = pmd_addr_end(addr, end);
continue;
}
@@ -1348,11 +1346,11 @@ int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
* page whose VMEMMAP_RESERVE_NR pages were mapped and
* this request fall in those pages.
*/
- addr_pfn += 1;
next = addr + PAGE_SIZE;
continue;
} else {
unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
+ unsigned long addr_pfn = page_to_pfn((struct page *)addr);
unsigned long pfn_offset = addr_pfn - ALIGN_DOWN(addr_pfn, nr_pages);
pte_t *tail_page_pte;
@@ -1376,7 +1374,6 @@ int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
if (!pte)
return -ENOMEM;
- addr_pfn += 2;
next = addr + 2 * PAGE_SIZE;
continue;
}
@@ -1392,7 +1389,6 @@ int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
return -ENOMEM;
vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
- addr_pfn += 1;
next = addr + PAGE_SIZE;
continue;
}
@@ -1402,7 +1398,6 @@ int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
return -ENOMEM;
vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
- addr_pfn += 1;
next = addr + PAGE_SIZE;
continue;
}
--
2.54.0
^ permalink raw reply related
* [PATCH v3 04/19] mm/hugetlb: Initialize gigantic bootmem hugepage struct pages earlier
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
Gigantic bootmem HugeTLB pages are currently initialized from hugetlb_init(),
but page_alloc_init_late() runs earlier and walks pageblocks to determine
zone contiguity.
If a bootmem HugeTLB region is marked noinit, set_zone_contiguous() can
observe still-uninitialized struct pages through __pageblock_pfn_to_page().
This may not trigger an immediate failure, but it can make
set_zone_contiguous() compute the wrong zone contiguity state. If extra
poisoned-page checks are added in this path, such as PF_POISONED_CHECK()
in page_zone_id(), it can also trigger an early boot panic.
Initialize gigantic bootmem HugeTLB struct pages from page_alloc_init_late(),
before zone contiguity is evaluated, so later page allocator setup only
sees valid struct page state. This also makes the initialization order
more natural, as struct pages should be initialized before later code
inspects them.
Fixes: fde1c4ecf916 ("mm: hugetlb: skip initialization of gigantic tail struct pages if freed by HVO")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: Oscar Salvador <osalvador@suse.de>
---
v2->v3:
- rename the helper to hugetlb_bootmem_struct_page_init() to make the
bootmem-only scope explicit (per Oscar Salvador)
---
include/linux/hugetlb.h | 5 +++++
mm/hugetlb.c | 5 ++---
mm/mm_init.c | 1 +
mm/sparse-vmemmap.c | 4 ++--
4 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 2abaf99321e9..3700c0a1f6ff 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -171,6 +171,7 @@ extern int movable_gigantic_pages __read_mostly;
extern int sysctl_hugetlb_shm_group __read_mostly;
extern struct list_head huge_boot_pages[MAX_NUMNODES];
+void hugetlb_bootmem_struct_page_init(void);
void hugetlb_bootmem_alloc(void);
extern nodemask_t hugetlb_bootmem_nodes;
void hugetlb_bootmem_set_nodes(void);
@@ -1293,6 +1294,10 @@ static inline bool hugetlbfs_pagecache_present(
static inline void hugetlb_bootmem_alloc(void)
{
}
+
+static inline void hugetlb_bootmem_struct_page_init(void)
+{
+}
#endif /* CONFIG_HUGETLB_PAGE */
static inline spinlock_t *huge_pte_lock(struct hstate *h,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index cd55524c7e30..2bf9fe16abb9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3353,7 +3353,7 @@ static void __init gather_bootmem_prealloc_parallel(unsigned long start,
gather_bootmem_prealloc_node(nid);
}
-static void __init gather_bootmem_prealloc(void)
+void __init hugetlb_bootmem_struct_page_init(void)
{
struct padata_mt_job job = {
.thread_fn = gather_bootmem_prealloc_parallel,
@@ -3582,7 +3582,7 @@ static unsigned long __init hugetlb_pages_alloc_boot(struct hstate *h)
* - For gigantic pages, this is called early in the boot process and
* pages are allocated from memblock allocated or something similar.
* Gigantic pages are actually added to pools later with the routine
- * gather_bootmem_prealloc.
+ * hugetlb_bootmem_struct_page_init.
* - For non-gigantic pages, this is called later in the boot process after
* all of mm is up and functional. Pages are allocated from buddy and
* then added to hugetlb pools.
@@ -4152,7 +4152,6 @@ static int __init hugetlb_init(void)
}
hugetlb_init_hstates();
- gather_bootmem_prealloc();
report_hugepages();
hugetlb_sysfs_init();
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 6de3a77eb9ae..1890bda948b8 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2338,6 +2338,7 @@ void __init page_alloc_init_late(void)
/* Reinit limits that are based on free pages after the kernel is up */
files_maxfiles_init();
#endif
+ hugetlb_bootmem_struct_page_init();
/* Accounting of total+free memory is stable at this point. */
mem_init_print_info();
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 8f41b73fb674..db9cfe57e827 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -342,8 +342,8 @@ static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *
*
* Any initialization done here will be overwritten by memmap_init().
*
- * gather_bootmem_prealloc() will take care of initialization after
- * memmap_init().
+ * hugetlb_bootmem_struct_page_init() will take care of initialization
+ * after memmap_init().
*/
p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
--
2.54.0
^ permalink raw reply related
* [PATCH v3 05/19] mm/mm_init: Simplify deferred_free_pages() migratetype init
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
deferred_free_pages() open-codes two loops to initialize the pageblock
migratetype for a range of pages.
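Replace them with pageblock_migratetype_init_range() to remove the
duplication and make the code clearer. Because deferred_free_pages() may
be called from atomic context, add an 'atomic' parameter to the helper so
that it skips cond_resched() for this caller. The helper is now also built
when CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, not only for
CONFIG_ZONE_DEVICE.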
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: Oscar Salvador <osalvador@suse.de>
---
v2->v3:
- collect Acked-by from Oscar Salvador
---
mm/mm_init.c | 19 ++++++++-----------
1 file changed, 8 insertions(+), 11 deletions(-)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 1890bda948b8..be652b6990a2 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -674,15 +674,15 @@ static inline void fixup_hashdist(void)
static inline void fixup_hashdist(void) {}
#endif /* CONFIG_NUMA */
-#ifdef CONFIG_ZONE_DEVICE
+#if defined(CONFIG_ZONE_DEVICE) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
static __meminit void pageblock_migratetype_init_range(unsigned long pfn,
- unsigned long nr_pages, int migratetype)
+ unsigned long nr_pages, int migratetype, bool atomic)
{
const unsigned long end = pfn + nr_pages;
for (pfn = pageblock_align(pfn); pfn < end; pfn += pageblock_nr_pages) {
init_pageblock_migratetype(pfn_to_page(pfn), migratetype, false);
- if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
+ if (!atomic && IS_ALIGNED(pfn, PAGES_PER_SECTION))
cond_resched();
}
}
@@ -1142,7 +1142,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
compound_nr_pages(pfn, altmap, pgmap));
}
- pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE);
+ pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE, false);
pr_debug("%s initialised %lu pages in %ums\n", __func__,
nr_pages, jiffies_to_msecs(jiffies - start));
@@ -1996,12 +1996,12 @@ static void __init deferred_free_pages(unsigned long pfn,
if (!nr_pages)
return;
+ pageblock_migratetype_init_range(pfn, nr_pages, mt, true);
+
page = pfn_to_page(pfn);
/* Free a large naturally-aligned chunk if possible */
if (nr_pages == MAX_ORDER_NR_PAGES && IS_MAX_ORDER_ALIGNED(pfn)) {
- for (i = 0; i < nr_pages; i += pageblock_nr_pages)
- init_pageblock_migratetype(page + i, mt, false);
__free_pages_core(page, MAX_PAGE_ORDER, MEMINIT_EARLY);
return;
}
@@ -2009,11 +2009,8 @@ static void __init deferred_free_pages(unsigned long pfn,
/* Accept chunks smaller than MAX_PAGE_ORDER upfront */
accept_memory(PFN_PHYS(pfn), nr_pages * PAGE_SIZE);
- for (i = 0; i < nr_pages; i++, page++, pfn++) {
- if (pageblock_aligned(pfn))
- init_pageblock_migratetype(page, mt, false);
- __free_pages_core(page, 0, MEMINIT_EARLY);
- }
+ for (i = 0; i < nr_pages; i++)
+ __free_pages_core(page + i, 0, MEMINIT_EARLY);
}
/* Completion tracking for deferred_init_memmap() threads */
--
2.54.0
^ permalink raw reply related
* [PATCH v3 06/19] mm/sparse: Panic on memmap and usemap allocation failure
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
When vmemmap or usemap allocation fails, sparse_init_nid() currently
marks the section non-present and continues. Later boot-time code can
still walk PFNs in that section without checking for this partial setup,
which leads to invalid accesses. sparse_init_subsection_map() can also
touch an unallocated usemap.
Auditing and fixing all early PFN walkers for this case is not worth the
complexity. These allocation failures are expected to be fatal anyway,
and other memory models already treat them that way.
Make memmap and usemap allocation failures panic immediately instead of
trying to recover and crashing later in less obvious ways. This is also
consistent with how other memory model configurations handle memmap
allocation failures.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: Oscar Salvador <osalvador@suse.de>
---
v2->v3:
- collect Acked-by from Oscar Salvador
---
mm/sparse.c | 44 +++++++++-----------------------------------
1 file changed, 9 insertions(+), 35 deletions(-)
diff --git a/mm/sparse.c b/mm/sparse.c
index 16ac6df3c89f..c92bbc3f3aa3 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -239,15 +239,8 @@ struct page __init *__populate_section_memmap(unsigned long pfn,
struct dev_pagemap *pgmap)
{
unsigned long size = section_map_size();
- struct page *map;
- phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
- map = memmap_alloc(size, size, addr, nid, false);
- if (!map)
- panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n",
- __func__, size, PAGE_SIZE, nid, &addr);
-
- return map;
+ return memmap_alloc(size, size, __pa(MAX_DMA_ADDRESS), nid, false);
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
@@ -300,17 +293,14 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
unsigned long map_count)
{
unsigned long pnum;
- struct page *map;
- struct mem_section *ms;
- if (sparse_usage_init(nid, map_count)) {
- pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
- goto failed;
- }
+ if (sparse_usage_init(nid, map_count))
+ panic("Failed to allocate usemap for node %d\n", nid);
sparse_vmemmap_init_nid_early(nid);
for_each_present_section_nr(pnum_begin, pnum) {
+ struct mem_section *ms;
unsigned long pfn = section_nr_to_pfn(pnum);
if (pnum >= pnum_end)
@@ -318,34 +308,18 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
ms = __nr_to_section(pnum);
if (!preinited_vmemmap_section(ms)) {
+ struct page *map;
+
map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
- nid, NULL, NULL);
- if (!map) {
- pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
- __func__, nid);
- pnum_begin = pnum;
- sparse_usage_fini();
- goto failed;
- }
+ nid, NULL, NULL);
+ if (!map)
+ panic("Failed to allocate memmap for section %lu\n", pnum);
memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page),
PAGE_SIZE));
sparse_init_early_section(nid, map, pnum, 0);
}
}
sparse_usage_fini();
- return;
-failed:
- /*
- * We failed to allocate, mark all the following pnums as not present,
- * except the ones already initialized earlier.
- */
- for_each_present_section_nr(pnum_begin, pnum) {
- if (pnum >= pnum_end)
- break;
- ms = __nr_to_section(pnum);
- if (!preinited_vmemmap_section(ms))
- ms->section_mem_map = 0;
- }
}
/*
--
2.54.0
^ permalink raw reply related
* [PATCH v3 07/19] mm/sparse: Move subsection_map_init() into sparse_init()
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
sparse_init_subsection_map() is part of sparse memory initialization, but
it is currently called from free_area_init().
Move it into sparse_init() so the sparse-specific setup stays together
instead of being split across the generic free_area_init() path.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: Oscar Salvador <osalvador@suse.de>
---
v2->v3:
- collect Acked-by from Oscar Salvador
---
mm/internal.h | 5 ++---
mm/mm_init.c | 10 ++--------
mm/sparse-vmemmap.c | 11 ++++++++++-
mm/sparse.c | 1 +
4 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 5602393054f3..e71ba519f7f2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -994,10 +994,9 @@ static inline void sparse_init(void) {}
* mm/sparse-vmemmap.c
*/
#ifdef CONFIG_SPARSEMEM_VMEMMAP
-void sparse_init_subsection_map(unsigned long pfn, unsigned long nr_pages);
+void sparse_init_subsection_map(void);
#else
-static inline void sparse_init_subsection_map(unsigned long pfn,
- unsigned long nr_pages)
+static inline void sparse_init_subsection_map(void)
{
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
diff --git a/mm/mm_init.c b/mm/mm_init.c
index be652b6990a2..3a57bf5a9b46 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1879,18 +1879,12 @@ static void __init free_area_init(void)
(u64)zone_movable_pfn[i] << PAGE_SHIFT);
}
- /*
- * Print out the early node map, and initialize the
- * subsection-map relative to active online memory ranges to
- * enable future "sub-section" extensions of the memory map.
- */
+ /* Print out the early node map. */
pr_info("Early memory node ranges\n");
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid,
(u64)start_pfn << PAGE_SHIFT,
((u64)end_pfn << PAGE_SHIFT) - 1);
- sparse_init_subsection_map(start_pfn, end_pfn - start_pfn);
- }
/* Initialise every node */
mminit_verify_pageflags_layout();
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index db9cfe57e827..3b036251a2f4 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -596,7 +596,7 @@ static void subsection_mask_set(unsigned long *map, unsigned long pfn,
bitmap_set(map, idx, end - idx + 1);
}
-void __init sparse_init_subsection_map(unsigned long pfn, unsigned long nr_pages)
+static void __init sparse_init_subsection_map_range(unsigned long pfn, unsigned long nr_pages)
{
int end_sec_nr = pfn_to_section_nr(pfn + nr_pages - 1);
unsigned long nr, start_sec_nr = pfn_to_section_nr(pfn);
@@ -619,6 +619,15 @@ void __init sparse_init_subsection_map(unsigned long pfn, unsigned long nr_pages
}
}
+void __init sparse_init_subsection_map(void)
+{
+ int i, nid;
+ unsigned long start, end;
+
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid)
+ sparse_init_subsection_map_range(start, end - start);
+}
+
#ifdef CONFIG_MEMORY_HOTPLUG
/* Mark all memory sections within the pfn range as online */
diff --git a/mm/sparse.c b/mm/sparse.c
index c92bbc3f3aa3..85557ef387c7 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -361,5 +361,6 @@ void __init sparse_init(void)
}
/* cover the last node */
sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
+ sparse_init_subsection_map();
vmemmap_populate_print_last();
}
--
2.54.0
^ permalink raw reply related
* [PATCH v3 08/19] mm/mm_init: Defer sparse_init() until after zone initialization
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song,
Oscar Salvador (SUSE)
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
free_area_init() is responsible for initializing pgdat and zone state.
Calling sparse_init() from there mixes in later vmemmap and struct page
setup, which makes the initialization flow less clear.
Defer sparse_init(), sparse_vmemmap_init_nid_late(), and memmap_init()
until after free_area_init() completes, when zone initialization is fully
done. This keeps free_area_init() focused on zone setup and ensures that
sparse_init() runs with the relevant zone state already available.
This is also a prerequisite for later hugetlb vmemmap changes that need
zone information during early sparse vmemmap setup.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Oscar Salvador (SUSE) <osalvador@kernel.org>
---
v2->v3:
- collect Reviewed-by from Oscar Salvador
---
mm/mm_init.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3a57bf5a9b46..f349a6f34139 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1829,7 +1829,6 @@ static void __init free_area_init(void)
bool descending;
arch_zone_limits_init(max_zone_pfn);
- sparse_init();
start_pfn = PHYS_PFN(memblock_start_of_DRAM());
descending = arch_has_descending_max_zone_pfns();
@@ -1918,11 +1917,7 @@ static void __init free_area_init(void)
}
}
- for_each_node_state(nid, N_MEMORY)
- sparse_vmemmap_init_nid_late(nid);
-
calc_nr_kernel_pages();
- memmap_init();
/* disable hash distribution for systems with a single node */
fixup_hashdist();
@@ -2694,10 +2689,17 @@ void __init __weak mem_init(void)
void __init mm_core_init_early(void)
{
+ int nid;
+
hugetlb_cma_reserve();
hugetlb_bootmem_alloc();
free_area_init();
+
+ sparse_init();
+ for_each_node_state(nid, N_MEMORY)
+ sparse_vmemmap_init_nid_late(nid);
+ memmap_init();
}
/*
--
2.54.0
^ permalink raw reply related
* [PATCH v3 09/19] mm/mm_init: Defer hugetlb reservation until after zone initialization
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song,
Oscar Salvador (SUSE)
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
hugetlb_cma_reserve() and hugetlb_bootmem_alloc() currently run before
free_area_init(), so HugeTLB reservation happens before zone state is
initialized.
Move the reservation step after free_area_init() so the relevant zone
information is available before HugeTLB reserves memory. This is needed
for later hugetlb changes that validate boot-time HugeTLB reservations
against zone boundaries.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Oscar Salvador (SUSE) <osalvador@kernel.org>
---
v2->v3:
- collect Reviewed-by from Mike Rapoport
- collect Reviewed-by from Oscar Salvador
---
mm/mm_init.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index f349a6f34139..4601e5d659eb 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2691,11 +2691,11 @@ void __init mm_core_init_early(void)
{
int nid;
+ free_area_init();
+
hugetlb_cma_reserve();
hugetlb_bootmem_alloc();
- free_area_init();
-
sparse_init();
for_each_node_state(nid, N_MEMORY)
sparse_vmemmap_init_nid_late(nid);
--
2.54.0
^ permalink raw reply related
* [PATCH v3 10/19] mm/mm_init: Remove set_pageblock_order() call from sparse_init()
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
sparse_init() now runs after free_area_init(), which already sets
pageblock_order when CONFIG_HUGETLB_PAGE_SIZE_VARIABLE is enabled, so
sparse_init() does not need to call set_pageblock_order() again.
With that call removed, set_pageblock_order() is only used in mm/mm_init.c.
Make it static.
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Oscar Salvador (SUSE) <osalvador@suse.de>
---
v2->v3:
- collect Reviewed-by from Oscar Salvador
---
mm/internal.h | 1 -
mm/mm_init.c | 4 ++--
mm/sparse.c | 3 ---
3 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index e71ba519f7f2..004a3f1d5006 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1435,7 +1435,6 @@ extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long,
unsigned long, unsigned long,
unsigned long, unsigned long);
-extern void set_pageblock_order(void);
unsigned long reclaim_pages(struct list_head *folio_list);
unsigned int reclaim_clean_pages_from_list(struct zone *zone,
struct list_head *folio_list);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 4601e5d659eb..44512f3b3544 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1511,7 +1511,7 @@ static inline void setup_usemap(struct zone *zone) {}
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
-void __init set_pageblock_order(void)
+static void __init set_pageblock_order(void)
{
unsigned int order = PAGE_BLOCK_MAX_ORDER;
@@ -1537,7 +1537,7 @@ void __init set_pageblock_order(void)
* include/linux/pageblock-flags.h for the values of pageblock_order based on
* the kernel config
*/
-void __init set_pageblock_order(void)
+static inline void __init set_pageblock_order(void)
{
}
diff --git a/mm/sparse.c b/mm/sparse.c
index 85557ef387c7..324213d8bdcb 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -343,9 +343,6 @@ void __init sparse_init(void)
pnum_begin = first_present_section_nr();
nid_begin = sparse_early_nid(__nr_to_section(pnum_begin));
- /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
- set_pageblock_order();
-
for_each_present_section_nr(pnum_begin + 1, pnum_end) {
int nid = sparse_early_nid(__nr_to_section(pnum_end));
--
2.54.0
^ permalink raw reply related
* [PATCH v3 11/19] mm/sparse: Move sparse_vmemmap_init_nid_late() into sparse_init_nid()
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song,
Oscar Salvador (SUSE)
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
sparse_vmemmap_init_nid_late() is still called separately from
mm_core_init_early(), away from the rest of the sparse initialization
path.
Now that sparse_init() runs after zone initialization, call
sparse_vmemmap_init_nid_late() from sparse_init_nid() instead. This
keeps both sparse_vmemmap_init_nid_early() and
sparse_vmemmap_init_nid_late() in the sparse setup path.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Oscar Salvador (SUSE) <osalvador@kernel.org>
---
v2->v3:
- collect Reviewed-by from Oscar Salvador
---
mm/mm_init.c | 4 ----
mm/sparse.c | 1 +
2 files changed, 1 insertion(+), 4 deletions(-)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 44512f3b3544..41b83dd18c01 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2689,16 +2689,12 @@ void __init __weak mem_init(void)
void __init mm_core_init_early(void)
{
- int nid;
-
free_area_init();
hugetlb_cma_reserve();
hugetlb_bootmem_alloc();
sparse_init();
- for_each_node_state(nid, N_MEMORY)
- sparse_vmemmap_init_nid_late(nid);
memmap_init();
}
diff --git a/mm/sparse.c b/mm/sparse.c
index 324213d8bdcb..3917a47153d8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -320,6 +320,7 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
}
}
sparse_usage_fini();
+ sparse_vmemmap_init_nid_late(nid);
}
/*
--
2.54.0
^ permalink raw reply related
* [PATCH v3 12/19] mm/hugetlb_cma: Validate hugetlb CMA range by zone at reserve time
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
Hugetlb CMA allocation currently has to cope with CMA areas that span
multiple zones.
Validate the reserved CMA range up front in hugetlb_cma_reserve() so
later hugetlb CMA allocations can assume a zone-consistent area.
Also move the pfn_valid() sanity check from cma_validate_zones() to
cma_activate_area(). mem_section is not fully initialized at reserve
time, so performing the check there can trigger false warnings.
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Oscar Salvador (SUSE) <osalvador@suse.de>
---
v2->v3:
- collect Reviewed-by from Oscar Salvador
---
mm/cma.c | 3 ++-
mm/hugetlb_cma.c | 6 ++++--
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/mm/cma.c b/mm/cma.c
index a13ce4999b39..31073738f2ac 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -126,7 +126,6 @@ bool cma_validate_zones(struct cma *cma)
* to be in the same zone. Simplify by forcing the entire
* CMA resv range to be in the same zone.
*/
- WARN_ON_ONCE(!pfn_valid(base_pfn));
if (pfn_range_intersects_zones(cma->nid, base_pfn, cmr->count)) {
set_bit(CMA_ZONES_INVALID, &cma->flags);
return false;
@@ -165,6 +164,8 @@ static void __init cma_activate_area(struct cma *cma)
bitmap_set(cmr->bitmap, 0, bitmap_count);
}
+ WARN_ON_ONCE(!pfn_valid(cmr->base_pfn));
+
for (pfn = early_pfn[r]; pfn < cmr->base_pfn + cmr->count;
pfn += pageblock_nr_pages)
init_cma_reserved_pageblock(pfn_to_page(pfn));
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index 39344d6c78d8..ce999391cc14 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -231,9 +231,11 @@ void __init hugetlb_cma_reserve(void)
res = cma_declare_contiguous_multi(size, gigantic_page_size,
HUGETLB_PAGE_ORDER, name,
&hugetlb_cma[nid], nid);
- if (res) {
- pr_warn("hugetlb_cma: reservation failed: err %d, node %d",
+ if (res || !cma_validate_zones(hugetlb_cma[nid])) {
+ pr_warn("hugetlb_cma: %s: err %d, node %d\n",
+ res ? "reservation failed" : "reserved area spans zones",
res, nid);
+ hugetlb_cma[nid] = NULL;
continue;
}
--
2.54.0
^ permalink raw reply related
* [PATCH v3 13/19] mm/hugetlb: Refactor early boot gigantic hugepage allocation
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
The early boot gigantic hugepage allocation helpers currently mix
allocation with huge_bootmem_page setup, and leave part of the
initialization flow in architecture code.
Refactor the interface to return the allocated huge page pointer and
move the huge_bootmem_page setup into the generic hugetlb code. This
makes the architecture-specific paths focus only on finding memory,
while the common code handles node placement and early page metadata
setup in one place.
This also lets powerpc benefit from memblock_reserved_mark_noinit(),
which it did not enable before.
In addition, upcoming cross-zone validation for boot-time gigantic
hugetlb reservation is common logic. With this refactoring, that logic
can stay in the generic code instead of being duplicated in
architecture-specific paths.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Oscar Salvador (SUSE) <osalvador@suse.de>
---
v2->v3:
- keep powerpc code independent of struct huge_bootmem_page by switching
it to void * (per Mike Rapoport)
- move huge_bootmem_page internals out of include/linux/hugetlb.h and keep
them in mm-private scope so the arch code does not need to see the type
(per Mike Rapoport, echoed by Oscar Salvador)
---
arch/powerpc/mm/hugetlbpage.c | 13 ++---
include/linux/hugetlb.h | 18 ++-----
mm/hugetlb.c | 95 ++++++++++++++---------------------
mm/hugetlb_cma.c | 13 ++---
mm/hugetlb_cma.h | 8 ++-
mm/internal.h | 9 ++++
6 files changed, 64 insertions(+), 92 deletions(-)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 558fafb82b8a..a298746dc143 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -104,17 +104,14 @@ void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_p
}
}
-static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
+static __init void *pseries_alloc_bootmem_huge_page(struct hstate *hstate)
{
- struct huge_bootmem_page *m;
+ void *m;
if (nr_gpages == 0)
- return 0;
+ return NULL;
m = phys_to_virt(gpage_freearray[--nr_gpages]);
gpage_freearray[nr_gpages] = 0;
- list_add(&m->list, &huge_boot_pages[0]);
- m->hstate = hstate;
- m->flags = 0;
- return 1;
+ return m;
}
bool __init hugetlb_node_alloc_supported(void)
@@ -124,7 +121,7 @@ bool __init hugetlb_node_alloc_supported(void)
#endif
-int __init alloc_bootmem_huge_page(struct hstate *h, int nid)
+void *__init arch_alloc_bootmem_huge_page(struct hstate *h, int nid)
{
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3700c0a1f6ff..09f28dd773b7 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -674,19 +674,11 @@ struct hstate {
char name[HSTATE_NAME_LEN];
};
-struct cma;
-
-struct huge_bootmem_page {
- struct list_head list;
- struct hstate *hstate;
- unsigned long flags;
- struct cma *cma;
-};
-
#define HUGE_BOOTMEM_HVO 0x0001
#define HUGE_BOOTMEM_ZONES_VALID 0x0002
#define HUGE_BOOTMEM_CMA 0x0004
+struct huge_bootmem_page;
bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);
int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list);
@@ -706,8 +698,8 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
unsigned long address, struct folio *folio);
/* arch callback */
-int __init __alloc_bootmem_huge_page(struct hstate *h, int nid);
-int __init alloc_bootmem_huge_page(struct hstate *h, int nid);
+void *__init __alloc_bootmem_huge_page(struct hstate *h, int nid);
+void *__init arch_alloc_bootmem_huge_page(struct hstate *h, int nid);
bool __init hugetlb_node_alloc_supported(void);
void __init hugetlb_add_hstate(unsigned order);
@@ -1138,9 +1130,9 @@ alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
return NULL;
}
-static inline int __alloc_bootmem_huge_page(struct hstate *h)
+static inline void *__alloc_bootmem_huge_page(struct hstate *h, int nid)
{
- return 0;
+ return NULL;
}
static inline struct hstate *hstate_file(struct file *f)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2bf9fe16abb9..5e557c05d80a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3027,79 +3027,58 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
static __init void *alloc_bootmem(struct hstate *h, int nid, bool node_exact)
{
- struct huge_bootmem_page *m;
- int listnode = nid;
-
if (hugetlb_early_cma(h))
- m = hugetlb_cma_alloc_bootmem(h, &listnode, node_exact);
- else {
- if (node_exact)
- m = memblock_alloc_exact_nid_raw(huge_page_size(h),
+ return hugetlb_cma_alloc_bootmem(h, nid, node_exact);
+
+ if (node_exact)
+ return memblock_alloc_exact_nid_raw(huge_page_size(h),
huge_page_size(h), 0,
MEMBLOCK_ALLOC_ACCESSIBLE, nid);
- else {
- m = memblock_alloc_try_nid_raw(huge_page_size(h),
+
+ return memblock_alloc_try_nid_raw(huge_page_size(h),
huge_page_size(h), 0,
MEMBLOCK_ALLOC_ACCESSIBLE, nid);
- /*
- * For pre-HVO to work correctly, pages need to be on
- * the list for the node they were actually allocated
- * from. That node may be different in the case of
- * fallback by memblock_alloc_try_nid_raw. So,
- * extract the actual node first.
- */
- if (m)
- listnode = early_pfn_to_nid(PHYS_PFN(__pa(m)));
- }
-
- if (m) {
- m->flags = 0;
- m->cma = NULL;
- }
- }
-
- if (m) {
- /*
- * Use the beginning of the huge page to store the
- * huge_bootmem_page struct (until gather_bootmem
- * puts them into the mem_map).
- *
- * Put them into a private list first because mem_map
- * is not up yet.
- */
- INIT_LIST_HEAD(&m->list);
- list_add(&m->list, &huge_boot_pages[listnode]);
- m->hstate = h;
- }
-
- return m;
}
-int alloc_bootmem_huge_page(struct hstate *h, int nid)
+void *__init arch_alloc_bootmem_huge_page(struct hstate *h, int nid)
__attribute__ ((weak, alias("__alloc_bootmem_huge_page")));
-int __alloc_bootmem_huge_page(struct hstate *h, int nid)
+void *__init __alloc_bootmem_huge_page(struct hstate *h, int nid)
{
- struct huge_bootmem_page *m = NULL; /* initialize for clang */
int nr_nodes, node = nid;
/* do node specific alloc */
- if (nid != NUMA_NO_NODE) {
- m = alloc_bootmem(h, node, true);
- if (!m)
- return 0;
- goto found;
- }
+ if (nid != NUMA_NO_NODE)
+ return alloc_bootmem(h, node, true);
/* allocate from next node when distributing huge pages */
for_each_node_mask_to_alloc(&h->next_nid_to_alloc, nr_nodes, node,
- &hugetlb_bootmem_nodes) {
- m = alloc_bootmem(h, node, false);
- if (!m)
- return 0;
- goto found;
- }
+ &hugetlb_bootmem_nodes)
+ return alloc_bootmem(h, node, false);
-found:
+ return NULL;
+}
+
+static bool __init alloc_bootmem_huge_page(struct hstate *h, int nid)
+{
+ struct huge_bootmem_page *m = arch_alloc_bootmem_huge_page(h, nid);
+
+ if (!m)
+ return false;
+
+ nid = early_pfn_to_nid(PHYS_PFN(__pa(m)));
+ /*
+ * Use the beginning of the huge page to store the huge_bootmem_page
+ * struct (until gather_bootmem puts them into the mem_map).
+ *
+ * Put them into a private list first because mem_map is not up yet.
+ */
+ INIT_LIST_HEAD(&m->list);
+ list_add(&m->list, &huge_boot_pages[nid]);
+ m->hstate = h;
+ if (!hugetlb_early_cma(h)) {
+ m->cma = NULL;
+ m->flags = 0;
+ }
/*
* Only initialize the head struct page in memmap_init_reserved_pages,
@@ -3111,7 +3090,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
memblock_reserved_mark_noinit(__pa((void *)m + PAGE_SIZE),
huge_page_size(h) - PAGE_SIZE);
- return 1;
+ return true;
}
/* Initialize [start_page:end_page_number] tail struct pages of a hugepage */
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index ce999391cc14..e487d0ffffc0 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -56,14 +56,13 @@ struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
return folio;
}
-struct huge_bootmem_page * __init
-hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid, bool node_exact)
+void * __init hugetlb_cma_alloc_bootmem(struct hstate *h, int nid, bool node_exact)
{
struct cma *cma;
struct huge_bootmem_page *m;
- int node = *nid;
+ int node;
- cma = hugetlb_cma[*nid];
+ cma = hugetlb_cma[nid];
m = cma_reserve_early(cma, huge_page_size(h));
if (!m) {
if (node_exact)
@@ -71,13 +70,11 @@ hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid, bool node_exact)
for_each_node_mask(node, hugetlb_bootmem_nodes) {
cma = hugetlb_cma[node];
- if (!cma || node == *nid)
+ if (!cma || node == nid)
continue;
m = cma_reserve_early(cma, huge_page_size(h));
- if (m) {
- *nid = node;
+ if (m)
break;
- }
}
}
diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h
index c619c394b1ae..3aa483573d17 100644
--- a/mm/hugetlb_cma.h
+++ b/mm/hugetlb_cma.h
@@ -6,8 +6,7 @@
void hugetlb_cma_free_frozen_folio(struct folio *folio);
struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
int nid, nodemask_t *nodemask);
-struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
- bool node_exact);
+void *hugetlb_cma_alloc_bootmem(struct hstate *h, int nid, bool node_exact);
bool hugetlb_cma_exclusive_alloc(void);
unsigned long hugetlb_cma_total_size(void);
void hugetlb_cma_validate_params(void);
@@ -23,9 +22,8 @@ static inline struct folio *hugetlb_cma_alloc_frozen_folio(int order,
return NULL;
}
-static inline
-struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
- bool node_exact)
+static inline void *hugetlb_cma_alloc_bootmem(struct hstate *h, int nid,
+ bool node_exact)
{
return NULL;
}
diff --git a/mm/internal.h b/mm/internal.h
index 004a3f1d5006..6b9802460a7c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -23,6 +23,15 @@
#include "vma.h"
struct folio_batch;
+struct hstate;
+struct cma;
+
+struct huge_bootmem_page {
+ struct list_head list;
+ struct hstate *hstate;
+ unsigned long flags;
+ struct cma *cma;
+};
/*
* Maintains state across a page table move. The operation assumes both source
--
2.54.0
^ permalink raw reply related
* [PATCH v3 14/19] mm/hugetlb: Free cross-zone bootmem gigantic pages after allocation
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
Now that hugetlb reservation runs after zone initialization, bootmem
gigantic page allocation can detect pages that span multiple zones.
Keep those cross-zone pages separate during allocation and free them
after allocation completes, so later hugetlb initialization only sees
zone-valid gigantic pages.
Free cross-zone gigantic pages directly instead of retrying the
allocation. In practice, such cross-zone cases are expected to be very
rare, so adding retry logic does not seem justified at this point.
Keeping the handling simple also preserves the previous behavior. If
real-world reports of such cases show up later, retry support can be
reconsidered then.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
mm/hugetlb.c | 75 ++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 64 insertions(+), 11 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5e557c05d80a..218fb1ca45f4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3060,12 +3060,15 @@ void *__init __alloc_bootmem_huge_page(struct hstate *h, int nid)
static bool __init alloc_bootmem_huge_page(struct hstate *h, int nid)
{
+ unsigned long pfn;
+ unsigned int nid_request = nid;
struct huge_bootmem_page *m = arch_alloc_bootmem_huge_page(h, nid);
if (!m)
return false;
- nid = early_pfn_to_nid(PHYS_PFN(__pa(m)));
+ pfn = PHYS_PFN(__pa(m));
+ nid = early_pfn_to_nid(pfn);
/*
* Use the beginning of the huge page to store the huge_bootmem_page
* struct (until gather_bootmem puts them into the mem_map).
@@ -3073,22 +3076,38 @@ static bool __init alloc_bootmem_huge_page(struct hstate *h, int nid)
* Put them into a private list first because mem_map is not up yet.
*/
INIT_LIST_HEAD(&m->list);
- list_add(&m->list, &huge_boot_pages[nid]);
m->hstate = h;
if (!hugetlb_early_cma(h)) {
m->cma = NULL;
m->flags = 0;
}
- /*
- * Only initialize the head struct page in memmap_init_reserved_pages,
- * rest of the struct pages will be initialized by the HugeTLB
- * subsystem itself.
- * The head struct page is used to get folio information by the HugeTLB
- * subsystem like zone id and node id.
- */
- memblock_reserved_mark_noinit(__pa((void *)m + PAGE_SIZE),
- huge_page_size(h) - PAGE_SIZE);
+ /* CMA pages: zone-crossing is validated in hugetlb_cma_reserve(). */
+ if (!hugetlb_early_cma(h) &&
+ pfn_range_intersects_zones(nid, pfn, pages_per_huge_page(h))) {
+ /*
+ * If the allocated page is on a different node than requested
+ * (e.g., on PowerPC LPARs), put it on the requested node's list,
+ * because hugetlb_free_cross_zone_pages() only frees cross-zone
+ * pages belonging to the requested node.
+ */
+ if (WARN_ON_ONCE(nid_request != NUMA_NO_NODE && nid != nid_request))
+ list_add(&m->list, &huge_boot_pages[nid_request]);
+ else
+ list_add(&m->list, &huge_boot_pages[nid]);
+ } else {
+ list_add_tail(&m->list, &huge_boot_pages[nid]);
+ m->flags |= HUGE_BOOTMEM_ZONES_VALID;
+ /*
+ * Only initialize the head struct page in memmap_init_reserved_pages,
+ * rest of the struct pages will be initialized by the HugeTLB
+ * subsystem itself.
+ * The head struct page is used to get folio information by the HugeTLB
+ * subsystem like zone id and node id.
+ */
+ memblock_reserved_mark_noinit(__pa((void *)m + PAGE_SIZE),
+ huge_page_size(h) - PAGE_SIZE);
+ }
return true;
}
@@ -3373,6 +3392,34 @@ void __init hugetlb_bootmem_struct_page_init(void)
padata_do_multithreaded(&job);
}
+static unsigned long __init hugetlb_free_cross_zone_pages(struct hstate *h, int nid)
+{
+ unsigned long freed = 0;
+ struct huge_bootmem_page *m, *tmp;
+
+ if (!hstate_is_gigantic(h))
+ return freed;
+
+ list_for_each_entry_safe(m, tmp, &huge_boot_pages[nid], list) {
+ if (m->flags & HUGE_BOOTMEM_ZONES_VALID)
+ break;
+
+ list_del(&m->list);
+ memblock_free(m, huge_page_size(h));
+ freed++;
+ }
+
+ if (freed) {
+ char buf[32];
+
+ string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, sizeof(buf));
+ pr_warn("HugeTLB: freed %lu cross-zone hugepages of size %s on node %d.\n",
+ freed, buf, nid);
+ }
+
+ return freed;
+}
+
static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
{
unsigned long i;
@@ -3403,6 +3450,8 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
cond_resched();
}
+ i -= hugetlb_free_cross_zone_pages(h, nid);
+
if (!list_empty(&folio_list))
prep_and_add_allocated_folios(h, &folio_list);
@@ -3476,6 +3525,7 @@ static void __init hugetlb_pages_alloc_boot_node(unsigned long start, unsigned l
static unsigned long __init hugetlb_gigantic_pages_alloc_boot(struct hstate *h)
{
+ int nid;
unsigned long i;
for (i = 0; i < h->max_huge_pages; ++i) {
@@ -3484,6 +3534,9 @@ static unsigned long __init hugetlb_gigantic_pages_alloc_boot(struct hstate *h)
cond_resched();
}
+ for_each_node(nid)
+ i -= hugetlb_free_cross_zone_pages(h, nid);
+
return i;
}
--
2.54.0
^ permalink raw reply related
* [PATCH v3 15/19] mm/hugetlb_vmemmap: Move bootmem HVO setup to early init
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
Bootmem HugeTLB pages currently defer HVO setup to
hugetlb_vmemmap_init_late(), because the optimization needs zone
information.
Now that zone initialization is available earlier, the bootmem HVO setup
can be done directly from hugetlb_vmemmap_init_early(). This lets
gigantic HugeTLB pages apply HVO as soon as they are allocated.
Bootmem gigantic pages that span multiple zones are now filtered out
when they are allocated, so the remaining bootmem gigantic pages seen by
later hugetlb initialization are already zone-valid. As a result,
hugetlb_vmemmap_init_late() no longer needs to handle bootmem HVO setup.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
mm/hugetlb_vmemmap.c | 67 +++++++++-----------------------------------
1 file changed, 13 insertions(+), 54 deletions(-)
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index ea6af85bfec1..464578ee246e 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -745,6 +745,8 @@ static bool vmemmap_should_optimize_bootmem_page(struct huge_bootmem_page *m)
return true;
}
+static struct zone *pfn_to_zone(unsigned nid, unsigned long pfn);
+
/*
* Initialize memmap section for a gigantic page, HVO-style.
*/
@@ -752,6 +754,7 @@ void __init hugetlb_vmemmap_init_early(int nid)
{
unsigned long psize, paddr, section_size;
unsigned long ns, i, pnum, pfn, nr_pages;
+ unsigned long start, end;
struct huge_bootmem_page *m = NULL;
void *map;
@@ -761,6 +764,8 @@ void __init hugetlb_vmemmap_init_early(int nid)
section_size = (1UL << PA_SECTION_SHIFT);
list_for_each_entry(m, &huge_boot_pages[nid], list) {
+ struct zone *zone;
+
if (!vmemmap_should_optimize_bootmem_page(m))
continue;
@@ -769,6 +774,14 @@ void __init hugetlb_vmemmap_init_early(int nid)
paddr = virt_to_phys(m);
pfn = PHYS_PFN(paddr);
map = pfn_to_page(pfn);
+ start = (unsigned long)map;
+ end = start + hugetlb_vmemmap_size(m->hstate);
+ zone = pfn_to_zone(nid, pfn);
+
+ if (vmemmap_populate_hvo(start, end, huge_page_order(m->hstate),
+ zone, HUGETLB_VMEMMAP_RESERVE_SIZE))
+ panic("Failed to allocate memmap for HugeTLB page\n");
+ memmap_boot_pages_add(DIV_ROUND_UP(HUGETLB_VMEMMAP_RESERVE_SIZE, PAGE_SIZE));
pnum = pfn_to_section_nr(pfn);
ns = psize / section_size;
@@ -800,60 +813,6 @@ static struct zone *pfn_to_zone(unsigned nid, unsigned long pfn)
void __init hugetlb_vmemmap_init_late(int nid)
{
- struct huge_bootmem_page *m, *tm;
- unsigned long phys, nr_pages, start, end;
- unsigned long pfn, nr_mmap;
- struct zone *zone = NULL;
- struct hstate *h;
- void *map;
-
- if (!READ_ONCE(vmemmap_optimize_enabled))
- return;
-
- list_for_each_entry_safe(m, tm, &huge_boot_pages[nid], list) {
- if (!(m->flags & HUGE_BOOTMEM_HVO))
- continue;
-
- phys = virt_to_phys(m);
- h = m->hstate;
- pfn = PHYS_PFN(phys);
- nr_pages = pages_per_huge_page(h);
- map = pfn_to_page(pfn);
- start = (unsigned long)map;
- end = start + nr_pages * sizeof(struct page);
-
- if (!hugetlb_bootmem_page_zones_valid(nid, m)) {
- /*
- * Oops, the hugetlb page spans multiple zones.
- * Remove it from the list, and populate it normally.
- */
- list_del(&m->list);
-
- vmemmap_populate(start, end, nid, NULL);
- nr_mmap = end - start;
- memmap_boot_pages_add(DIV_ROUND_UP(nr_mmap, PAGE_SIZE));
-
- memblock_phys_free(phys, huge_page_size(h));
- continue;
- }
-
- if (!zone || !zone_spans_pfn(zone, pfn))
- zone = pfn_to_zone(nid, pfn);
- if (WARN_ON_ONCE(!zone))
- continue;
-
- if (vmemmap_populate_hvo(start, end, huge_page_order(h), zone,
- HUGETLB_VMEMMAP_RESERVE_SIZE) < 0) {
- /* Fallback if HVO population fails */
- vmemmap_populate(start, end, nid, NULL);
- nr_mmap = end - start;
- } else {
- m->flags |= HUGE_BOOTMEM_ZONES_VALID;
- nr_mmap = HUGETLB_VMEMMAP_RESERVE_SIZE;
- }
-
- memmap_boot_pages_add(DIV_ROUND_UP(nr_mmap, PAGE_SIZE));
- }
}
#endif
--
2.54.0
^ permalink raw reply related
* [PATCH v3 16/19] mm/hugetlb: Remove obsolete bootmem cross-zone checks
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
Bootmem gigantic HugeTLB pages used to be validated again during
gather_bootmem_prealloc_node() and any cross-zone pages were discarded
there.
That validation is no longer needed. Cross-zone bootmem gigantic pages
are now detected during allocation and freed before they reach the later
bootmem gathering path, so the remaining pages are already zone-valid.
Remove the obsolete cross-zone validation, invalid-page freeing, and the
associated discarded-page accounting.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
include/linux/hugetlb.h | 3 --
mm/hugetlb.c | 70 -----------------------------------------
2 files changed, 73 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 09f28dd773b7..f68a390d43bd 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -678,9 +678,6 @@ struct hstate {
#define HUGE_BOOTMEM_ZONES_VALID 0x0002
#define HUGE_BOOTMEM_CMA 0x0004
-struct huge_bootmem_page;
-bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);
-
int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
void wait_for_freed_hugetlb_folios(void);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 218fb1ca45f4..47c3d6d11c58 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -58,7 +58,6 @@ struct hstate hstates[HUGE_MAX_HSTATE];
__initdata nodemask_t hugetlb_bootmem_nodes;
__initdata struct list_head huge_boot_pages[MAX_NUMNODES];
-static unsigned long hstate_boot_nrinvalid[HUGE_MAX_HSTATE] __initdata;
/*
* Due to ordering constraints across the init code for various
@@ -3221,57 +3220,6 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h,
}
}
-bool __init hugetlb_bootmem_page_zones_valid(int nid,
- struct huge_bootmem_page *m)
-{
- unsigned long start_pfn;
- bool valid;
-
- if (m->flags & HUGE_BOOTMEM_ZONES_VALID) {
- /*
- * Already validated, skip check.
- */
- return true;
- }
-
- if (hugetlb_bootmem_page_earlycma(m)) {
- valid = cma_validate_zones(m->cma);
- goto out;
- }
-
- start_pfn = virt_to_phys(m) >> PAGE_SHIFT;
-
- valid = !pfn_range_intersects_zones(nid, start_pfn,
- pages_per_huge_page(m->hstate));
-out:
- if (!valid)
- hstate_boot_nrinvalid[hstate_index(m->hstate)]++;
-
- return valid;
-}
-
-/*
- * Free a bootmem page that was found to be invalid (intersecting with
- * multiple zones).
- *
- * Since it intersects with multiple zones, we can't just do a free
- * operation on all pages at once, but instead have to walk all
- * pages, freeing them one by one.
- */
-static void __init hugetlb_bootmem_free_invalid_page(int nid, struct page *page,
- struct hstate *h)
-{
- unsigned long npages = pages_per_huge_page(h);
- unsigned long pfn;
-
- while (npages--) {
- pfn = page_to_pfn(page);
- __init_page_from_nid(pfn, nid);
- free_reserved_page(page);
- page++;
- }
-}
-
/*
* Put bootmem huge pages into the standard lists after mem_map is up.
* Note: This only applies to gigantic (order > MAX_PAGE_ORDER) pages.
@@ -3287,17 +3235,6 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid)
struct folio *folio = (void *)page;
h = m->hstate;
- if (!hugetlb_bootmem_page_zones_valid(nid, m)) {
- /*
- * Can't use this page. Initialize the
- * page structures if that hasn't already
- * been done, and give them to the page
- * allocator.
- */
- hugetlb_bootmem_free_invalid_page(nid, page, h);
- continue;
- }
-
/*
* It is possible to have multiple huge page sizes (hstates)
* in this list. If so, process each size separately.
@@ -3692,20 +3629,13 @@ static void __init hugetlb_init_hstates(void)
static void __init report_hugepages(void)
{
struct hstate *h;
- unsigned long nrinvalid;
for_each_hstate(h) {
char buf[32];
- nrinvalid = hstate_boot_nrinvalid[hstate_index(h)];
- h->max_huge_pages -= nrinvalid;
-
string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n",
buf, h->nr_huge_pages);
- if (nrinvalid)
- pr_info("HugeTLB: %s page size: %lu invalid page%s discarded\n",
- buf, nrinvalid, str_plural(nrinvalid));
pr_info("HugeTLB: %d KiB vmemmap can be freed for a %s page\n",
hugetlb_vmemmap_optimizable_size(h) / SZ_1K, buf);
}
--
2.54.0
^ permalink raw reply related
* [PATCH v3 17/19] mm/sparse-vmemmap: Remove sparse_vmemmap_init_nid_late()
From: Muchun Song @ 2026-06-02 10:10 UTC (permalink / raw)
To: Oscar Salvador, David Hildenbrand, Andrew Morton,
Madhavan Srinivasan, Michael Ellerman
Cc: Muchun Song, Mike Rapoport, Lorenzo Stoakes, Liam R. Howlett,
Vlastimil Babka, linux-mm, linux-kernel, Nicholas Piggin,
Christophe Leroy (CS GROUP), Ritesh Harjani (IBM),
Aneesh Kumar K.V, linuxppc-dev, Mike Kravetz, Muchun Song
In-Reply-To: <20260602101039.1867613-1-songmuchun@bytedance.com>
hugetlb_vmemmap_init_late() is now an empty function, so the remaining
late-init path in sparse_vmemmap_init_nid_late() is dead code.
Remove sparse_vmemmap_init_nid_late() and its declarations.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
include/linux/mmzone.h | 7 -------
mm/hugetlb_vmemmap.c | 4 ----
mm/hugetlb_vmemmap.h | 5 -----
mm/sparse-vmemmap.c | 11 -----------
mm/sparse.c | 1 -
5 files changed, 28 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1331a7b93f33..72883df17c72 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2170,8 +2170,6 @@ static inline int preinited_vmemmap_section(const struct mem_section *section)
}
void sparse_vmemmap_init_nid_early(int nid);
-void sparse_vmemmap_init_nid_late(int nid);
-
#else
static inline int preinited_vmemmap_section(const struct mem_section *section)
{
@@ -2180,10 +2178,6 @@ static inline int preinited_vmemmap_section(const struct mem_section *section)
static inline void sparse_vmemmap_init_nid_early(int nid)
{
}
-
-static inline void sparse_vmemmap_init_nid_late(int nid)
-{
-}
#endif
static inline int online_section_nr(unsigned long nr)
@@ -2388,7 +2382,6 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr)
#else
#define sparse_vmemmap_init_nid_early(_nid) do {} while (0)
-#define sparse_vmemmap_init_nid_late(_nid) do {} while (0)
#define pfn_in_present_section pfn_valid
#endif /* CONFIG_SPARSEMEM */
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 464578ee246e..cde6f3aba87b 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -810,10 +810,6 @@ static struct zone *pfn_to_zone(unsigned nid, unsigned long pfn)
return NULL;
}
-
-void __init hugetlb_vmemmap_init_late(int nid)
-{
-}
#endif
static const struct ctl_table hugetlb_vmemmap_sysctls[] = {
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index 18b490825215..7ac49c52457d 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -29,7 +29,6 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list);
#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
void hugetlb_vmemmap_init_early(int nid);
-void hugetlb_vmemmap_init_late(int nid);
#endif
@@ -81,10 +80,6 @@ static inline void hugetlb_vmemmap_init_early(int nid)
{
}
-static inline void hugetlb_vmemmap_init_late(int nid)
-{
-}
-
static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
{
return 0;
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 3b036251a2f4..077686af394b 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -574,17 +574,6 @@ void __init sparse_vmemmap_init_nid_early(int nid)
{
hugetlb_vmemmap_init_early(nid);
}
-
-/*
- * This is called just before the initialization of page structures
- * through memmap_init. Zones are now initialized, so any work that
- * needs to be done that needs zone information can be done from
- * here.
- */
-void __init sparse_vmemmap_init_nid_late(int nid)
-{
- hugetlb_vmemmap_init_late(nid);
-}
#endif
static void subsection_mask_set(unsigned long *map, unsigned long pfn,
diff --git a/mm/sparse.c b/mm/sparse.c
index 3917a47153d8..324213d8bdcb 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -320,7 +320,6 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
}
}
sparse_usage_fini();
- sparse_vmemmap_init_nid_late(nid);
}
/*
--
2.54.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox