From: Linu Cherian <linu.cherian@arm.com>
To: Catalin Marinas <catalin.marinas@arm.com>,
Will Deacon <will@kernel.org>,
Ryan Roberts <ryan.roberts@arm.com>,
Kevin Brodsky <kevin.brodsky@arm.com>,
Anshuman Khandual <anshuman.khandual@arm.com>,
Suzuki K Poulose <suzuki.poulose@arm.com>,
Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org, Linu Cherian <linu.cherian@arm.com>
Subject: [PATCH 1/5] arm64: cpufeature: Add BBML3
Date: Wed, 1 Jul 2026 15:11:27 +0530 [thread overview]
Message-ID: <20260701094131.677636-2-linu.cherian@arm.com> (raw)
In-Reply-To: <20260701094131.677636-1-linu.cherian@arm.com>
- As bbml2_noabort is functionally equivalent to bbml3,
rename cpu/system_supports_bbml2_noabort to
cpu/system_supports_bbml3.
The ARM64 capability name is also renamed accordingly.
- As BBML2_NOABORT (or the equivalent BBML3), rather than plain
BBML2, is what the kernel requires for setting up the linear
map with block/contpte mappings, replace all bbml2 references
with bbml3.
FEAT_BBML3 is introduced as part of the 2025 Architecture Extensions.
https://developer.arm.com/documentation/109697/2026_03/2025-Architecture-Extensions
No functional changes are introduced with this patch.
Signed-off-by: Linu Cherian <linu.cherian@arm.com>
---
arch/arm64/include/asm/cpufeature.h | 6 ++--
arch/arm64/kernel/cpufeature.c | 30 +++++------------
arch/arm64/mm/contpte.c | 21 +++++-------
arch/arm64/mm/mmu.c | 52 ++++++++++++++---------------
arch/arm64/mm/proc.S | 4 +--
arch/arm64/tools/cpucaps | 2 +-
6 files changed, 49 insertions(+), 66 deletions(-)
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index a57870fa96db..d90040fb9de6 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -878,11 +878,11 @@ static inline bool system_supports_pmuv3(void)
return cpus_have_final_cap(ARM64_HAS_PMUV3);
}
-bool cpu_supports_bbml2_noabort(void);
+bool cpu_supports_bbml3(void);
-static inline bool system_supports_bbml2_noabort(void)
+static inline bool system_supports_bbml3(void)
{
- return alternative_has_cap_unlikely(ARM64_HAS_BBML2_NOABORT);
+ return alternative_has_cap_unlikely(ARM64_HAS_BBML3);
}
int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9a22df0c5120..9986eb7b379c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2131,21 +2131,10 @@ static bool hvhe_possible(const struct arm64_cpu_capabilities *entry,
return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE);
}
-bool cpu_supports_bbml2_noabort(void)
+bool cpu_supports_bbml3(void)
{
- /*
- * We want to allow usage of BBML2 in as wide a range of kernel contexts
- * as possible. This list is therefore an allow-list of known-good
- * implementations that both support BBML2 and additionally, fulfill the
- * extra constraint of never generating TLB conflict aborts when using
- * the relaxed BBML2 semantics (such aborts make use of BBML2 in certain
- * kernel contexts difficult to prove safe against recursive aborts).
- *
- * Note that implementations can only be considered "known-good" if their
- * implementors attest to the fact that the implementation never raises
- * TLB conflict aborts for BBML2 mapping granularity changes.
- */
- static const struct midr_range supports_bbml2_noabort_list[] = {
+ /* CPUs that support BBML3 but don't advertise it through MMFR2 ID */
+ static const struct midr_range supports_bbml3_list[] = {
MIDR_REV_RANGE(MIDR_CORTEX_X4, 0, 3, 0xf),
MIDR_REV_RANGE(MIDR_NEOVERSE_V3, 0, 2, 0xf),
MIDR_REV_RANGE(MIDR_NEOVERSE_V3AE, 0, 2, 0xf),
@@ -2155,8 +2144,7 @@ bool cpu_supports_bbml2_noabort(void)
{}
};
- /* Does our cpu guarantee to never raise TLB conflict aborts? */
- if (!is_midr_in_range_list(supports_bbml2_noabort_list))
+ if (!is_midr_in_range_list(supports_bbml3_list))
return false;
/*
@@ -2167,9 +2155,9 @@ bool cpu_supports_bbml2_noabort(void)
return true;
}
-static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int scope)
+static bool has_bbml3(const struct arm64_cpu_capabilities *caps, int scope)
{
- return cpu_supports_bbml2_noabort();
+ return cpu_supports_bbml3();
}
static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
@@ -3062,10 +3050,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
},
{
- .desc = "BBM Level 2 without TLB conflict abort",
- .capability = ARM64_HAS_BBML2_NOABORT,
+ .desc = "BBM Level 3",
+ .capability = ARM64_HAS_BBML3,
.type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
- .matches = has_bbml2_noabort,
+ .matches = has_bbml3,
},
{
.desc = "52-bit Virtual Addressing for KVM (LPA2)",
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
index 2de12656b4d8..0acab179fc1a 100644
--- a/arch/arm64/mm/contpte.c
+++ b/arch/arm64/mm/contpte.c
@@ -89,7 +89,7 @@ static void contpte_convert(struct mm_struct *mm, unsigned long addr,
}
/*
- * On eliding the __tlb_flush_range() under BBML2+noabort:
+ * On eliding the __tlb_flush_range() under BBML3:
*
* NOTE: Instead of using N=16 as the contiguous block length, we use
* N=4 for clarity.
@@ -135,7 +135,7 @@ static void contpte_convert(struct mm_struct *mm, unsigned long addr,
* contiguous TLB entry, which is a micro-optimisation opportunity,
* but does not affect correctness.
*
- * In the BBML2 case, the change is avoiding the intermediate tlbi+dsb.
+ * In the BBML3 case, the change is avoiding the intermediate tlbi+dsb.
* This means a few things, but notably other PEs will still "see" any
* stale cached TLB entries. This could lead to a "contiguous bit
* misprogramming" issue until the final tlbi+dsb of the changed page,
@@ -158,21 +158,16 @@ static void contpte_convert(struct mm_struct *mm, unsigned long addr,
* are present, and a write is made to this address, do we fault or
* is the write permitted (via amalgamation)?
*
- * The relevant Arm ARM DDI 0487L.a requirements are RNGLXZ and RJQQTC,
- * and together state that when BBML1 or BBML2 are implemented, either
- * a TLB conflict abort is raised (which we expressly forbid), or will
- * "produce an OA, access permissions, and memory attributes that are
- * consistent with any of the programmed translation table values".
- *
- * That is to say, will either raise a TLB conflict, or produce one of
- * the cached TLB entries, but never amalgamate.
+ * With BBML3 implemented, no TLB conflict abort is raised, and the OA,
+ * access permissions and memory attributes produced are those of one of
+ * the cached TLB entries, never an amalgamation of them.
*
* Thus, as the page tables are only considered "consistent" after
* the final tlbi+dsb (which evicts both the single stale (RW,n) TLB
* entry as well as the new contiguous (RO,c) TLB entry), omitting the
* initial tlbi+dsb is correct.
*
- * It is also important to note that at the end of the BBML2 folding
+ * It is also important to note that at the end of the BBML3 folding
* case, we are still left with potentially all N TLB entries still
* cached (the N-1 non-contiguous ptes, and the single contiguous
* block). However, over time, natural TLB pressure will cause the
@@ -214,7 +209,7 @@ static void contpte_convert(struct mm_struct *mm, unsigned long addr,
*
* |____| <--- tlbi + dsb
*
- * For BBML2, we again remove the intermediate tlbi+dsb. Here, there
+ * For BBML3, we again remove the intermediate tlbi+dsb. Here, there
* are no issues, as the final tlbi+dsb covering the changed page is
* guaranteed to remove the original large contiguous (RW,c) TLB entry,
* as well as the intermediate (RW,n) TLB entry; the next access will
@@ -224,7 +219,7 @@ static void contpte_convert(struct mm_struct *mm, unsigned long addr,
* regardless.
*/
- if (!system_supports_bbml2_noabort())
+ if (!system_supports_bbml3())
__flush_tlb_range(&vma, start_addr, addr, PAGE_SIZE, 3,
TLBF_NOWALKCACHE);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f2be501468ce..d94a049480b1 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -779,18 +779,18 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr)
static inline bool force_pte_mapping(void)
{
- const bool bbml2 = system_capabilities_finalized() ?
- system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort();
+ const bool bbml3 = system_capabilities_finalized() ?
+ system_supports_bbml3() : cpu_supports_bbml3();
if (debug_pagealloc_enabled())
return true;
- if (bbml2)
+ if (bbml3)
return false;
return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world();
}
static DEFINE_MUTEX(pgtable_split_lock);
-static bool linear_map_requires_bbml2;
+static bool linear_map_requires_bbml3;
int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
{
@@ -803,15 +803,15 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
* always pte-mapped), we must not go any further because taking the
* mutex below may sleep. Do not call force_pte_mapping() here because
* it could return a confusing result if called from a secondary cpu
- * prior to finalizing caps. Instead, linear_map_requires_bbml2 gives us
+ * prior to finalizing caps. Instead, linear_map_requires_bbml3 gives us
* what we need.
*/
- if (!linear_map_requires_bbml2 || is_kfence_address((void *)start))
+ if (!linear_map_requires_bbml3 || is_kfence_address((void *)start))
return 0;
- if (!system_supports_bbml2_noabort()) {
+ if (!system_supports_bbml3()) {
/*
- * !BBML2_NOABORT systems should not be trying to change
+ * !BBML3 systems should not be trying to change
* permissions on anything that is not pte-mapped in the first
* place. Just return early and let the permission change code
* raise a warning if not already pte-mapped.
@@ -828,7 +828,7 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
/*
* Boot-time: Started secondary cpus but don't know if they
- * support BBML2_NOABORT yet. Can't allow splitting in this
+ * support BBML3 yet. Can't allow splitting in this
* window in case they don't.
*/
if (WARN_ON(num_online_cpus() > 1))
@@ -934,11 +934,11 @@ static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp
return ret;
}
-u32 idmap_kpti_bbml2_flag;
+u32 idmap_kpti_bbml3_flag;
-static void __init init_idmap_kpti_bbml2_flag(void)
+static void __init init_idmap_kpti_bbml3_flag(void)
{
- WRITE_ONCE(idmap_kpti_bbml2_flag, 1);
+ WRITE_ONCE(idmap_kpti_bbml3_flag, 1);
/* Must be visible to other CPUs before stop_machine() is called. */
smp_mb();
}
@@ -947,7 +947,7 @@ static int __init linear_map_split_to_ptes(void *__unused)
{
/*
* Repainting the linear map must be done by CPU0 (the boot CPU) because
- * that's the only CPU that we know supports BBML2. The other CPUs will
+ * that's the only CPU that we know supports BBML3. The other CPUs will
* be held in a waiting area with the idmap active.
*/
if (!smp_processor_id()) {
@@ -960,7 +960,7 @@ static int __init linear_map_split_to_ptes(void *__unused)
/*
* Wait for all secondary CPUs to be put into the waiting area.
*/
- smp_cond_load_acquire(&idmap_kpti_bbml2_flag, VAL == num_online_cpus());
+ smp_cond_load_acquire(&idmap_kpti_bbml3_flag, VAL == num_online_cpus());
/*
* Walk all of the linear map [lstart, lend), except the kernel
@@ -979,7 +979,7 @@ static int __init linear_map_split_to_ptes(void *__unused)
* Relies on dsb in flush_tlb_kernel_range() to avoid reordering
* before any page table split operations.
*/
- WRITE_ONCE(idmap_kpti_bbml2_flag, 0);
+ WRITE_ONCE(idmap_kpti_bbml3_flag, 0);
} else {
typedef void (wait_split_fn)(void);
extern wait_split_fn wait_linear_map_split_to_ptes;
@@ -988,7 +988,7 @@ static int __init linear_map_split_to_ptes(void *__unused)
wait_fn = (void *)__pa_symbol(wait_linear_map_split_to_ptes);
/*
- * At least one secondary CPU doesn't support BBML2 so cannot
+ * At least one secondary CPU doesn't support BBML3 so cannot
* tolerate the size of the live mappings changing. So have the
* secondary CPUs wait for the boot CPU to make the changes
* with the idmap active and init_mm inactive.
@@ -1003,8 +1003,8 @@ static int __init linear_map_split_to_ptes(void *__unused)
void __init linear_map_maybe_split_to_ptes(void)
{
- if (linear_map_requires_bbml2 && !system_supports_bbml2_noabort()) {
- init_idmap_kpti_bbml2_flag();
+ if (linear_map_requires_bbml3 && !system_supports_bbml3()) {
+ init_idmap_kpti_bbml3_flag();
stop_machine(linear_map_split_to_ptes, NULL, cpu_online_mask);
}
}
@@ -1127,7 +1127,7 @@ bool arch_kfence_init_pool(void)
mutex_unlock(&pgtable_split_lock);
/*
- * Since the system supports bbml2_noabort, tlb invalidation is not
+ * Since the system supports bbml3, tlb invalidation is not
* required here; the pgtable mappings have been split to pte but larger
* entries may safely linger in the TLB.
*/
@@ -1166,7 +1166,7 @@ static void __init map_mem(void)
arm64_kfence_map_pool();
- linear_map_requires_bbml2 = !force_pte_mapping() && can_set_direct_map();
+ linear_map_requires_bbml3 = !force_pte_mapping() && can_set_direct_map();
if (force_pte_mapping())
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
@@ -1333,7 +1333,7 @@ void __init kpti_install_ng_mappings(void)
if (arm64_use_ng_mappings)
return;
- init_idmap_kpti_bbml2_flag();
+ init_idmap_kpti_bbml3_flag();
stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask);
}
@@ -1394,7 +1394,7 @@ void __pi_map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
u64 va_offset);
static u8 idmap_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init,
- kpti_bbml2_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
+ kpti_bbml3_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
static void __init create_idmap(void)
{
@@ -1406,17 +1406,17 @@ static void __init create_idmap(void)
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
__phys_to_virt(ptep) - ptep);
- if (linear_map_requires_bbml2 ||
+ if (linear_map_requires_bbml3 ||
(IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && !arm64_use_ng_mappings)) {
- phys_addr_t pa = __pa_symbol(&idmap_kpti_bbml2_flag);
+ phys_addr_t pa = __pa_symbol(&idmap_kpti_bbml3_flag);
/*
* The KPTI G-to-nG conversion code needs a read-write mapping
* of its synchronization flag in the ID map. This is also used
* when splitting the linear map to ptes if a secondary CPU
- * doesn't support bbml2.
+ * doesn't support bbml3.
*/
- ptep = __pa_symbol(kpti_bbml2_ptes);
+ ptep = __pa_symbol(kpti_bbml3_ptes);
__pi_map_range(&ptep, pa, pa + sizeof(u32), pa, PAGE_KERNEL,
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
__phys_to_virt(ptep) - ptep);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 22866b49be37..f4e4e71a0ea8 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -287,7 +287,7 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
mov x5, x3 // preserve temp_pte arg
mrs swapper_ttb, ttbr1_el1
- adr_l flag_ptr, idmap_kpti_bbml2_flag
+ adr_l flag_ptr, idmap_kpti_bbml3_flag
cbnz cpu, __idmap_kpti_secondary
@@ -445,7 +445,7 @@ SYM_TYPED_FUNC_START(wait_linear_map_split_to_ptes)
flag_ptr .req x4
mrs swapper_ttb, ttbr1_el1
- adr_l flag_ptr, idmap_kpti_bbml2_flag
+ adr_l flag_ptr, idmap_kpti_bbml3_flag
__idmap_cpu_set_reserved_ttbr1 x16, x17
scondary_cpu_wait:
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 9b85a84f6fd4..c05371365d14 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -14,6 +14,7 @@ HAS_ADDRESS_AUTH_ARCH_QARMA5
HAS_ADDRESS_AUTH_IMP_DEF
HAS_AMU_EXTN
HAS_ARMv8_4_TTL
+HAS_BBML3
HAS_CACHE_DIC
HAS_CACHE_IDC
HAS_CNP
@@ -51,7 +52,6 @@ HAS_LS64_V
HAS_LSUI
HAS_MOPS
HAS_NESTED_VIRT
-HAS_BBML2_NOABORT
HAS_PAN
HAS_PMUV3
HAS_S1PIE
--
2.43.0
next prev parent reply other threads:[~2026-07-01 9:42 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-07-01 9:41 [PATCH 0/5] Add BBML3 cpu feature Linu Cherian
2026-07-01 9:41 ` Linu Cherian [this message]
2026-07-01 9:41 ` [PATCH 2/5] arm64: cpufeature: Detect BBML3 based on MMFR2 ID Linu Cherian
2026-07-02 10:45 ` Mark Rutland
2026-07-01 9:41 ` [PATCH 3/5] arm64: cputype: Add Cortex-A520AE definitions Linu Cherian
2026-07-01 9:41 ` [PATCH 4/5] arm64: cputype: Add C1-Nano definitions Linu Cherian
2026-07-01 9:41 ` [PATCH 5/5] arm64: cpufeature: Extend bbml3 support list Linu Cherian
2026-07-02 10:47 ` Mark Rutland
2026-07-01 10:11 ` [PATCH 0/5] Add BBML3 cpu feature Suzuki K Poulose
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260701094131.677636-2-linu.cherian@arm.com \
--to=linu.cherian@arm.com \
--cc=anshuman.khandual@arm.com \
--cc=catalin.marinas@arm.com \
--cc=kevin.brodsky@arm.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=ryan.roberts@arm.com \
--cc=suzuki.poulose@arm.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox