* [PATCH 5.10 223/342] arm64/mm: Enable batched TLB flush in unmap_hotplug_range()
From: Greg Kroah-Hartman @ 2026-06-16 14:58 UTC (permalink / raw)
To: stable
Cc: Greg Kroah-Hartman, patches, Will Deacon, linux-arm-kernel,
linux-kernel, David Hildenbrand (Arm), Ryan Roberts,
Anshuman Khandual, Catalin Marinas, Sasha Levin
In-Reply-To: <20260616145048.348037099@linuxfoundation.org>
5.10-stable review patch. If anyone has any objections, please let me know.
------------------
From: Anshuman Khandual <anshuman.khandual@arm.com>
[ Upstream commit 48478b9f791376b4b89018d7afdfd06865498f65 ]
During a memory hot remove operation, both linear and vmemmap mappings for
the memory range being removed, get unmapped via unmap_hotplug_range() but
mapped pages get freed only for vmemmap mapping. This is just a sequential
operation where each table entry gets cleared, followed by a leaf specific
TLB flush, and then followed by memory free operation when applicable.
This approach was simple and uniform both for vmemmap and linear mappings.
But linear mapping might contain CONT marked block memory where it becomes
necessary to first clear out all entries in the range before a TLB flush.
This is as per the architecture requirement. Hence batch all TLB flushes
during the table tear down walk and finally do it in unmap_hotplug_range().
Prior to this fix, it was hypothetically possible for a speculative access
to a higher address in the contiguous block to fill the TLB with shattered
entries for the entire contiguous range after a lower address had already
been cleared and invalidated. Due to the table entries being shattered, the
subsequent TLB invalidation for the higher address would not then clear the
TLB entries for the lower address, meaning stale TLB entries could persist.
Besides it also helps in improving the performance via TLBI range operation
along with reduced synchronization instructions. The time spent executing
unmap_hotplug_range() improved 97% measured over a 2GB memory hot removal
in KVM guest.
This scheme is not applicable during vmemmap mapping tear down where memory
needs to be freed and hence a TLB flush is required after clearing out page
table entry.
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Closes: https://lore.kernel.org/all/aWZYXhrT6D2M-7-N@willie-the-truck/
Fixes: bbd6ec605c0f ("arm64/mm: Enable memory hot remove")
Cc: stable@vger.kernel.org
Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
[ renamed `__pte_clear()` to `pte_clear()` and inlined `pmd_cont(pmd)` as `pmd_val(pmd) & PMD_SECT_CONT` ]
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/arm64/mm/mmu.c | 36 ++++++++++++++++++++----------------
1 file changed, 20 insertions(+), 16 deletions(-)
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -862,10 +862,14 @@ static void unmap_hotplug_pte_range(pmd_
WARN_ON(!pte_present(pte));
pte_clear(&init_mm, addr, ptep);
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
- if (free_mapped)
+ if (free_mapped) {
+ /* CONT blocks are not supported in the vmemmap */
+ WARN_ON(pte_cont(pte));
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
free_hotplug_page_range(pte_page(pte),
PAGE_SIZE, altmap);
+ }
+ /* unmap_hotplug_range() flushes TLB for !free_mapped */
} while (addr += PAGE_SIZE, addr < end);
}
@@ -886,15 +890,14 @@ static void unmap_hotplug_pmd_range(pud_
WARN_ON(!pmd_present(pmd));
if (pmd_sect(pmd)) {
pmd_clear(pmdp);
-
- /*
- * One TLBI should be sufficient here as the PMD_SIZE
- * range is mapped with a single block entry.
- */
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
- if (free_mapped)
+ if (free_mapped) {
+ /* CONT blocks are not supported in the vmemmap */
+ WARN_ON(pmd_val(pmd) & PMD_SECT_CONT);
+ flush_tlb_kernel_range(addr, addr + PMD_SIZE);
free_hotplug_page_range(pmd_page(pmd),
PMD_SIZE, altmap);
+ }
+ /* unmap_hotplug_range() flushes TLB for !free_mapped */
continue;
}
WARN_ON(!pmd_table(pmd));
@@ -919,15 +922,12 @@ static void unmap_hotplug_pud_range(p4d_
WARN_ON(!pud_present(pud));
if (pud_sect(pud)) {
pud_clear(pudp);
-
- /*
- * One TLBI should be sufficient here as the PUD_SIZE
- * range is mapped with a single block entry.
- */
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
- if (free_mapped)
+ if (free_mapped) {
+ flush_tlb_kernel_range(addr, addr + PUD_SIZE);
free_hotplug_page_range(pud_page(pud),
PUD_SIZE, altmap);
+ }
+ /* unmap_hotplug_range() flushes TLB for !free_mapped */
continue;
}
WARN_ON(!pud_table(pud));
@@ -957,6 +957,7 @@ static void unmap_hotplug_p4d_range(pgd_
static void unmap_hotplug_range(unsigned long addr, unsigned long end,
bool free_mapped, struct vmem_altmap *altmap)
{
+ unsigned long start = addr;
unsigned long next;
pgd_t *pgdp, pgd;
@@ -978,6 +979,9 @@ static void unmap_hotplug_range(unsigned
WARN_ON(!pgd_present(pgd));
unmap_hotplug_p4d_range(pgdp, addr, next, free_mapped, altmap);
} while (addr = next, addr < end);
+
+ if (!free_mapped)
+ flush_tlb_kernel_range(start, end);
}
static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr,
^ permalink raw reply
* Re: [PATCH v7 9/9] arm64: dts: mediatek: Add MediaTek MT6392 PMIC dtsi
From: Rob Herring @ 2026-06-16 18:57 UTC (permalink / raw)
To: Luca Leonardo Scorcia
Cc: linux-mediatek, Val Packett, Dmitry Torokhov, Krzysztof Kozlowski,
Conor Dooley, Sen Chu, Sean Wang, Macpaul Lin, Lee Jones,
Matthias Brugger, AngeloGioacchino Del Regno, Liam Girdwood,
Mark Brown, Linus Walleij, Louis-Alexis Eyraud, Julien Massot,
Fabien Parent, Akari Tsuyukusa, Chen Zhong, linux-input,
devicetree, linux-kernel, linux-pm, linux-arm-kernel, linux-gpio
In-Reply-To: <CAORyz2LiMHnaTK6QnsLxJDtw0fZ_N9LELw0iCorOZwHuWXus0g@mail.gmail.com>
On Tue, Jun 16, 2026 at 10:32 AM Luca Leonardo Scorcia
<l.scorcia@gmail.com> wrote:
>
> > > arch/arm64/boot/dts/mediatek/mt6392.dtsi | 75 ++++++++++++++++++++++++
> >
> > Nothing is using this so it is a dead file that doesn't get tested.
>
> Hi, it's not referenced as the dtsi inclusion was removed in the
> original patch from 2019 for an easier merging of support for mt8516
> pumpkin boards [1][2].
> If you prefer in the next revision I can add another patch to readd it
> to the existing pumpkin board.
That or move this patch to the series for the board(s). If the board
is already upstream, then add the include in *this* patch.
Rob
^ permalink raw reply
* Re: [PATCH] arm64: futex: Consolidate 'old == new' check in __lsui_cmpxchg32()
From: Catalin Marinas @ 2026-06-16 18:36 UTC (permalink / raw)
To: Yeoreum Yun; +Cc: Will Deacon, linux-arm-kernel
In-Reply-To: <ajEW41-ncVO_nDY7@e129823.arm.com>
On Tue, Jun 16, 2026 at 10:26:59AM +0100, Yeoreum Yun wrote:
> > On Tue, May 19, 2026 at 04:09:02PM +0100, Catalin Marinas wrote:
> > > On Tue, May 19, 2026 at 10:08:22AM +0100, Will Deacon wrote:
> > > > The LSUI futex implementation relies on a cmpxchg() loop to implement
> > > > FUTEX_OP_XOR, as the architecture doesn't provide unprivileged *EOR
> > > > atomics. Since the unprivileged 'CAST' instructions used to implement
> > > > the cmpxchg() can only operate on 64-bit memory locations, the
> > > > __lsui_cmpxchg32() helper function performs a song and dance to marshall
> > > > the 32-bit futex value into the correct part of a 64-bit register and
> > > > fill the remaining bytes with the neighbouring data.
> > >
> > > IIRC, the reason for the current __lsui_cmpxchg32() was not EOR but the
> > > expected futex_atomic_cmpxchg_inatomic() semantics. Looking at it again,
> > > we have wake_futex_pi() that does something else if the ret is 0 but the
> > > value differs. Looking at it again, the caller of wake_futex_pi()
> > > retries on -EAGAIN anyway, so I don't see a correctness issue, it will
> > > eventually hit the condition.
> >
> > Hmm, but I think that means my patch does change the behaviour of
> > wake_futex_pi() in an undesirable way. For example, futex_unlock_pi()
> > will go round the retry loop for any change in the futex value, whereas
> > before we would go back to userspace only if the TID changed.
> >
> > So I think we should swallow the -EAGAIN for the CAS-based cmpxchg() if
> > the futex word has changed, along the lines of the diff below.
> >
> > Will
> >
> > --->8
> >
> > diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
> > index db84a7b2de74..79c6d86c38a9 100644
> > --- a/arch/arm64/include/asm/futex.h
> > +++ b/arch/arm64/include/asm/futex.h
> > @@ -215,14 +215,14 @@ __lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
> > static __always_inline int
> > __lsui_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
> > {
> > + u32 curval = oldval;
> > int ret;
> >
> > - /*
> > - * Callers of futex_atomic_cmpxchg_inatomic() already retry on
> > - * -EAGAIN, no need for another loop of max retries.
> > - */
> > - ret = __lsui_cmpxchg32(uaddr, &oldval, newval);
> > - *oval = oldval;
> > + ret = __lsui_cmpxchg32(uaddr, &curval, newval);
> > + if (ret == -EAGAIN && curval != oldval)
> > + ret = 0;
> > +
> > + *oval = curval;
> > return ret;
> > }
> > #endif /* CONFIG_ARM64_LSUI */
>
> Agree. It is good that this additional patch does not change
> the existing behavior.
>
> @Catalin, Could you check this please?
Ah, yes, looks good to me. I completely forgot about this.
--
Catalin
^ permalink raw reply
* [PATCH 5.15 259/411] arm64/mm: Enable batched TLB flush in unmap_hotplug_range()
From: Greg Kroah-Hartman @ 2026-06-16 14:58 UTC (permalink / raw)
To: stable
Cc: Greg Kroah-Hartman, patches, Will Deacon, linux-arm-kernel,
linux-kernel, David Hildenbrand (Arm), Ryan Roberts,
Anshuman Khandual, Catalin Marinas, Sasha Levin
In-Reply-To: <20260616145100.376842714@linuxfoundation.org>
5.15-stable review patch. If anyone has any objections, please let me know.
------------------
From: Anshuman Khandual <anshuman.khandual@arm.com>
[ Upstream commit 48478b9f791376b4b89018d7afdfd06865498f65 ]
During a memory hot remove operation, both linear and vmemmap mappings for
the memory range being removed, get unmapped via unmap_hotplug_range() but
mapped pages get freed only for vmemmap mapping. This is just a sequential
operation where each table entry gets cleared, followed by a leaf specific
TLB flush, and then followed by memory free operation when applicable.
This approach was simple and uniform both for vmemmap and linear mappings.
But linear mapping might contain CONT marked block memory where it becomes
necessary to first clear out all entries in the range before a TLB flush.
This is as per the architecture requirement. Hence batch all TLB flushes
during the table tear down walk and finally do it in unmap_hotplug_range().
Prior to this fix, it was hypothetically possible for a speculative access
to a higher address in the contiguous block to fill the TLB with shattered
entries for the entire contiguous range after a lower address had already
been cleared and invalidated. Due to the table entries being shattered, the
subsequent TLB invalidation for the higher address would not then clear the
TLB entries for the lower address, meaning stale TLB entries could persist.
Besides it also helps in improving the performance via TLBI range operation
along with reduced synchronization instructions. The time spent executing
unmap_hotplug_range() improved 97% measured over a 2GB memory hot removal
in KVM guest.
This scheme is not applicable during vmemmap mapping tear down where memory
needs to be freed and hence a TLB flush is required after clearing out page
table entry.
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Closes: https://lore.kernel.org/all/aWZYXhrT6D2M-7-N@willie-the-truck/
Fixes: bbd6ec605c0f ("arm64/mm: Enable memory hot remove")
Cc: stable@vger.kernel.org
Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
[ replaced `__pte_clear()` with `pte_clear()` ]
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/arm64/mm/mmu.c | 36 ++++++++++++++++++++----------------
1 file changed, 20 insertions(+), 16 deletions(-)
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -886,10 +886,14 @@ static void unmap_hotplug_pte_range(pmd_
WARN_ON(!pte_present(pte));
pte_clear(&init_mm, addr, ptep);
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
- if (free_mapped)
+ if (free_mapped) {
+ /* CONT blocks are not supported in the vmemmap */
+ WARN_ON(pte_cont(pte));
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
free_hotplug_page_range(pte_page(pte),
PAGE_SIZE, altmap);
+ }
+ /* unmap_hotplug_range() flushes TLB for !free_mapped */
} while (addr += PAGE_SIZE, addr < end);
}
@@ -910,15 +914,14 @@ static void unmap_hotplug_pmd_range(pud_
WARN_ON(!pmd_present(pmd));
if (pmd_sect(pmd)) {
pmd_clear(pmdp);
-
- /*
- * One TLBI should be sufficient here as the PMD_SIZE
- * range is mapped with a single block entry.
- */
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
- if (free_mapped)
+ if (free_mapped) {
+ /* CONT blocks are not supported in the vmemmap */
+ WARN_ON(pmd_cont(pmd));
+ flush_tlb_kernel_range(addr, addr + PMD_SIZE);
free_hotplug_page_range(pmd_page(pmd),
PMD_SIZE, altmap);
+ }
+ /* unmap_hotplug_range() flushes TLB for !free_mapped */
continue;
}
WARN_ON(!pmd_table(pmd));
@@ -943,15 +946,12 @@ static void unmap_hotplug_pud_range(p4d_
WARN_ON(!pud_present(pud));
if (pud_sect(pud)) {
pud_clear(pudp);
-
- /*
- * One TLBI should be sufficient here as the PUD_SIZE
- * range is mapped with a single block entry.
- */
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
- if (free_mapped)
+ if (free_mapped) {
+ flush_tlb_kernel_range(addr, addr + PUD_SIZE);
free_hotplug_page_range(pud_page(pud),
PUD_SIZE, altmap);
+ }
+ /* unmap_hotplug_range() flushes TLB for !free_mapped */
continue;
}
WARN_ON(!pud_table(pud));
@@ -981,6 +981,7 @@ static void unmap_hotplug_p4d_range(pgd_
static void unmap_hotplug_range(unsigned long addr, unsigned long end,
bool free_mapped, struct vmem_altmap *altmap)
{
+ unsigned long start = addr;
unsigned long next;
pgd_t *pgdp, pgd;
@@ -1002,6 +1003,9 @@ static void unmap_hotplug_range(unsigned
WARN_ON(!pgd_present(pgd));
unmap_hotplug_p4d_range(pgdp, addr, next, free_mapped, altmap);
} while (addr = next, addr < end);
+
+ if (!free_mapped)
+ flush_tlb_kernel_range(start, end);
}
static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr,
^ permalink raw reply
* [arm-platforms:kvm-arm64/nv3 37/37] arch/arm64/kvm/sys_regs.c:222:50: error: expected ';' before ':' token
From: kernel test robot @ 2026-06-16 18:10 UTC (permalink / raw)
To: Marc Zyngier; +Cc: oe-kbuild-all, linux-arm-kernel
tree: https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git kvm-arm64/nv3
head: aa9a6e84f564417704258a20210b95d18ebf5601
commit: aa9a6e84f564417704258a20210b95d18ebf5601 [37/37] WIP
config: arm64-defconfig (https://download.01.org/0day-ci/archive/20260617/202606170206.EV8DFnS1-lkp@intel.com/config)
compiler: aarch64-linux-gcc (GCC) 16.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260617/202606170206.EV8DFnS1-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202606170206.EV8DFnS1-lkp@intel.com/
All errors (new ones prefixed by >>):
arch/arm64/kvm/sys_regs.c: In function 'locate_register':
>> arch/arm64/kvm/sys_regs.c:222:50: error: expected ';' before ':' token
222 | loc->loc = SR_LOC_SPECIAL : SR_LOC_MEMORY;
| ^~
| ;
vim +222 arch/arm64/kvm/sys_regs.c
168
169 #define MAPPED_EL2_SYSREG(r, m, t) \
170 case r: { \
171 locate_mapped_el2_register(vcpu, r, m, t, loc); \
172 break; \
173 }
174
175 static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg,
176 struct sr_loc *loc)
177 {
178 if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) {
179 loc->loc = SR_LOC_MEMORY;
180 return;
181 }
182
183 switch (reg) {
184 MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1,
185 translate_sctlr_el2_to_sctlr_el1 );
186 MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1,
187 translate_ttbr0_el2_to_ttbr0_el1 );
188 MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1, NULL );
189 MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1,
190 translate_tcr_el2_to_tcr_el1 );
191 MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1, NULL );
192 MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1, NULL );
193 MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1, NULL );
194 MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1, NULL );
195 MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1, NULL );
196 MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1, NULL );
197 MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1, NULL );
198 MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1, NULL );
199 MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1, NULL );
200 MAPPED_EL2_SYSREG(POR_EL2, POR_EL1, NULL );
201 MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL );
202 MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL );
203 MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL );
204 MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL );
205 MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL );
206 case CNTHCTL_EL2:
207 /* CNTHCTL_EL2 is super special, unless we support NV2p1 */
208 loc->loc = (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu) ?
209 SR_LOC_SPECIAL : SR_LOC_MEMORY);
210 break;
211 case CPTR_EL2:
212 /*
213 * CPTR_EL2 is just as special, and needs a certain amount
214 * of handholding. It always lives in memory, due to being
215 * heavily trapped thanks to CPACR_EL1.TCPAC being RES0.
216 * FEAT_NV2p1 fixes this.
217 */
218 locate_mapped_el2_register(vcpu, CPTR_EL2, CPACR_EL1,
219 translate_cptr_el2_to_cpacr_el1,
220 loc);
221 if (is_hyp_ctxt(vcpu))
> 222 loc->loc = SR_LOC_SPECIAL : SR_LOC_MEMORY;
223 break;
224 default:
225 loc->loc = locate_direct_register(vcpu, reg);
226 }
227 }
228
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply
* [PATCH 0/2] firmware: arm_scmi: Ensure automatic module loading
From: Bjorn Andersson @ 2026-06-16 18:09 UTC (permalink / raw)
To: Sudeep Holla, Cristian Marussi, Nathan Chancellor, Nicolas Schier
Cc: arm-scmi, linux-arm-kernel, linux-kernel, linux-kbuild,
Hans de Goede, Bjorn Andersson
SCMI drivers such as the Arm SCMI CPUfreq driver are allowed to be built as
modules, but they are then not automatically loaded. Rework the SCMI
device table alias support to make modpost consume the information from
MODULE_DEVICE_TABLE(scmi, ...) and allow drivers to be loaded based on
this information, if known. Also add a protocol-based alias to
trigger driver loading when only the SCMI protocol id is known.
Signed-off-by: Bjorn Andersson <bjorn.andersson@oss.qualcomm.com>
---
Bjorn Andersson (2):
module: add SCMI device table alias support
firmware: arm_scmi: request modules for discovered protocols
drivers/firmware/arm_scmi/bus.c | 19 +++++++++----------
drivers/firmware/arm_scmi/driver.c | 2 ++
include/linux/mod_devicetable.h | 12 ++++++++++++
include/linux/scmi_protocol.h | 6 +-----
scripts/mod/devicetable-offsets.c | 4 ++++
scripts/mod/file2alias.c | 13 +++++++++++++
6 files changed, 41 insertions(+), 15 deletions(-)
---
base-commit: 8d6dbbbe3ba62de0a63e962ee004afb848c8e3ac
change-id: 20260616-scmi-modalias-0f32421bd452
Best regards,
--
Bjorn Andersson <bjorn.andersson@oss.qualcomm.com>
^ permalink raw reply
* [PATCH 2/2] firmware: arm_scmi: request modules for discovered protocols
From: Bjorn Andersson @ 2026-06-16 18:09 UTC (permalink / raw)
To: Sudeep Holla, Cristian Marussi, Nathan Chancellor, Nicolas Schier
Cc: arm-scmi, linux-arm-kernel, linux-kernel, linux-kbuild,
Hans de Goede, Bjorn Andersson
In-Reply-To: <20260616-scmi-modalias-v1-0-662b8dd52ab2@oss.qualcomm.com>
SCMI client devices are created from SCMI driver id tables. If such a
driver is modular, the core does not know the driver's client name until
the module has already loaded, so normal device uevent based autoloading
cannot break the dependency cycle.
Emit a protocol-level alias for each SCMI device id table entry and
request that alias when the SCMI core discovers an implemented protocol.
This loads modules that have registered interest in the protocol; their
normal SCMI driver registration then requests the concrete client device
and the SCMI bus matches it by protocol and name.
This allows e.g. ARM_SCMI_CPUFREQ=m to autoload on systems that expose
only the SCMI Performance protocol node, where the cpufreq client name
is Linux-internal and not available from firmware before loading the
module.
Assisted-by: Codex:GPT-5.5
Signed-off-by: Bjorn Andersson <bjorn.andersson@oss.qualcomm.com>
---
drivers/firmware/arm_scmi/driver.c | 2 ++
include/linux/mod_devicetable.h | 1 +
scripts/mod/file2alias.c | 4 +++-
3 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index 3e0d975ec94c..8538eedc7c3a 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -47,6 +47,7 @@
#include <trace/events/scmi.h>
#define SCMI_VENDOR_MODULE_ALIAS_FMT "scmi-protocol-0x%02x-%s"
+#define SCMI_MODULE_ALIAS_FMT SCMI_PROTOCOL_MODULE_PREFIX "0x%02x"
static DEFINE_IDA(scmi_id);
@@ -3362,6 +3363,7 @@ static int scmi_probe(struct platform_device *pdev)
}
of_node_get(child);
+ request_module(SCMI_MODULE_ALIAS_FMT, prot_id);
scmi_create_protocol_devices(child, info, prot_id, NULL);
}
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 769382f2eadd..2cc7e78e35a3 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -477,6 +477,7 @@ struct rpmsg_device_id {
#define SCMI_NAME_SIZE 32
#define SCMI_MODULE_PREFIX "scmi:"
+#define SCMI_PROTOCOL_MODULE_PREFIX "scmi-protocol-"
struct scmi_device_id {
__u8 protocol_id;
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index a5283f4c8e6f..40a37b6bf1ad 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -852,7 +852,7 @@ static void do_rpmsg_entry(struct module *mod, void *symval)
module_alias_printf(mod, false, RPMSG_DEVICE_MODALIAS_FMT, *name);
}
-/* Looks like: scmi:NN:S */
+/* Looks like: scmi:NN:S and scmi-protocol-0xNN */
static void do_scmi_entry(struct module *mod, void *symval)
{
DEF_FIELD(symval, scmi_device_id, protocol_id);
@@ -860,6 +860,8 @@ static void do_scmi_entry(struct module *mod, void *symval)
module_alias_printf(mod, false, SCMI_MODULE_PREFIX "%02x:%s",
protocol_id, *name);
+ module_alias_printf(mod, false, SCMI_PROTOCOL_MODULE_PREFIX "0x%02x",
+ protocol_id);
}
/* Looks like: i2c:S */
--
2.53.0
^ permalink raw reply related
* [PATCH 1/2] module: add SCMI device table alias support
From: Bjorn Andersson @ 2026-06-16 18:09 UTC (permalink / raw)
To: Sudeep Holla, Cristian Marussi, Nathan Chancellor, Nicolas Schier
Cc: arm-scmi, linux-arm-kernel, linux-kernel, linux-kbuild,
Hans de Goede, Bjorn Andersson
In-Reply-To: <20260616-scmi-modalias-v1-0-662b8dd52ab2@oss.qualcomm.com>
SCMI client drivers already describe their bus match data with
MODULE_DEVICE_TABLE(scmi, ...), but modpost does not know how to consume
SCMI device tables. As a result, SCMI modules do not get generated module
aliases from their id tables.
Move struct scmi_device_id to mod_devicetable.h so it has a fixed layout
visible to modpost, add the corresponding generated offsets and teach
file2alias to emit scmi:<protocol>:<name> aliases.
Use the same stable alias format for SCMI device uevents and sysfs
modaliases. The previous string included the instance-specific device
name, which is not useful for matching modules.
Assisted-by: Codex:GPT-5.5
Signed-off-by: Bjorn Andersson <bjorn.andersson@oss.qualcomm.com>
---
drivers/firmware/arm_scmi/bus.c | 19 +++++++++----------
include/linux/mod_devicetable.h | 11 +++++++++++
include/linux/scmi_protocol.h | 6 +-----
scripts/mod/devicetable-offsets.c | 4 ++++
scripts/mod/file2alias.c | 11 +++++++++++
5 files changed, 36 insertions(+), 15 deletions(-)
diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c
index 793be9eabaed..7e344f2ee18d 100644
--- a/drivers/firmware/arm_scmi/bus.c
+++ b/drivers/firmware/arm_scmi/bus.c
@@ -13,11 +13,12 @@
#include <linux/of.h>
#include <linux/kernel.h>
#include <linux/slab.h>
+#include <linux/string.h>
#include <linux/device.h>
#include "common.h"
-#define SCMI_UEVENT_MODALIAS_FMT "%s:%02x:%s"
+#define SCMI_UEVENT_MODALIAS_FMT SCMI_MODULE_PREFIX "%02x:%s"
BLOCKING_NOTIFIER_HEAD(scmi_requested_devices_nh);
EXPORT_SYMBOL_GPL(scmi_requested_devices_nh);
@@ -141,7 +142,7 @@ static int scmi_protocol_table_register(const struct scmi_device_id *id_table)
int ret = 0;
const struct scmi_device_id *entry;
- for (entry = id_table; entry->name && ret == 0; entry++)
+ for (entry = id_table; entry->name[0] && ret == 0; entry++)
ret = scmi_protocol_device_request(entry);
return ret;
@@ -197,18 +198,18 @@ scmi_protocol_table_unregister(const struct scmi_device_id *id_table)
{
const struct scmi_device_id *entry;
- for (entry = id_table; entry->name; entry++)
+ for (entry = id_table; entry->name[0]; entry++)
scmi_protocol_device_unrequest(entry);
}
static int scmi_dev_match_by_id_table(struct scmi_device *scmi_dev,
const struct scmi_device_id *id_table)
{
- if (!id_table || !id_table->name)
+ if (!id_table || !id_table->name[0])
return 0;
/* Always skip transport devices from matching */
- for (; id_table->protocol_id && id_table->name; id_table++)
+ for (; id_table->protocol_id && id_table->name[0]; id_table++)
if (id_table->protocol_id == scmi_dev->protocol_id &&
strncmp(scmi_dev->name, "__scmi_transport_device", 23) &&
!strcmp(id_table->name, scmi_dev->name))
@@ -245,7 +246,7 @@ static struct scmi_device *scmi_child_dev_find(struct device *parent,
struct device *dev;
id_table[0].protocol_id = prot_id;
- id_table[0].name = name;
+ strscpy(id_table[0].name, name, sizeof(id_table[0].name));
dev = device_find_child(parent, &id_table, scmi_match_by_id_table);
if (!dev)
@@ -282,8 +283,7 @@ static int scmi_device_uevent(const struct device *dev, struct kobj_uevent_env *
const struct scmi_device *scmi_dev = to_scmi_dev(dev);
return add_uevent_var(env, "MODALIAS=" SCMI_UEVENT_MODALIAS_FMT,
- dev_name(&scmi_dev->dev), scmi_dev->protocol_id,
- scmi_dev->name);
+ scmi_dev->protocol_id, scmi_dev->name);
}
static ssize_t modalias_show(struct device *dev,
@@ -292,8 +292,7 @@ static ssize_t modalias_show(struct device *dev,
struct scmi_device *scmi_dev = to_scmi_dev(dev);
return sysfs_emit(buf, SCMI_UEVENT_MODALIAS_FMT,
- dev_name(&scmi_dev->dev), scmi_dev->protocol_id,
- scmi_dev->name);
+ scmi_dev->protocol_id, scmi_dev->name);
}
static DEVICE_ATTR_RO(modalias);
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 3b0c9a251a2e..769382f2eadd 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -473,6 +473,17 @@ struct rpmsg_device_id {
kernel_ulong_t driver_data;
};
+/* scmi */
+
+#define SCMI_NAME_SIZE 32
+#define SCMI_MODULE_PREFIX "scmi:"
+
+struct scmi_device_id {
+ __u8 protocol_id;
+ char name[SCMI_NAME_SIZE];
+ kernel_ulong_t driver_data;
+};
+
/* i2c */
#define I2C_NAME_SIZE 20
diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index 5ab73b1ab9aa..48b346a26068 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -10,6 +10,7 @@
#include <linux/bitfield.h>
#include <linux/device.h>
+#include <linux/mod_devicetable.h>
#include <linux/notifier.h>
#include <linux/types.h>
@@ -951,11 +952,6 @@ struct scmi_device {
#define to_scmi_dev(d) container_of_const(d, struct scmi_device, dev)
-struct scmi_device_id {
- u8 protocol_id;
- const char *name;
-};
-
struct scmi_driver {
const char *name;
int (*probe)(struct scmi_device *sdev);
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index b4178c42d08f..da5bd712c8da 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -144,6 +144,10 @@ int main(void)
DEVID(rpmsg_device_id);
DEVID_FIELD(rpmsg_device_id, name);
+ DEVID(scmi_device_id);
+ DEVID_FIELD(scmi_device_id, protocol_id);
+ DEVID_FIELD(scmi_device_id, name);
+
DEVID(i2c_device_id);
DEVID_FIELD(i2c_device_id, name);
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 8d36c74dec2d..a5283f4c8e6f 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -852,6 +852,16 @@ static void do_rpmsg_entry(struct module *mod, void *symval)
module_alias_printf(mod, false, RPMSG_DEVICE_MODALIAS_FMT, *name);
}
+/* Looks like: scmi:NN:S */
+static void do_scmi_entry(struct module *mod, void *symval)
+{
+ DEF_FIELD(symval, scmi_device_id, protocol_id);
+ DEF_FIELD_ADDR(symval, scmi_device_id, name);
+
+ module_alias_printf(mod, false, SCMI_MODULE_PREFIX "%02x:%s",
+ protocol_id, *name);
+}
+
/* Looks like: i2c:S */
static void do_i2c_entry(struct module *mod, void *symval)
{
@@ -1491,6 +1501,7 @@ static const struct devtable devtable[] = {
{"virtio", SIZE_virtio_device_id, do_virtio_entry},
{"vmbus", SIZE_hv_vmbus_device_id, do_vmbus_entry},
{"rpmsg", SIZE_rpmsg_device_id, do_rpmsg_entry},
+ {"scmi", SIZE_scmi_device_id, do_scmi_entry},
{"i2c", SIZE_i2c_device_id, do_i2c_entry},
{"i3c", SIZE_i3c_device_id, do_i3c_entry},
{"slim", SIZE_slim_device_id, do_slim_entry},
--
2.53.0
^ permalink raw reply related
* Re: [PATCH 3/9] firmware: imx: ele: Add API functions for OCOTP fuse access
From: Frieder Schrempf @ 2026-06-16 17:59 UTC (permalink / raw)
To: Frank Li, Frieder Schrempf, Pankaj Gupta
Cc: Srinivas Kandagatla, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Frank Li, Sascha Hauer, Pengutronix Kernel Team,
Fabio Estevam, Shawn Guo, devicetree, imx, linux-arm-kernel,
linux-kernel
In-Reply-To: <ajFtkysqxuLV8GgF@SMW015318>
On 16.06.26 17:36, Frank Li wrote:
> On Tue, Jun 16, 2026 at 01:52:18PM +0200, Frieder Schrempf wrote:
>> From: Frieder Schrempf <frieder.schrempf@kontron.de>
>>
>> The ELE S400 API provides read and write access to the OCOTP fuse
>> registers. This adds the necessary API functions imx_se_read_fuse()
>> and imx_se_write_fuse() to be used by other drivers such as the
>> OCOTP S400 NVMEM driver.
>>
>> This is ported from the downstream vendor kernel.
>>
>> Signed-off-by: Frieder Schrempf <frieder.schrempf@kontron.de>
>> ---
>> drivers/firmware/imx/ele_base_msg.c | 122 ++++++++++++++++++++++++++++++++++++
>> drivers/firmware/imx/ele_base_msg.h | 6 ++
>> include/linux/firmware/imx/se_api.h | 3 +
>> 3 files changed, 131 insertions(+)
>>
> ...
>> +++ b/include/linux/firmware/imx/se_api.h
>> @@ -11,4 +11,7 @@
>> #define SOC_ID_OF_IMX8ULP 0x084d
>> #define SOC_ID_OF_IMX93 0x9300
>>
>> +int imx_se_read_fuse(void *se_if_data, uint16_t fuse_id, u32 *value);
>> +int imx_se_write_fuse(void *se_if_data, uint16_t fuse_id, u32 value);
>> +
>
> This API should implement in fuse drivers. Other consume should use standard
> fuse API to get value. If put here, it may bypass fuse driver.
The reason this is here is the downstream implementation in linux-imx
and the current code organization. I thought there is some good reason
to have shared functions and it looks like Pankaj structured it like
this so all API functions live in ele_base_msg.c and the internal
structs and defines in ele_base_msg.h and se_ctrl.h are not exposed to
other drivers.
If I moved this into imx-ocotp-ele.c, then I would also need to
change how the code is organized and expose the internal se_api functions
to other drivers. I don't know if that is really a good idea.
I get your point but it looks like this contradicts the intention of
having a clean API in the firmware driver.
^ permalink raw reply
* [PATCH] ARM: omap2plus_defconfig: enable things required by iwd
From: Andreas Kemnade @ 2026-06-16 17:51 UTC (permalink / raw)
To: aaro.koskinen, andreas, khilman, rogerq, tony, linux, linux-omap,
linux-arm-kernel, linux-kernel
Several crypto-related things are missing for operation of iwd; turn
them on according to the list being printed out.
:~# /usr/libexec/iwd &
:~# No HMAC(SHA1) support found
No HMAC(MD5) support found
No CMAC(AES) support found
No HMAC(SHA256) support not found
No HMAC(SHA512) support found, certain TLS connections might fail
DES support not found
AES support not found
No CBC(DES3_EDE) support found, certain TLS connections might fail
No CBC(AES) support found, WPS will not be available
No Diffie-Hellman support found, WPS will not be available
The following options are missing in the kernel:
CONFIG_CRYPTO_USER_API_HASH
CONFIG_CRYPTO_USER_API_SKCIPHER
CONFIG_KEY_DH_OPERATIONS
CONFIG_CRYPTO_ECB
CONFIG_CRYPTO_MD5
CONFIG_CRYPTO_CBC
CONFIG_CRYPTO_SHA256
CONFIG_CRYPTO_AES
CONFIG_CRYPTO_DES
CONFIG_CRYPTO_CMAC
CONFIG_CRYPTO_HMAC
CONFIG_CRYPTO_SHA512
CONFIG_CRYPTO_SHA1
Apparently missing USER_API_SKCIPHER did also
hide some things for iwd.
Signed-off-by: Andreas Kemnade <andreas@kemnade.info>
---
arch/arm/configs/omap2plus_defconfig | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index ad5ae1636dee..fa6fb8b27f93 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -257,6 +257,9 @@ CONFIG_RXKAD=y
CONFIG_CFG80211=m
CONFIG_CFG80211_WEXT=y
CONFIG_MAC80211=m
+CONFIG_RFKILL=m
+CONFIG_RFKILL_INPUT=y
+CONFIG_RFKILL_GPIO=m
CONFIG_PCI=y
CONFIG_PCI_MSI=y
CONFIG_PCI_DRA7XX_EP=y
@@ -703,7 +706,15 @@ CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
+CONFIG_KEY_DH_OPERATIONS=y
CONFIG_SECURITY=y
+CONFIG_CRYPTO_DH_RFC7919_GROUPS=y
+CONFIG_CRYPTO_DES=m
+CONFIG_CRYPTO_DEFLATE=y
+CONFIG_CRYPTO_LZO=y
+CONFIG_CRYPTO_ZSTD=y
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_GHASH_ARM_CE=m
CONFIG_CRYPTO_AES=m
CONFIG_CRYPTO_AES_ARM_BS=m
--
2.47.3
^ permalink raw reply related
* [PATCH 6.1 337/522] arm64/mm: Enable batched TLB flush in unmap_hotplug_range()
From: Greg Kroah-Hartman @ 2026-06-16 14:58 UTC (permalink / raw)
To: stable
Cc: Greg Kroah-Hartman, patches, Will Deacon, linux-arm-kernel,
linux-kernel, David Hildenbrand (Arm), Ryan Roberts,
Anshuman Khandual, Catalin Marinas, Sasha Levin
In-Reply-To: <20260616145125.307082728@linuxfoundation.org>
6.1-stable review patch. If anyone has any objections, please let me know.
------------------
From: Anshuman Khandual <anshuman.khandual@arm.com>
[ Upstream commit 48478b9f791376b4b89018d7afdfd06865498f65 ]
During a memory hot remove operation, both linear and vmemmap mappings for
the memory range being removed, get unmapped via unmap_hotplug_range() but
mapped pages get freed only for vmemmap mapping. This is just a sequential
operation where each table entry gets cleared, followed by a leaf specific
TLB flush, and then followed by memory free operation when applicable.
This approach was simple and uniform both for vmemmap and linear mappings.
But linear mapping might contain CONT marked block memory where it becomes
necessary to first clear out all entries in the range before a TLB flush.
This is as per the architecture requirement. Hence batch all TLB flushes
during the table tear down walk and finally do it in unmap_hotplug_range().
Prior to this fix, it was hypothetically possible for a speculative access
to a higher address in the contiguous block to fill the TLB with shattered
entries for the entire contiguous range after a lower address had already
been cleared and invalidated. Due to the table entries being shattered, the
subsequent TLB invalidation for the higher address would not then clear the
TLB entries for the lower address, meaning stale TLB entries could persist.
Besides it also helps in improving the performance via TLBI range operation
along with reduced synchronization instructions. The time spent executing
unmap_hotplug_range() improved 97% measured over a 2GB memory hot removal
in KVM guest.
This scheme is not applicable during vmemmap mapping tear down where memory
needs to be freed and hence a TLB flush is required after clearing out page
table entry.
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Closes: https://lore.kernel.org/all/aWZYXhrT6D2M-7-N@willie-the-truck/
Fixes: bbd6ec605c0f ("arm64/mm: Enable memory hot remove")
Cc: stable@vger.kernel.org
Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
[ replaced `__pte_clear()` with `pte_clear()` ]
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/arm64/mm/mmu.c | 36 ++++++++++++++++++++----------------
1 file changed, 20 insertions(+), 16 deletions(-)
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -925,10 +925,14 @@ static void unmap_hotplug_pte_range(pmd_
WARN_ON(!pte_present(pte));
pte_clear(&init_mm, addr, ptep);
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
- if (free_mapped)
+ if (free_mapped) {
+ /* CONT blocks are not supported in the vmemmap */
+ WARN_ON(pte_cont(pte));
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
free_hotplug_page_range(pte_page(pte),
PAGE_SIZE, altmap);
+ }
+ /* unmap_hotplug_range() flushes TLB for !free_mapped */
} while (addr += PAGE_SIZE, addr < end);
}
@@ -949,15 +953,14 @@ static void unmap_hotplug_pmd_range(pud_
WARN_ON(!pmd_present(pmd));
if (pmd_sect(pmd)) {
pmd_clear(pmdp);
-
- /*
- * One TLBI should be sufficient here as the PMD_SIZE
- * range is mapped with a single block entry.
- */
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
- if (free_mapped)
+ if (free_mapped) {
+ /* CONT blocks are not supported in the vmemmap */
+ WARN_ON(pmd_cont(pmd));
+ flush_tlb_kernel_range(addr, addr + PMD_SIZE);
free_hotplug_page_range(pmd_page(pmd),
PMD_SIZE, altmap);
+ }
+ /* unmap_hotplug_range() flushes TLB for !free_mapped */
continue;
}
WARN_ON(!pmd_table(pmd));
@@ -982,15 +985,12 @@ static void unmap_hotplug_pud_range(p4d_
WARN_ON(!pud_present(pud));
if (pud_sect(pud)) {
pud_clear(pudp);
-
- /*
- * One TLBI should be sufficient here as the PUD_SIZE
- * range is mapped with a single block entry.
- */
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
- if (free_mapped)
+ if (free_mapped) {
+ flush_tlb_kernel_range(addr, addr + PUD_SIZE);
free_hotplug_page_range(pud_page(pud),
PUD_SIZE, altmap);
+ }
+ /* unmap_hotplug_range() flushes TLB for !free_mapped */
continue;
}
WARN_ON(!pud_table(pud));
@@ -1020,6 +1020,7 @@ static void unmap_hotplug_p4d_range(pgd_
static void unmap_hotplug_range(unsigned long addr, unsigned long end,
bool free_mapped, struct vmem_altmap *altmap)
{
+ unsigned long start = addr;
unsigned long next;
pgd_t *pgdp, pgd;
@@ -1041,6 +1042,9 @@ static void unmap_hotplug_range(unsigned
WARN_ON(!pgd_present(pgd));
unmap_hotplug_p4d_range(pgdp, addr, next, free_mapped, altmap);
} while (addr = next, addr < end);
+
+ if (!free_mapped)
+ flush_tlb_kernel_range(start, end);
}
static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr,
^ permalink raw reply
* [arm-platforms:kvm-arm64/nv3 37/37] arch/arm64/kvm/sys_regs.c:222:32: warning: misleading indentation; statement is not part of the previous 'if'
From: kernel test robot @ 2026-06-16 17:35 UTC (permalink / raw)
To: Marc Zyngier; +Cc: llvm, oe-kbuild-all, linux-arm-kernel
tree: https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git kvm-arm64/nv3
head: aa9a6e84f564417704258a20210b95d18ebf5601
commit: aa9a6e84f564417704258a20210b95d18ebf5601 [37/37] WIP
config: arm64-randconfig-003 (https://download.01.org/0day-ci/archive/20260617/202606170158.W9YgYBN4-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260617/202606170158.W9YgYBN4-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202606170158.W9YgYBN4-lkp@intel.com/
All warnings (new ones prefixed by >>):
arch/arm64/kvm/sys_regs.c:222:30: error: expected ';' after expression
222 | loc->loc = SR_LOC_SPECIAL : SR_LOC_MEMORY;
| ^
| ;
>> arch/arm64/kvm/sys_regs.c:222:32: warning: misleading indentation; statement is not part of the previous 'if' [-Wmisleading-indentation]
222 | loc->loc = SR_LOC_SPECIAL : SR_LOC_MEMORY;
| ^
arch/arm64/kvm/sys_regs.c:221:3: note: previous statement is here
221 | if (is_hyp_ctxt(vcpu))
| ^
>> arch/arm64/kvm/sys_regs.c:222:32: warning: expression result unused [-Wunused-value]
222 | loc->loc = SR_LOC_SPECIAL : SR_LOC_MEMORY;
| ^~~~~~~~~~~~~
2 warnings and 1 error generated.
vim +/if +222 arch/arm64/kvm/sys_regs.c
168
169 #define MAPPED_EL2_SYSREG(r, m, t) \
170 case r: { \
171 locate_mapped_el2_register(vcpu, r, m, t, loc); \
172 break; \
173 }
174
175 static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg,
176 struct sr_loc *loc)
177 {
178 if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) {
179 loc->loc = SR_LOC_MEMORY;
180 return;
181 }
182
183 switch (reg) {
184 MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1,
185 translate_sctlr_el2_to_sctlr_el1 );
186 MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1,
187 translate_ttbr0_el2_to_ttbr0_el1 );
188 MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1, NULL );
189 MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1,
190 translate_tcr_el2_to_tcr_el1 );
191 MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1, NULL );
192 MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1, NULL );
193 MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1, NULL );
194 MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1, NULL );
195 MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1, NULL );
196 MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1, NULL );
197 MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1, NULL );
198 MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1, NULL );
199 MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1, NULL );
200 MAPPED_EL2_SYSREG(POR_EL2, POR_EL1, NULL );
201 MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL );
202 MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL );
203 MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL );
204 MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL );
205 MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL );
206 case CNTHCTL_EL2:
207 /* CNTHCTL_EL2 is super special, unless we support NV2p1 */
208 loc->loc = (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu) ?
209 SR_LOC_SPECIAL : SR_LOC_MEMORY);
210 break;
211 case CPTR_EL2:
212 /*
213 * CPTR_EL2 is just as special, and needs a certain amount
214 * of handholding. It always lives in memory, due to being
215 * heavily trapped thanks to CPACR_EL1.TCPAC being RES0.
216 * FEAT_NV2p1 fixes this.
217 */
218 locate_mapped_el2_register(vcpu, CPTR_EL2, CPACR_EL1,
219 translate_cptr_el2_to_cpacr_el1,
220 loc);
221 if (is_hyp_ctxt(vcpu))
> 222 loc->loc = SR_LOC_SPECIAL : SR_LOC_MEMORY;
223 break;
224 default:
225 loc->loc = locate_direct_register(vcpu, reg);
226 }
227 }
228
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply
* Re: [PATCH v2] dmaengine: sun6i-dma: Fix memory leak in sun6i_dma_terminate_all
From: Jernej Škrabec @ 2026-06-16 16:59 UTC (permalink / raw)
To: vkoul, Frank.Li, wens, samuel, mripard, arnd, Hongling Zeng
Cc: dmaengine, linux-arm-kernel, linux-sunxi, linux-kernel,
zhongling0719, Hongling Zeng
In-Reply-To: <20260616060449.42225-1-zenghongling@kylinos.cn>
Dne torek, 16. junij 2026 ob 08:04:49 Srednjeevropski poletni čas je Hongling Zeng napisal(a):
> When terminating a non-cyclic DMA transfer, the active descriptor
> is not properly reclaimed. The descriptor is removed from the
> desc_issued list in sun6i_dma_start_desc(), but in
> sun6i_dma_terminate_all(), only cyclic transfer descriptors are
> added to the desc_completed list before cleanup.
>
> For non-cyclic transfers, pchan->desc is set to NULL without first
> adding the descriptor back to a list that vchan_get_all_descriptors()
> can collect. This causes the descriptor and its associated LLI chain
> to be permanently leaked.
>
> Fix by ensuring both cyclic and non-cyclic active descriptors are
> added to the desc_completed list before setting pchan->desc to NULL.
>
> Fixes: 555859308723 ("dmaengine: sun6i: Add driver for the Allwinner A31 DMA controller")
> Signed-off-by: Hongling Zeng <zenghongling@kylinos.cn>
Acked-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Best regards,
Jernej
>
> ---
> Change in v2:
> -Add pchan->desc != pchan->done check to prevent race condition
> where completed descriptors could be double-added to desc_completed
> list, causing list corruption
> ---
> drivers/dma/sun6i-dma.c | 12 +++++-------
> 1 file changed, 5 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c
> index 7a79f346250a..12d038ef5f2e 100644
> --- a/drivers/dma/sun6i-dma.c
> +++ b/drivers/dma/sun6i-dma.c
> @@ -946,16 +946,14 @@ static int sun6i_dma_terminate_all(struct dma_chan *chan)
>
> spin_lock_irqsave(&vchan->vc.lock, flags);
>
> - if (vchan->cyclic) {
> - vchan->cyclic = false;
> - if (pchan && pchan->desc) {
> - struct virt_dma_desc *vd = &pchan->desc->vd;
> - struct virt_dma_chan *vc = &vchan->vc;
> + if (pchan && pchan->desc && pchan->desc != pchan->done) {
> + struct virt_dma_desc *vd = &pchan->desc->vd;
> + struct virt_dma_chan *vc = &vchan->vc;
>
> - list_add_tail(&vd->node, &vc->desc_completed);
> - }
> + list_add_tail(&vd->node, &vc->desc_completed);
> }
>
> + vchan->cyclic = false;
> vchan_get_all_descriptors(&vchan->vc, &head);
>
> if (pchan) {
>
^ permalink raw reply
* Re: [PATCH v3] dmaengine: sun6i-dma: Fix use-after-free in error handling paths
From: Jernej Škrabec @ 2026-06-16 16:54 UTC (permalink / raw)
To: vkoul, Frank.Li, wens, samuel, mripard, arnd, Hongling Zeng
Cc: dmaengine, linux-arm-kernel, linux-sunxi, linux-kernel,
zhongling0719, Hongling Zeng
In-Reply-To: <20260616023138.15904-1-zenghongling@kylinos.cn>
Dne torek, 16. junij 2026 ob 04:31:38 Srednjeevropski poletni čas je Hongling Zeng napisal(a):
> In error handling paths, the for loop frees v_lli in the loop body,
> then accesses v_lli->v_lli_next and v_lli->p_lli_next in the
> increment expression, which is use-after-free.
>
> Fix by saving both the next virtual and physical pointers before
> freeing the current node.
>
> Fixes: 555859308723 ("dmaengine: Add driver for Allwinner sun6i DMA")
> Signed-off-by: Hongling Zeng <zenghongling@kylinos.cn>
> Suggested-by: Jernej Skrabec <jernej.skrabec@gmail.com>
>
> ---
This looks great! Thank you for your patience.
Reviewed-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Best regards,
Jernej
> Changes in v2:
> -Refactored the fix to avoid code duplication by creating a helper function
> sun6i_dma_free_lli_list() that handles LLI list cleanup
> -Add Suggested-by: Jernej Skrabec <jernej.skrabec@gmail.com>
>
> ---
> Change in v3:
> -Further refactoring to move txd handling into the helper function
> as suggested by Jernej
> ---
> drivers/dma/sun6i-dma.c | 31 ++++++++++++++++---------------
> 1 file changed, 16 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c
> index a9a254dbf8cb..7a79f346250a 100644
> --- a/drivers/dma/sun6i-dma.c
> +++ b/drivers/dma/sun6i-dma.c
> @@ -406,16 +406,12 @@ static inline void sun6i_dma_dump_lli(struct sun6i_vchan *vchan,
> v_lli->len, v_lli->para, v_lli->p_lli_next);
> }
>
> -static void sun6i_dma_free_desc(struct virt_dma_desc *vd)
> +static void sun6i_dma_free_desc(struct sun6i_dma_dev *sdev,
> + struct sun6i_desc *txd)
> {
> - struct sun6i_desc *txd = to_sun6i_desc(&vd->tx);
> - struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vd->tx.chan->device);
> struct sun6i_dma_lli *v_lli, *v_next;
> dma_addr_t p_lli, p_next;
>
> - if (unlikely(!txd))
> - return;
> -
> p_lli = txd->p_lli;
> v_lli = txd->v_lli;
>
> @@ -432,6 +428,17 @@ static void sun6i_dma_free_desc(struct virt_dma_desc *vd)
> kfree(txd);
> }
>
> +static void sun6i_dma_free_desc_virt(struct virt_dma_desc *vd)
> +{
> + struct sun6i_desc *txd = to_sun6i_desc(&vd->tx);
> + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vd->tx.chan->device);
> +
> + if (unlikely(!txd))
> + return;
> +
> + sun6i_dma_free_desc(sdev, txd);
> +}
> +
> static int sun6i_dma_start_desc(struct sun6i_vchan *vchan)
> {
> struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vchan->vc.chan.device);
> @@ -788,10 +795,7 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg(
> return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
>
> err_lli_free:
> - for (p_lli = txd->p_lli, v_lli = txd->v_lli; v_lli;
> - p_lli = v_lli->p_lli_next, v_lli = v_lli->v_lli_next)
> - dma_pool_free(sdev->pool, v_lli, p_lli);
> - kfree(txd);
> + sun6i_dma_free_desc(sdev, txd);
> return NULL;
> }
>
> @@ -869,10 +873,7 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_cyclic(
> return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
>
> err_lli_free:
> - for (p_lli = txd->p_lli, v_lli = txd->v_lli; v_lli;
> - p_lli = v_lli->p_lli_next, v_lli = v_lli->v_lli_next)
> - dma_pool_free(sdev->pool, v_lli, p_lli);
> - kfree(txd);
> + sun6i_dma_free_desc(sdev, txd);
> return NULL;
> }
>
> @@ -1431,7 +1432,7 @@ static int sun6i_dma_probe(struct platform_device *pdev)
> struct sun6i_vchan *vchan = &sdc->vchans[i];
>
> INIT_LIST_HEAD(&vchan->node);
> - vchan->vc.desc_free = sun6i_dma_free_desc;
> + vchan->vc.desc_free = sun6i_dma_free_desc_virt;
> vchan_init(&vchan->vc, &sdc->slave);
> }
>
>
^ permalink raw reply
* Re: [PATCH RFC 8/9] arm64: dts: qcom: shikra-cqs-evk: Enable ethernet0
From: Mohd Ayaan Anwar @ 2026-06-16 16:50 UTC (permalink / raw)
To: Konrad Dybcio
Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
Richard Cochran, Bjorn Andersson, Konrad Dybcio, Maxime Coquelin,
Alexandre Torgue, Russell King, linux-arm-msm, netdev, devicetree,
linux-kernel, linux-stm32, linux-arm-kernel
In-Reply-To: <2cb658f3-f564-4396-884d-d025eaa674a1@oss.qualcomm.com>
On Tue, Jun 16, 2026 at 11:50:26AM +0200, Konrad Dybcio wrote:
> On 6/11/26 8:37 PM, Mohd Ayaan Anwar wrote:
>
> > +&tlmm {
> > + ethernet0_defaults: ethernet0-defaults-state {
>
> s/defaults/default
>
> Please move this definition to shikra.dtsi
>
The CQM and CQS variants have identical GPIO mapping but the IQS is
different. So should I keep this in shikra.dtsi and overwrite for IQS in
shikra-iqs-evk.dts?
> > +
> > + emac0_phy_en_hog: emac0-phy-en-hog {
> > + gpio-hog;
> > + gpios = <149 GPIO_ACTIVE_HIGH>;
> > + output-high;
> > + line-name = "emac0-phy-en";
> > + };
>
> This looks like a hack - what does this pin actually do?
>
The power supply to both PHYs on Shikra is gated by a GPIO pin. I am
unsure whether they should be modelled as a fixed, enable-on-boot
regulator or just like this. They need to be powered on early so that
MDIO can detect them.
Thank you for the review. I will fix the stylistic issues in v2.
Ayaan
^ permalink raw reply
* Re: [PATCH v2 1/5] arm64: Rename page table BSS section to .bss..pgtbl
From: Frank Li @ 2026-06-16 16:32 UTC (permalink / raw)
To: Ard Biesheuvel
Cc: Ard Biesheuvel, linux-arm-kernel, linux-kernel, Will Deacon,
Catalin Marinas, Kevin Brodsky, Mark Brown, Marc Zyngier
In-Reply-To: <fe467789-ae72-4a74-a9c0-6062b522fbd2@app.fastmail.com>
On Tue, Jun 16, 2026 at 11:38:48AM +0200, Ard Biesheuvel wrote:
>
>
> On Mon, 15 Jun 2026, at 22:09, Frank Li wrote:
> > On Thu, Jun 04, 2026 at 05:11:53PM +0200, Ard Biesheuvel wrote:
> >> From: Ard Biesheuvel <ardb@kernel.org>
> >>
> >> Rename the .pgdir.bss section to .bss..pgtbl so that the compiler will
> >> notice the leading ".bss" and mark it as NOBITS by default (rather than
> >> PROGBITS, which would take up space in Image binary, forcing all of the
> >> preceding BSS to be emitted into the image as well). This supersedes the
> >> NOLOAD linker directive, which achieves the same thing, and can
> >> therefore be dropped.
> >>
> >> Also, rename .pgdir to .pgtbl to be more generic, as page tables of
> >> various levels will reside here.
> >>
> >> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> >> ---
> >
> > I hit a boot failure on i.MX8QXP with this patch
> >
> > [ 0.823515] Unable to handle kernel paging request at virtual
> > address ffff00000328f000
> > [ 0.831116] Mem abort info:
> > [ 0.833886] ESR = 0x0000000096000147
> > [ 0.837622] EC = 0x25: DABT (current EL), IL = 32 bits
> > [ 0.842923] SET = 0, FnV = 0
> > [ 0.845961] EA = 0, S1PTW = 0
> > [ 0.849088] FSC = 0x07: level 3 translation fault
> > [ 0.853952] Data abort info:
> > [ 0.856809] ISV = 0, ISS = 0x00000147, ISS2 = 0x00000000
> > [ 0.862296] CM = 1, WnR = 1, TnD = 0, TagAccess = 0
> > [ 0.867330] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> > [ 0.872633] swapper pgtable: 4k pages, 48-bit VAs,
> > pgdp=000000008211f000
> > [ 0.879321] [ffff00000328f000] pgd=0000000000000000,
> > p4d=18000008bffff403, pud=18000008bfffe403, pmd=18000008bffea403,
> > pte=00e800008328ff06
> > [ 0.891834] Internal error: Oops: 0000000096000147 [#1] SMP
> > [ 0.897469] Modules linked in:
> > [ 0.900514] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted
> > 7.1.0-rc1-00016-g63e0b6a5b693 #834 PREEMPT
> > [ 0.909978] Hardware name: Freescale i.MX8QXP MEK (DT)
> > [ 0.915104] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS
> > BTYPE=--)
> > [ 0.922053] pc : dcache_clean_inval_poc+0x24/0x48
> > [ 0.926742] lr : kvm_arm_init+0xa78/0x1638
> > [ 0.930828] sp : ffff80008318bd10
> > [ 0.934127] x29: ffff80008318bd50 x28: 0000000000000000 x27:
> > ffff00000328f000
> > [ 0.941251] x26: 0000000000002000 x25: ffff80008219e000 x24:
> > 0000000001002222
> > [ 0.948374] x23: 0000000000000030 x22: ffff800081e850c0 x21:
> > ffff800082b790d0
> > [ 0.955498] x20: 0000000000000004 x19: ffff8000830a0000 x18:
> > 0000000000000000
> > [ 0.962622] x17: ffff800082f938b8 x16: ffff800082b8b4e0 x15:
> > ffff800082b8b4b8
> > [ 0.969746] x14: ffff80008308f0a0 x13: ffff800082b8b490 x12:
> > ffff800082b8b530
> > [ 0.976869] x11: ffff800082b8b508 x10: ffff80008308f140 x9 :
> > ffff80008308f118
> > [ 0.983993] x8 : ffff80008308f0f0 x7 : ffff80008308f0c8 x6 :
> > ffff80008308f078
> > [ 0.991117] x5 : ffff80008308f050 x4 : ffff800082b8b468 x3 :
> > 000000000000003f
> > [ 0.998240] x2 : 0000000000000040 x1 : ffff000003291000 x0 :
> > ffff00000328f000
> > [ 1.005367] Call trace:
> > [ 1.007800] dcache_clean_inval_poc+0x24/0x48 (P)
> > [ 1.012490] do_one_initcall+0x80/0x1c8
> > [ 1.016310] kernel_init_freeable+0x208/0x2f0
> > [ 1.020654] kernel_init+0x24/0x1e0
> > [ 1.024131] ret_from_fork+0x10/0x20
> > [ 1.027700] Code: 9ac32042 d1000443 8a230000 d503201f (d50b7e20)
> > [ 1.033779] ---[ end trace 0000000000000000 ]---
> > [ 1.038428] Kernel panic - not syncing: Attempted to kill init!
> > exitcode=0x0000000b
> > [ 1.046026] SMP: stopping secondary CPUs
> > [ 1.049943] Kernel Offset: disabled
> > [ 1.053408] CPU features: 0x00000000,00000008,00040021,0400421b
> > [ 1.059316] Memory Limit: none
> > [ 1.062359] ---[ end Kernel panic - not syncing: Attempted to kill
> > init! exitcode=0x0000000b ]---
> >
> >
> > Any idea?
> >
>
> Which tree is this based on?
Sorry, after rebasing onto 20260615 linux-next, the problem disappeared. The
problem appeared at next-20260608. I suppose some patch fixed it recently.
Frank
^ permalink raw reply
* Re: [PATCH RFC 3/9] net: stmmac: qcom-ethqos: fix RGMII_ID mode to use DLL bypass
From: Mohd Ayaan Anwar @ 2026-06-16 16:32 UTC (permalink / raw)
To: Andrew Lunn
Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
Richard Cochran, Bjorn Andersson, Konrad Dybcio, Maxime Coquelin,
Alexandre Torgue, Russell King, linux-arm-msm, netdev, devicetree,
linux-kernel, linux-stm32, linux-arm-kernel
In-Reply-To: <82705420-771d-41bf-a4d9-ed94dff86ff0@lunn.ch>
On Mon, Jun 15, 2026 at 06:48:55PM +0200, Andrew Lunn wrote:
> > > I'm curious how this works at the moment? Do no boards make use of
> > > RGMII ID? Are all current boards broken?
> >
> > Searching through the DTS, I found that we have two boards using "rgmii"
> > (qcs404-evb-4000.dts and sa8155-adp.dts) and another board using
> > "rgmii-txid" (sa8540p-ride.dts). No board which uses RGMII ID.
>
> So this causes problems. We cannot break existing boards, yet it would
> be good to fix the current broken behaviour.
I am trying to track down the sa8155-adp and sa8540p-ride boards. The
EMAC on QCS404 is extremely similar to QCS615 Ride [0], and I got that
board to work with this series (with RGMII ID mode). So I am fairly
confident that QCS404 would not break (if it's even booting up with the
upstream kernel currently). Also, I think we could change the phy-mode
for QCS404 to "rgmii-id" from "rgmii" if these fixes go in.
> It could be the best way forward is that you issue a warning when
> "rgmii" is found and pass rgmii-id to the PHY. And you also change the
> two boards to use rgmii-id. Let's think about the rgmii-txid case once
> we better understand it.
>
As Konrad mentioned, it would be great to know if we can test out these
boards. Looking at the different versions of the ETHQOS programming
guide, stopping MAC side delay should be as simple as what we are doing
in this commit. But whether the two boards work directly with the
default PHY delays is unknown.
Ayaan
[0] The proposed RGMII fixes would help enable ethernet on QCS615 Ride
as well. I see that the original series had a lot of issues:
https://lore.kernel.org/all/20250121-dts_qcs615-v3-0-fa4496950d8a@quicinc.com/
^ permalink raw reply
* Re: [PATCH RFC 4/9] net: stmmac: qcom-ethqos: add per-platform NOC clock voting
From: Mohd Ayaan Anwar @ 2026-06-16 16:17 UTC (permalink / raw)
To: Konrad Dybcio
Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
Richard Cochran, Bjorn Andersson, Konrad Dybcio, Maxime Coquelin,
Alexandre Torgue, Russell King, linux-arm-msm, netdev, devicetree,
linux-kernel, linux-stm32, linux-arm-kernel
In-Reply-To: <45d7faac-7c0f-4f89-808e-06129e8420e4@oss.qualcomm.com>
Hi Konrad,
On Mon, Jun 15, 2026 at 02:13:05PM +0200, Konrad Dybcio wrote:
> On 6/11/26 8:37 PM, Mohd Ayaan Anwar wrote:
> > Some SoCs gate the EMAC's path to the System NOC behind dedicated clocks
> > that must be enabled before the DMA can reach memory. Add
> > ethqos_noc_clk_cfg and the corresponding fields in the driver-data and
> > runtime structs so each compatible can declare its own set with per-clock
> > rates. The clocks are acquired during probe and enabled/disabled
> > alongside the existing link clock in ethqos_clks_config().
>
> Sounds like we should use an OPP table instead, we can't just do
> set_rate() on qcom, as that will not propagate the required perf
> state to the clock controller's supplier power domain (i.e. VDDCX)
>
Understood, I will test this out for v2.
Ayaan
^ permalink raw reply
* Re: [PATCH 1/2] dt-bindings: pinctrl: aspeed,ast2700-soc1: Add JTAGM1TRST group
From: Conor Dooley @ 2026-06-16 15:59 UTC (permalink / raw)
To: Billy Tsai
Cc: Andrew Jeffery, Linus Walleij, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Joel Stanley, linux-aspeed, openbmc, linux-gpio,
devicetree, linux-arm-kernel, linux-kernel
In-Reply-To: <20260616-pinctrl-fix-v1-1-621036e45c7c@aspeedtech.com>
[-- Attachment #1: Type: text/plain, Size: 75 bytes --]
Acked-by: Conor Dooley <conor.dooley@microchip.com>
pw-bot: not-applicable
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply
* [PATCH v5 4/4] selftests/bpf: Adjust wasted entries threshold for ARM64 BRBE
From: Puranjay Mohan @ 2026-06-16 15:57 UTC (permalink / raw)
To: bpf
Cc: Puranjay Mohan, Puranjay Mohan, Alexei Starovoitov,
Daniel Borkmann, John Fastabend, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
Will Deacon, Mark Rutland, Catalin Marinas, Leo Yan, Rob Herring,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, James Clark, Ian Rogers, Adrian Hunter, Shuah Khan,
Breno Leitao, Ravi Bangoria, Stephane Eranian,
Kumar Kartikeya Dwivedi, Usama Arif, linux-arm-kernel,
linux-perf-users, linux-kselftest, linux-kernel, kernel-team
In-Reply-To: <20260616155716.2631508-1-puranjay@kernel.org>
The get_branch_snapshot test checks that bpf_get_branch_snapshot()
doesn't waste too many branch entries on infrastructure overhead. The
threshold of < 10 was calibrated for x86 where about 7 entries are
wasted.
On ARM64, the BPF trampoline generates more branches than x86,
resulting in about 13 wasted entries. The overhead comes from the BPF
trampoline calling __bpf_prog_enter_recur which on ARM64 makes
out-of-line calls to __rcu_read_lock and generates more conditional
branches than x86:
[#12] bpf_testmod_loop_test+0x40 -> bpf_trampoline_...+0x48
[#11] bpf_trampoline_...+0x68 -> __bpf_prog_enter_recur+0x0
[#10] __bpf_prog_enter_recur+0x20 -> __bpf_prog_enter_recur+0x118
[#09] __bpf_prog_enter_recur+0x154 -> __bpf_prog_enter_recur+0x160
[#08] __bpf_prog_enter_recur+0x164 -> __bpf_prog_enter_recur+0x2c
[#07] __bpf_prog_enter_recur+0x2c -> __rcu_read_lock+0x0
[#06] __rcu_read_lock+0x18 -> __bpf_prog_enter_recur+0x30
[#05] __bpf_prog_enter_recur+0x9c -> __bpf_prog_enter_recur+0xf0
[#04] __bpf_prog_enter_recur+0xf4 -> __bpf_prog_enter_recur+0xa8
[#03] __bpf_prog_enter_recur+0xb8 -> __bpf_prog_enter_recur+0x100
[#02] __bpf_prog_enter_recur+0x114 -> bpf_trampoline_...+0x6c
[#01] bpf_trampoline_...+0x78 -> bpf_prog_...test1+0x0
[#00] bpf_prog_...test1+0x58 -> arm_brbe_snapshot_branch_stack+0x0
Use an architecture-specific threshold of < 14 for ARM64 to accommodate
this overhead while still detecting regressions.
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
---
.../selftests/bpf/prog_tests/get_branch_snapshot.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
index 0394a1156d99..8d1a3480767f 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
@@ -116,13 +116,18 @@ void serial_test_get_branch_snapshot(void)
ASSERT_GT(skel->bss->test1_hits, 6, "find_looptest_in_lbr");
- /* Given we stop LBR in software, we will waste a few entries.
+ /* Given we stop LBR/BRBE in software, we will waste a few entries.
* But we should try to waste as few as possible entries. We are at
- * about 7 on x86_64 systems.
- * Add a check for < 10 so that we get heads-up when something
- * changes and wastes too many entries.
+ * about 7 on x86_64 and about 13 on arm64 systems (the arm64 BPF
+ * trampoline generates more branches than x86_64).
+ * Add a check so that we get heads-up when something changes and
+ * wastes too many entries.
*/
+#if defined(__aarch64__)
+ ASSERT_LT(skel->bss->wasted_entries, 14, "check_wasted_entries");
+#else
ASSERT_LT(skel->bss->wasted_entries, 10, "check_wasted_entries");
+#endif
cleanup:
get_branch_snapshot__destroy(skel);
--
2.53.0-Meta
^ permalink raw reply related
* [PATCH v5 3/4] perf/arm64: Add BRBE support for bpf_get_branch_snapshot()
From: Puranjay Mohan @ 2026-06-16 15:57 UTC (permalink / raw)
To: bpf
Cc: Puranjay Mohan, Puranjay Mohan, Alexei Starovoitov,
Daniel Borkmann, John Fastabend, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
Will Deacon, Mark Rutland, Catalin Marinas, Leo Yan, Rob Herring,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, James Clark, Ian Rogers, Adrian Hunter, Shuah Khan,
Breno Leitao, Ravi Bangoria, Stephane Eranian,
Kumar Kartikeya Dwivedi, Usama Arif, linux-arm-kernel,
linux-perf-users, linux-kselftest, linux-kernel, kernel-team
In-Reply-To: <20260616155716.2631508-1-puranjay@kernel.org>
Enable bpf_get_branch_snapshot() on ARM64 by implementing the
perf_snapshot_branch_stack static call for BRBE.
BRBE is paused before masking exceptions to avoid branch buffer
pollution from trace_hardirqs_off(). Exceptions are then masked with
local_daif_save() to prevent PMU overflow pseudo-NMIs from interfering.
If an overflow between pause and DAIF save re-enables BRBE, the snapshot
detects this via BRBFCR_EL1.PAUSED and bails out.
Branch records are read using perf_entry_from_brbe_regset() with a NULL
event pointer to bypass event-specific filtering. The buffer is
invalidated after reading.
Introduce a for_each_brbe_entry() iterator to deduplicate bank
iteration between brbe_read_filtered_entries() and the snapshot.
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
---
drivers/perf/arm_brbe.c | 128 ++++++++++++++++++++++++++++++++-------
drivers/perf/arm_brbe.h | 9 +++
drivers/perf/arm_pmuv3.c | 5 +-
3 files changed, 120 insertions(+), 22 deletions(-)
diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c
index effbdeacfcbb..a141ad7abcf2 100644
--- a/drivers/perf/arm_brbe.c
+++ b/drivers/perf/arm_brbe.c
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/bitmap.h>
#include <linux/perf/arm_pmu.h>
+#include <asm/daifflags.h>
#include "arm_brbe.h"
#define BRBFCR_EL1_BRANCH_FILTERS (BRBFCR_EL1_DIRECT | \
@@ -256,6 +257,14 @@ static bool valid_brbe_version(int brbe_version)
brbe_version == ID_AA64DFR0_EL1_BRBE_BRBE_V1P1;
}
+static __always_inline bool cpu_has_brbe(void)
+{
+ u64 aa64dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1);
+ int brbe = cpuid_feature_extract_unsigned_field(aa64dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT);
+
+ return valid_brbe_version(brbe);
+}
+
static void select_brbe_bank(int bank)
{
u64 brbfcr;
@@ -271,6 +280,20 @@ static void select_brbe_bank(int bank)
isb();
}
+static inline void __brbe_advance(int *bank, int *idx, int nr_hw)
+{
+ if (++(*idx) >= BRBE_BANK_MAX_ENTRIES &&
+ *bank * BRBE_BANK_MAX_ENTRIES + *idx < nr_hw) {
+ *idx = 0;
+ select_brbe_bank(++(*bank));
+ }
+}
+
+#define for_each_brbe_entry(idx, nr_hw) \
+ for (int __bank = (select_brbe_bank(0), 0), idx = 0; \
+ __bank * BRBE_BANK_MAX_ENTRIES + idx < (nr_hw); \
+ __brbe_advance(&__bank, &idx, (nr_hw)))
+
static bool __read_brbe_regset(struct brbe_regset *entry, int idx)
{
entry->brbinf = get_brbinf_reg(idx);
@@ -474,11 +497,9 @@ unsigned int brbe_num_branch_records(const struct arm_pmu *armpmu)
void brbe_probe(struct arm_pmu *armpmu)
{
- u64 brbidr, aa64dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1);
- u32 brbe;
+ u64 brbidr;
- brbe = cpuid_feature_extract_unsigned_field(aa64dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT);
- if (!valid_brbe_version(brbe))
+ if (!cpu_has_brbe())
return;
brbidr = read_sysreg_s(SYS_BRBIDR0_EL1);
@@ -618,10 +639,10 @@ static bool perf_entry_from_brbe_regset(int index, struct perf_branch_entry *ent
brbe_set_perf_entry_type(entry, brbinf);
- if (!branch_sample_no_cycles(event))
+ if (!event || !branch_sample_no_cycles(event))
entry->cycles = brbinf_get_cycles(brbinf);
- if (!branch_sample_no_flags(event)) {
+ if (!event || !branch_sample_no_flags(event)) {
/* Mispredict info is available for source only and complete branch records. */
if (!brbe_record_is_target_only(brbinf)) {
entry->mispred = brbinf_get_mispredict(brbinf);
@@ -774,32 +795,97 @@ void brbe_read_filtered_entries(struct perf_branch_stack *branch_stack,
{
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
int nr_hw = brbe_num_branch_records(cpu_pmu);
- int nr_banks = DIV_ROUND_UP(nr_hw, BRBE_BANK_MAX_ENTRIES);
int nr_filtered = 0;
u64 branch_sample_type = event->attr.branch_sample_type;
DECLARE_BITMAP(event_type_mask, PERF_BR_ARM64_MAX);
prepare_event_branch_type_mask(branch_sample_type, event_type_mask);
- for (int bank = 0; bank < nr_banks; bank++) {
- int nr_remaining = nr_hw - (bank * BRBE_BANK_MAX_ENTRIES);
- int nr_this_bank = min(nr_remaining, BRBE_BANK_MAX_ENTRIES);
+ for_each_brbe_entry(i, nr_hw) {
+ struct perf_branch_entry *pbe = &branch_stack->entries[nr_filtered];
- select_brbe_bank(bank);
+ if (!perf_entry_from_brbe_regset(i, pbe, event))
+ break;
- for (int i = 0; i < nr_this_bank; i++) {
- struct perf_branch_entry *pbe = &branch_stack->entries[nr_filtered];
+ if (!filter_branch_record(pbe, branch_sample_type, event_type_mask))
+ continue;
- if (!perf_entry_from_brbe_regset(i, pbe, event))
- goto done;
+ nr_filtered++;
+ }
- if (!filter_branch_record(pbe, branch_sample_type, event_type_mask))
- continue;
+ branch_stack->nr = nr_filtered;
+}
- nr_filtered++;
- }
+/*
+ * Best-effort BRBE snapshot for BPF tracing. Pause BRBE to avoid
+ * self-recording and return 0 if the snapshot state appears disturbed.
+ */
+int arm_brbe_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
+{
+ unsigned long flags;
+ int nr_hw, nr_copied = 0;
+ u64 brbfcr, brbcr;
+
+ if (!cnt)
+ return 0;
+
+ /* Guard against running on a CPU without BRBE (e.g. big.LITTLE). */
+ if (!cpu_has_brbe())
+ return 0;
+
+ /*
+ * Pause BRBE first to avoid recording our own branches. The
+ * sysreg read/write and ISB are branchless, so pausing before
+ * checking BRBCR avoids polluting the buffer with our own
+ * conditional branches.
+ */
+ brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+ brbcr = read_sysreg_s(SYS_BRBCR_EL1);
+ write_sysreg_s(brbfcr | BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1);
+ isb();
+
+ /* Bail out if BRBE is not enabled (BRBCR_EL1 == 0). */
+ if (!brbcr) {
+ write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
+ isb();
+ return 0;
}
-done:
- branch_stack->nr = nr_filtered;
+ /* Block local exception delivery while reading the buffer. */
+ flags = local_daif_save();
+
+ /*
+ * A PMU overflow before local_daif_save() could have re-enabled
+ * BRBE, clearing the PAUSED bit. The overflow handler already
+ * restored BRBE to its correct state, so just bail out.
+ */
+ if (!(read_sysreg_s(SYS_BRBFCR_EL1) & BRBFCR_EL1_PAUSED)) {
+ local_daif_restore(flags);
+ return 0;
+ }
+
+ nr_hw = FIELD_GET(BRBIDR0_EL1_NUMREC_MASK,
+ read_sysreg_s(SYS_BRBIDR0_EL1));
+
+ for_each_brbe_entry(i, nr_hw) {
+ if (nr_copied >= cnt)
+ break;
+
+ if (!perf_entry_from_brbe_regset(i, &entries[nr_copied], NULL))
+ break;
+
+ nr_copied++;
+ }
+
+ brbe_invalidate();
+
+ /* Restore BRBCR before unpausing via BRBFCR, matching brbe_enable(). */
+ write_sysreg_s(brbcr, SYS_BRBCR_EL1);
+ isb();
+ write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
+ /* Ensure BRBE is unpaused before returning to the caller. */
+ isb();
+ local_daif_restore(flags);
+
+ return nr_copied;
}
diff --git a/drivers/perf/arm_brbe.h b/drivers/perf/arm_brbe.h
index b7c7d8796c86..c2a1824437fb 100644
--- a/drivers/perf/arm_brbe.h
+++ b/drivers/perf/arm_brbe.h
@@ -10,6 +10,7 @@
struct arm_pmu;
struct perf_branch_stack;
struct perf_event;
+struct perf_branch_entry;
#ifdef CONFIG_ARM64_BRBE
void brbe_probe(struct arm_pmu *arm_pmu);
@@ -22,6 +23,8 @@ void brbe_disable(void);
bool brbe_branch_attr_valid(struct perf_event *event);
void brbe_read_filtered_entries(struct perf_branch_stack *branch_stack,
const struct perf_event *event);
+int arm_brbe_snapshot_branch_stack(struct perf_branch_entry *entries,
+ unsigned int cnt);
#else
static inline void brbe_probe(struct arm_pmu *arm_pmu) { }
static inline unsigned int brbe_num_branch_records(const struct arm_pmu *armpmu)
@@ -44,4 +47,10 @@ static void brbe_read_filtered_entries(struct perf_branch_stack *branch_stack,
const struct perf_event *event)
{
}
+
+static inline int arm_brbe_snapshot_branch_stack(struct perf_branch_entry *entries,
+ unsigned int cnt)
+{
+ return 0;
+}
#endif
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 8014ff766cff..1a9f129a0f94 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -1449,8 +1449,11 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx;
- if (brbe_num_branch_records(cpu_pmu))
+ if (brbe_num_branch_records(cpu_pmu)) {
cpu_pmu->pmu.sched_task = armv8pmu_sched_task;
+ static_call_update(perf_snapshot_branch_stack,
+ arm_brbe_snapshot_branch_stack);
+ }
cpu_pmu->name = name;
cpu_pmu->map_event = map_event;
--
2.53.0-Meta
^ permalink raw reply related
* [PATCH v5 2/4] perf/core: Clear the whole branch entry in perf_clear_branch_entry()
From: Puranjay Mohan @ 2026-06-16 15:57 UTC (permalink / raw)
To: bpf
Cc: Puranjay Mohan, Puranjay Mohan, Alexei Starovoitov,
Daniel Borkmann, John Fastabend, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
Will Deacon, Mark Rutland, Catalin Marinas, Leo Yan, Rob Herring,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, James Clark, Ian Rogers, Adrian Hunter, Shuah Khan,
Breno Leitao, Ravi Bangoria, Stephane Eranian,
Kumar Kartikeya Dwivedi, Usama Arif, linux-arm-kernel,
linux-perf-users, linux-kselftest, linux-kernel, kernel-team
In-Reply-To: <20260616155716.2631508-1-puranjay@kernel.org>
perf_clear_branch_entry_bitfields() resets the bitfields of struct
perf_branch_entry one at a time and deliberately leaves from/to alone,
since callers overwrite those immediately. The list of assignments has to
be kept in sync with the struct by hand, and it has already drifted:
new_type and priv were added to perf_branch_entry but never cleared here,
so stale values can leak into the records handed to userspace.
Clear the entry with a single struct assignment instead:
*br = (struct perf_branch_entry){ };
Every caller writes from/to right after the clear, so zeroing them as well
is harmless and the dead stores are elided on the x86 LBR read paths.
There is no longer anything to keep in sync when a field is added.
The helper no longer touches only the bitfields, so rename it to
perf_clear_branch_entry() and update the callers, fixing up the
br+nr/br+out spacing on the touched lines while at it.
Fixes: b190bc4ac9e6 ("perf: Extend branch type classification")
Fixes: 5402d25aa571 ("perf: Capture branch privilege information")
Suggested-by: James Clark <james.clark@linaro.org>
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
---
arch/x86/events/amd/brs.c | 2 +-
arch/x86/events/amd/lbr.c | 2 +-
arch/x86/events/intel/lbr.c | 6 +++---
drivers/perf/arm_brbe.c | 2 +-
include/linux/perf_event.h | 16 ++--------------
5 files changed, 8 insertions(+), 20 deletions(-)
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
index 06f35a6b58a5..68c5f42965e9 100644
--- a/arch/x86/events/amd/brs.c
+++ b/arch/x86/events/amd/brs.c
@@ -343,7 +343,7 @@ void amd_brs_drain(void)
rdmsrq(brs_from(brs_idx), from);
- perf_clear_branch_entry_bitfields(br+nr);
+ perf_clear_branch_entry(br + nr);
br[nr].from = from;
br[nr].to = to;
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index d24da377df77..08401fd60585 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -181,7 +181,7 @@ void amd_pmu_lbr_read(void)
entry.to.split.reserved)
continue;
- perf_clear_branch_entry_bitfields(br + out);
+ perf_clear_branch_entry(br + out);
br[out].from = sign_ext_branch_ip(entry.from.split.ip);
br[out].to = sign_ext_branch_ip(entry.to.split.ip);
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 72f2adcda7c6..295da179fa74 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -755,7 +755,7 @@ void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
rdmsrq(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
- perf_clear_branch_entry_bitfields(br);
+ perf_clear_branch_entry(br);
br->from = msr_lastbranch.from;
br->to = msr_lastbranch.to;
@@ -846,7 +846,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (abort && x86_pmu.lbr_double_abort && out > 0)
out--;
- perf_clear_branch_entry_bitfields(br+out);
+ perf_clear_branch_entry(br + out);
br[out].from = from;
br[out].to = to;
br[out].mispred = mis;
@@ -920,7 +920,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
to = rdlbr_to(i, lbr);
info = rdlbr_info(i, lbr);
- perf_clear_branch_entry_bitfields(e);
+ perf_clear_branch_entry(e);
e->from = from;
e->to = to;
diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c
index ba554e0c846c..effbdeacfcbb 100644
--- a/drivers/perf/arm_brbe.c
+++ b/drivers/perf/arm_brbe.c
@@ -604,7 +604,7 @@ static bool perf_entry_from_brbe_regset(int index, struct perf_branch_entry *ent
return false;
brbinf = bregs.brbinf;
- perf_clear_branch_entry_bitfields(entry);
+ perf_clear_branch_entry(entry);
if (brbe_record_is_complete(brbinf)) {
entry->from = bregs.brbsrc;
entry->to = bregs.brbtgt;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 48d851fbd8ea..e034be4a473a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1467,21 +1467,9 @@ static inline u32 perf_sample_data_size(struct perf_sample_data *data,
return size;
}
-/*
- * Clear all bitfields in the perf_branch_entry.
- * The to and from fields are not cleared because they are
- * systematically modified by caller.
- */
-static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br)
+static inline void perf_clear_branch_entry(struct perf_branch_entry *br)
{
- br->mispred = 0;
- br->predicted = 0;
- br->in_tx = 0;
- br->abort = 0;
- br->cycles = 0;
- br->type = 0;
- br->spec = PERF_BR_SPEC_NA;
- br->reserved = 0;
+ *br = (struct perf_branch_entry){ };
}
extern void perf_output_sample(struct perf_output_handle *handle,
--
2.53.0-Meta
^ permalink raw reply related
* [PATCH v5 1/4] perf/core: Fix sched_task callbacks for CPU-wide branch stack events
From: Puranjay Mohan @ 2026-06-16 15:57 UTC (permalink / raw)
To: bpf
Cc: Puranjay Mohan, Puranjay Mohan, Alexei Starovoitov,
Daniel Borkmann, John Fastabend, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
Will Deacon, Mark Rutland, Catalin Marinas, Leo Yan, Rob Herring,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, James Clark, Ian Rogers, Adrian Hunter, Shuah Khan,
Breno Leitao, Ravi Bangoria, Stephane Eranian,
Kumar Kartikeya Dwivedi, Usama Arif, linux-arm-kernel,
linux-perf-users, linux-kselftest, linux-kernel, kernel-team
In-Reply-To: <20260616155716.2631508-1-puranjay@kernel.org>
perf_pmu_sched_task() returns early when cpuctx->task_ctx is non-NULL,
deferring to perf_ctx_sched_task_cb() in the context sched_in/out
paths. But perf_ctx_sched_task_cb() only walks the task context's
pmu_ctx_list -- PMUs that have only CPU-wide events are not on that
list and their sched_task callback is silently skipped.
On ARM64 with CPU-wide branch recording:
perf record -b -e cycles -a -- ls
armv8pmu_sched_task() is skipped whenever the scheduled task has an
unrelated perf event (e.g. a software event), and branch records leak
across task boundaries.
A second problem exists in __perf_pmu_sched_task(): it passes
cpc->task_epc directly to pmu->sched_task(), but task_epc is NULL for
PMUs with only CPU-wide events. When perf_pmu_sched_task() does reach
the loop (because cpuctx->task_ctx is NULL), this causes a NULL
pointer dereference:
Unable to handle kernel NULL pointer dereference at virtual address 00[.]
PC is at armv8pmu_sched_task+0x14/0x50
Call trace:
armv8pmu_sched_task+0x14/0x50 (P)
perf_pmu_sched_task+0xac/0x108
__perf_event_task_sched_out+0x6c/0xe0
Fix both:
- Remove the blanket early return in perf_pmu_sched_task() when
cpuctx->task_ctx is set. Instead, skip individual CPCs that have a
task_epc (those are handled by perf_ctx_sched_task_cb()). CPCs
without a task_epc are CPU-only and must be handled here.
- Fall back to &cpc->epc in __perf_pmu_sched_task() when task_epc is
NULL, so the callback always gets a valid pmu_ctx.
Fixes: bd2756811766 ("perf: Rewrite core context handling")
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
---
kernel/events/core.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6d1f8bad7e1c..6604f6e8f352 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3906,7 +3906,8 @@ static void __perf_pmu_sched_task(struct perf_cpu_pmu_context *cpc,
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(pmu);
- pmu->sched_task(cpc->task_epc, task, sched_in);
+ pmu->sched_task(cpc->task_epc ? cpc->task_epc : &cpc->epc,
+ task, sched_in);
perf_pmu_enable(pmu);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
@@ -3919,12 +3920,20 @@ static void perf_pmu_sched_task(struct task_struct *prev,
struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
struct perf_cpu_pmu_context *cpc;
- /* cpuctx->task_ctx will be handled in perf_event_context_sched_in/out */
- if (prev == next || cpuctx->task_ctx)
+ if (prev == next)
return;
- list_for_each_entry(cpc, this_cpu_ptr(&sched_cb_list), sched_cb_entry)
+ list_for_each_entry(cpc, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
+ /*
+ * PMUs with per-task events are handled by
+ * perf_ctx_sched_task_cb() via perf_event_context_sched_in/out
+ * when a task context is active.
+ */
+ if (cpuctx->task_ctx && cpc->task_epc)
+ continue;
+
__perf_pmu_sched_task(cpc, sched_in ? next : prev, sched_in);
+ }
}
static void perf_event_switch(struct task_struct *task,
--
2.53.0-Meta
^ permalink raw reply related
* [PATCH v5 0/4] arm64: Add BRBE support for bpf_get_branch_snapshot()
From: Puranjay Mohan @ 2026-06-16 15:57 UTC (permalink / raw)
To: bpf
Cc: Puranjay Mohan, Puranjay Mohan, Alexei Starovoitov,
Daniel Borkmann, John Fastabend, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
Will Deacon, Mark Rutland, Catalin Marinas, Leo Yan, Rob Herring,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, James Clark, Ian Rogers, Adrian Hunter, Shuah Khan,
Breno Leitao, Ravi Bangoria, Stephane Eranian,
Kumar Kartikeya Dwivedi, Usama Arif, linux-arm-kernel,
linux-perf-users, linux-kselftest, linux-kernel, kernel-team
Changelog:
v4: https://lore.kernel.org/all/20260527121207.2312181-1-puranjay@kernel.org/
Changes in v5:
- Rework patch 2: drop the UAPI union. Instead rename the helper to perf_clear_branch_entry() and
clear the entry with a single
*br = (struct perf_branch_entry){ }.
This stays kernel-internal, with no changes to the uapi/ or tools/ headers (James Clark).
- Add an isb() to the BRBCR_EL1 == 0 early-exit in the snapshot, for consistency with the other
two exit paths (bpf-ci).
- Add Rob Herring's Reviewed-by to patch 3
v3: https://lore.kernel.org/all/20260413185740.3286146-1-puranjay@kernel.org/
Changes in v4:
- Fix leaking branch records when scheduled task has an unrelated perf event (Sashiko)
- Update tools/include/uapi/linux/perf_event.h as well for patch 2
- Introduce cpu_has_brbe() and use it in
arm_brbe_snapshot_branch_stack() to make sure we don't run on a CPU
without BRBE.
- Add explicit isb() after writing to SYS_BRBFCR_EL1.
- Rebase on latest arm64 tree.
v2: https://lore.kernel.org/all/20260318171706.2840512-1-puranjay@kernel.org/
Changes in v3:
- Move NULL pmu_ctx fix from arm_pmuv3.c to perf core (Leo Yan)
- Use union to clear branch entry bitfields instead of per-field
zeroing (Leo Yan)
- Remove per-CPU brbe_active flag; check BRBCR_EL1 == 0 instead (Rob
Herring)
- Remove redundant valid_brbidr() check in snapshot path (Rob Herring)
- Introduce for_each_brbe_entry() iterator to deduplicate bank
iteration (Rob Herring)
- Include perf core maintainers (Leo Yan, Rob Herring)
v1: https://lore.kernel.org/all/20260313180352.3800358-1-puranjay@kernel.org/
Changes in v2:
- Rebased on arm64/for-next/core
- Add per-CPU brbe_active flag to guard against UNDEFINED sysreg access
on non-BRBE CPUs in heterogeneous big.LITTLE systems.
- Fix pre-existing bug in perf_clear_branch_entry_bitfields() that missed
zeroing new_type and priv bitfields, added as a separate patch with
Fixes tags (new patch 2).
- Use architecture-specific selftest threshold (#if defined(__aarch64__))
instead of raising the global threshold, to preserve x86 regression
detection.
RFC: https://lore.kernel.org/all/20260102214043.1410242-1-puranjay@kernel.org/
Changes from RFC:
- Fix pre-existing NULL pointer dereference in armv8pmu_sched_task()
found by Leo Yan during testing (patch 1)
- Pause BRBE before local_daif_save() to avoid branch pollution from
trace_hardirqs_off()
- Use local_daif_save() to prevent pNMI race from counter overflow
(Mark Rutland)
- Reuse perf_entry_from_brbe_regset() instead of duplicating register
read logic, by making it accept NULL event (Mark Rutland)
- Invalidate BRBE after reading to maintain record contiguity for
other consumers (Mark Rutland)
- Adjust selftest wasted_entries threshold for ARM64 (patch 3)
- Tested on ARM FVP with BRBE enabled
This series enables the bpf_get_branch_snapshot() BPF helper on ARM64
by implementing the perf_snapshot_branch_stack static call for ARM's
Branch Record Buffer Extension (BRBE).
bpf_get_branch_snapshot() [1] allows BPF programs to capture hardware
branch records on-demand from any BPF tracing context. This was
previously only available on x86 (Intel LBR) since v5.16. With BRBE
available on ARMv9, this series closes the gap for ARM64.
Usage model
-----------
The helper works in conjunction with perf events. The userspace
component of the BPF application opens a perf event with
PERF_SAMPLE_BRANCH_STACK on each CPU, which configures the hardware
to continuously record branches into BRBE (on ARM64) or LBR (on x86).
A BPF program attached to a tracepoint, kprobe, or fentry hook can
then call bpf_get_branch_snapshot() to snapshot the branch buffer at
any point. Without an active perf event, BRBE is not recording and
the buffer is empty.
On-demand branch snapshots from BPF are useful for diagnosing which
specific code path was taken inside a function. Stack traces only show
function boundaries, but branch records reveal the exact sequence of
jumps, calls, and returns within a function -- making it possible to
identify which specific error check triggered a failure, or which
callback implementation was invoked through a function pointer.
For example, retsnoop [2] is a BPF-based tool for non-intrusive
mass-tracing of kernel internals. Its LBR mode (--lbr) creates per-CPU
perf events with PERF_SAMPLE_BRANCH_STACK and then uses
bpf_get_branch_snapshot() in its fentry/fexit BPF programs to capture
branch records whenever a traced function returns an error.
Consider debugging a bpf() syscall that returns -EINVAL when creating
a BPF map with invalid parameters. Running retsnoop on an ARM64 FVP
with BRBE to trace the bpf() syscall and array_map_alloc_check():
$ retsnoop -e '*sys_bpf' -a 'array_map_alloc_check' --lbr=any \
-F -k vmlinux --debug full-lbr
$ simfail bpf-bad-map-max-entries-array # in another terminal
Output of retsnoop:
--- fentry BPF program (entries #63-#17) ---
[#63-#59] __htab_map_lookup_elem: hash table walk with memcmp (hashtab.c)
[#58] __htab_map_lookup_elem+0x98 -> dump_bpf_prog+0xc850 (hashtab.c:750)
[#57-#55] ... dump_bpf_prog internal branches ...
[#54] dump_bpf_prog+0xcab8 -> bpf_get_current_pid_tgid+0x0 (helpers.c:225)
[#53] bpf_get_current_pid_tgid+0x1c -> dump_bpf_prog+0xcabc (helpers.c:225)
[#52-#51] ... dump_bpf_prog -> __htab_map_lookup_elem ...
[#50-#47] __htab_map_lookup_elem: htab_map_hash (jhash2), select_bucket
[#46-#42] lookup_nulls_elem_raw: hash chain walk with memcmp (hashtab.c:717)
[#41] __htab_map_lookup_elem+0x98 -> dump_bpf_prog+0xcaf8 (hashtab.c:750)
[#40-#37] ... dump_bpf_prog -> bpf_ktime_get_ns ...
[#36] bpf_ktime_get_ns+0x10 -> ktime_get_mono_fast_ns+0x0 (helpers.c:178)
[#35-#32] ktime_get_mono_fast_ns: tk_clock_read -> arch_counter_get_cntpct
[#31] ktime_get_mono_fast_ns+0x9c -> bpf_ktime_get_ns+0x14 (timekeeping.c:493)
[#30] bpf_ktime_get_ns+0x18 -> dump_bpf_prog+0xcd50 (helpers.c:178)
[#29-#25] ... dump_bpf_prog internal branches ...
[#24] dump_bpf_prog+0x11b28 -> __bpf_prog_exit_recur+0x0 (trampoline.c:1190)
[#23-#17] __bpf_prog_exit_recur: rcu_read_unlock, migrate_enable (trampoline.c:1195)
--- array_map_alloc_check (entries #16-#12) ---
[#16] dump_bpf_prog+0x11b38 -> array_map_alloc_check+0x8 (arraymap.c:55)
[#15] array_map_alloc_check+0x18 -> array_map_alloc_check+0xb8 (arraymap.c:56)
. bpf_map_attr_numa_node . bpf_map_attr_numa_node
[#14] array_map_alloc_check+0xbc -> array_map_alloc_check+0x20 (arraymap.c:59)
. bpf_map_attr_numa_node
[#13] array_map_alloc_check+0x24 -> array_map_alloc_check+0x94 (arraymap.c:64)
[#12] array_map_alloc_check+0x98 -> dump_bpf_prog+0x11b3c (arraymap.c:82)
--- fexit trampoline overhead (entries #11-#00) ---
[#11] dump_bpf_prog+0x11b5c -> __bpf_prog_enter_recur+0x0 (trampoline.c:1145)
[#10-#03] __bpf_prog_enter_recur: rcu_read_lock, migrate_disable (trampoline.c:1146)
[#02] __bpf_prog_enter_recur+0x114 -> dump_bpf_prog+0x11b60 (trampoline.c:1157)
[#01] dump_bpf_prog+0x11b6c -> dump_bpf_prog+0xd230
[#00] dump_bpf_prog+0xd340 -> arm_brbe_snapshot_branch_stack+0x0 (arm_brbe.c:814)
el0t_64_sync+0x168
el0t_64_sync_handler+0x98
el0_svc+0x28
do_el0_svc+0x4c
invoke_syscall.constprop.0+0x54
373us [-EINVAL] __arm64_sys_bpf+0x8
__sys_bpf+0x87c
map_create+0x120
95us [-EINVAL] array_map_alloc_check+0x8
The FVP's BRBE buffer has 64 entries (BRBE supports 8, 16, 32, or
64). Of these, entries #63-#17 (47) are consumed by the fentry BPF
trampoline that ran before the function, and entries #11-#00 (12)
are consumed by the fexit trampoline that runs after. Entry #00
shows the very last branch recorded before BRBE is paused: the call
into arm_brbe_snapshot_branch_stack().
The 5 useful entries (#16-#12) show the exact path taken inside
array_map_alloc_check(). Record #14 shows a jump from line 56
(bpf_map_attr_numa_node) to line 59 (the if-condition), and #13
shows an immediate jump from line 59 (attr->max_entries == 0) to
line 64 (return -EINVAL), skipping lines 60-63. This pinpoints
max_entries==0 as the cause -- a diagnosis impossible with stack
traces alone.
[1] 856c02dbce4f ("bpf: Introduce helper bpf_get_branch_snapshot")
[2] https://github.com/anakryiko/retsnoop
Puranjay Mohan (4):
perf/core: Fix sched_task callbacks for CPU-wide branch stack events
perf/core: Clear the whole branch entry in perf_clear_branch_entry()
perf/arm64: Add BRBE support for bpf_get_branch_snapshot()
selftests/bpf: Adjust wasted entries threshold for ARM64 BRBE
arch/x86/events/amd/brs.c | 2 +-
arch/x86/events/amd/lbr.c | 2 +-
arch/x86/events/intel/lbr.c | 6 +-
drivers/perf/arm_brbe.c | 130 +++++++++++++++---
drivers/perf/arm_brbe.h | 9 ++
drivers/perf/arm_pmuv3.c | 5 +-
include/linux/perf_event.h | 16 +--
kernel/events/core.c | 17 ++-
.../bpf/prog_tests/get_branch_snapshot.c | 13 +-
9 files changed, 150 insertions(+), 50 deletions(-)
base-commit: 61c19a9feb1d87156e46e38d7759f3ad23710e24
--
2.53.0-Meta
^ permalink raw reply
* [PATCH v4 6/7] KVM: arm64: Support FFA_NOTIFICATION_GET in host handler
From: Sebastian Ene @ 2026-06-16 15:41 UTC (permalink / raw)
To: catalin.marinas, maz, oupton, will
Cc: joey.gouly, korneld, kvmarm, linux-arm-kernel, linux-kernel,
android-kvm, mrigendra.chaubey, perlarsen, sebastianene,
suzuki.poulose, vdonnefort, yuzenghui
In-Reply-To: <20260616154149.2763214-1-sebastianene@google.com>
Allow FF-A notification GET messages to be proxied from the pKVM
hypervisor to Trustzone and enforce MBZ/SBZ fields.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/kvm/hyp/nvhe/ffa.c | 30 +++++++++++++++++++++++++++++-
1 file changed, 29 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index fdf1e5fb6726..de4794338388 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -716,7 +716,6 @@ static bool ffa_call_supported(u64 func_id)
case FFA_MEM_DONATE:
case FFA_MEM_RETRIEVE_REQ:
/* Optional notification interfaces added in FF-A 1.1 */
- case FFA_NOTIFICATION_GET:
case FFA_NOTIFICATION_INFO_GET:
/* Optional interfaces added in FF-A 1.2 */
case FFA_MSG_SEND_DIRECT_REQ2: /* Optional per 7.5.1 */
@@ -1001,6 +1000,32 @@ static void do_ffa_notif_set(struct arm_smccc_1_2_regs *res,
hyp_smccc_1_2_smc(args, res);
}
+static void do_ffa_notif_get(struct arm_smccc_1_2_regs *res,
+ struct kvm_cpu_context *ctxt)
+{
+ DECLARE_REG(u32, endp_id, ctxt, 1);
+ DECLARE_REG(u32, flags, ctxt, 2);
+ struct arm_smccc_1_2_regs *args;
+
+ if (FIELD_GET(FFA_NOTIF_RECEIVER_ENDP_MASK, endp_id) != HOST_FFA_ID) {
+ ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
+ return;
+ }
+
+ if (ffa_check_unused_args_sbz(ctxt, 3)) {
+ ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
+ return;
+ }
+
+ if (flags & GENMASK(31, 4)) {
+ ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
+ return;
+ }
+
+ args = (void *)&ctxt->regs.regs[0];
+ hyp_smccc_1_2_smc(args, res);
+}
+
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
{
struct arm_smccc_1_2_regs res;
@@ -1072,6 +1097,9 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
case FFA_NOTIFICATION_SET:
do_ffa_notif_set(&res, host_ctxt);
goto out_handled;
+ case FFA_NOTIFICATION_GET:
+ do_ffa_notif_get(&res, host_ctxt);
+ goto out_handled;
}
if (ffa_call_supported(func_id))
--
2.54.0.1136.gdb2ca164c4-goog
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox