* [PATCH] arm64: clear_page[s] using memset
@ 2026-03-06 8:57 Linus Walleij
2026-04-02 20:57 ` Catalin Marinas
0 siblings, 1 reply; 2+ messages in thread
From: Linus Walleij @ 2026-03-06 8:57 UTC (permalink / raw)
To: Catalin Marinas, Will Deacon, Marc Zyngier, Oliver Upton,
Joey Gouly, Suzuki K Poulose, Zenghui Yu
Cc: linux-arm-kernel, kvmarm, Linus Walleij
There is no need to try to second-guess the compiler when
clearing memory. Just call memset() like everyone else.
Since memset() already has an architecture-local MOPS
optimization, we do not need to do anything else to preserve
the MOPS optimization.
While at it, implement the shorthand for directly calling
the new prototype clear_pages() for larger page chunks.
No performance regressions can be seen; the fastpath
benchmark differences are in the noise.
Suggested-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/linux-arm-kernel/20260303-aarch64-clear-pages-v1-1-ad0c3ee9a555@kernel.org/
Signed-off-by: Linus Walleij <linusw@kernel.org>
---
arch/arm64/include/asm/page.h | 13 +++++++++-
arch/arm64/kernel/image-vars.h | 1 -
arch/arm64/kvm/hyp/nvhe/Makefile | 2 +-
arch/arm64/lib/Makefile | 2 +-
arch/arm64/lib/clear_page.S | 53 ----------------------------------------
5 files changed, 14 insertions(+), 57 deletions(-)
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index b39cc1127e1f..d6ae2e53bf4a 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -13,6 +13,7 @@
#ifndef __ASSEMBLER__
#include <linux/personality.h> /* for READ_IMPLIES_EXEC */
+#include <linux/string.h> /* for memset() */
#include <linux/types.h> /* for gfp_t */
#include <asm/pgtable-types.h>
@@ -20,7 +21,17 @@ struct page;
struct vm_area_struct;
extern void copy_page(void *to, const void *from);
-extern void clear_page(void *to);
+
+static inline void clear_pages(void *addr, unsigned int npages)
+{
+ memset(addr, 0, npages * PAGE_SIZE);
+}
+#define clear_pages clear_pages
+
+static inline void clear_page(void *addr)
+{
+ clear_pages(addr, 1);
+}
void copy_user_highpage(struct page *to, struct page *from,
unsigned long vaddr, struct vm_area_struct *vma);
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index d7b0d12b1015..7890454d595d 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -117,7 +117,6 @@ KVM_NVHE_ALIAS(__start___kvm_ex_table);
KVM_NVHE_ALIAS(__stop___kvm_ex_table);
/* Position-independent library routines */
-KVM_NVHE_ALIAS_HYP(clear_page, __pi_clear_page);
KVM_NVHE_ALIAS_HYP(copy_page, __pi_copy_page);
KVM_NVHE_ALIAS_HYP(memcpy, __pi_memcpy);
KVM_NVHE_ALIAS_HYP(memset, __pi_memset);
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index a244ec25f8c5..4d7f3faf1da2 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -17,7 +17,7 @@ ccflags-y += -fno-stack-protector \
hostprogs := gen-hyprel
HOST_EXTRACFLAGS += -I$(objtree)/include
-lib-objs := clear_page.o copy_page.o memcpy.o memset.o
+lib-objs := copy_page.o memcpy.o memset.o
lib-objs := $(addprefix ../../../lib/, $(lib-objs))
CFLAGS_switch.nvhe.o += -Wno-override-init
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 633e5223d944..1840e417dd9a 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
lib-y := clear_user.o delay.o copy_from_user.o \
copy_to_user.o copy_page.o \
- clear_page.o csum.o insn.o memchr.o memcpy.o \
+ csum.o insn.o memchr.o memcpy.o \
memset.o memcmp.o strcmp.o strncmp.o strlen.o \
strnlen.o strchr.o strrchr.o tishift.o
diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
deleted file mode 100644
index bd6f7d5eb6eb..000000000000
--- a/arch/arm64/lib/clear_page.S
+++ /dev/null
@@ -1,53 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 ARM Ltd.
- */
-
-#include <linux/linkage.h>
-#include <linux/const.h>
-#include <asm/assembler.h>
-#include <asm/page.h>
-
-/*
- * Clear page @dest
- *
- * Parameters:
- * x0 - dest
- */
-SYM_FUNC_START(__pi_clear_page)
-#ifdef CONFIG_AS_HAS_MOPS
- .arch_extension mops
-alternative_if_not ARM64_HAS_MOPS
- b .Lno_mops
-alternative_else_nop_endif
-
- mov x1, #PAGE_SIZE
- setpn [x0]!, x1!, xzr
- setmn [x0]!, x1!, xzr
- seten [x0]!, x1!, xzr
- ret
-.Lno_mops:
-#endif
- mrs x1, dczid_el0
- tbnz x1, #4, 2f /* Branch if DC ZVA is prohibited */
- and w1, w1, #0xf
- mov x2, #4
- lsl x1, x2, x1
-
-1: dc zva, x0
- add x0, x0, x1
- tst x0, #(PAGE_SIZE - 1)
- b.ne 1b
- ret
-
-2: stnp xzr, xzr, [x0]
- stnp xzr, xzr, [x0, #16]
- stnp xzr, xzr, [x0, #32]
- stnp xzr, xzr, [x0, #48]
- add x0, x0, #64
- tst x0, #(PAGE_SIZE - 1)
- b.ne 2b
- ret
-SYM_FUNC_END(__pi_clear_page)
-SYM_FUNC_ALIAS(clear_page, __pi_clear_page)
-EXPORT_SYMBOL(clear_page)
---
base-commit: 6de23f81a5e08be8fbf5e8d7e9febc72a5b5f27f
change-id: 20260305-aarch64-clear-pages-c-590dae98c333
Best regards,
--
Linus Walleij <linusw@kernel.org>
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] arm64: clear_page[s] using memset
2026-03-06 8:57 [PATCH] arm64: clear_page[s] using memset Linus Walleij
@ 2026-04-02 20:57 ` Catalin Marinas
0 siblings, 0 replies; 2+ messages in thread
From: Catalin Marinas @ 2026-04-02 20:57 UTC (permalink / raw)
To: Linus Walleij
Cc: Will Deacon, Marc Zyngier, Oliver Upton, Joey Gouly,
Suzuki K Poulose, Zenghui Yu, linux-arm-kernel, kvmarm
On Fri, Mar 06, 2026 at 09:57:50AM +0100, Linus Walleij wrote:
> There is no need to try to second-guess the compiler when
> clearing memory. Just call memset() like everyone else.
Hmm, that "like everyone else" made me think - why not move this to
generic code, so that only the 1-2 platforms that need their own
implementation override it? Could we do the same with copy_page()?
Sorry, more work all of a sudden ;).
> Since memset() already has an architecture-local MOPS
> optimization, we do not need to do anything else to preserve
> the MOPS optimization.
The custom clear_page() had the (very small) advantage that it could skip
the length/alignment checks, as the size and alignment are always page-sized.
> While at it, implement the shorthand for directly calling
> the new prototype clear_pages() for larger page chunks.
>
> No performance regressions can be seen, the fastpath
> benchmarks differences are in the noise.
I assume the benchmarks ran on real hardware (had to ask, last time you
mentioned qemu ;)).
--
Catalin
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-04-02 20:57 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-06 8:57 [PATCH] arm64: clear_page[s] using memset Linus Walleij
2026-04-02 20:57 ` Catalin Marinas
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox