From: He Ying <heying24@huawei.com>
To: <catalin.marinas@arm.com>, <will@kernel.org>,
<mark.rutland@arm.com>, <marcan@marcan.st>, <maz@kernel.org>,
<joey.gouly@arm.com>, <pcc@google.com>
Cc: <linux-arm-kernel@lists.infradead.org>,
<linux-kernel@vger.kernel.org>, <heying24@huawei.com>
Subject: [PATCH] arm64: Make CONFIG_ARM64_PSEUDO_NMI macro wrap all the pseudo-NMI code
Date: Fri, 7 Jan 2022 03:55:36 -0500
Message-ID: <20220107085536.214501-1-heying24@huawei.com>

Our product has recently been updating its kernel from 4.4 to 5.10, and we
found a performance issue. We run a business test called the ARP test, which
measures the latency of ping-pong packet traffic with a certain payload.
The results are as follows.
- 4.4 kernel: avg = ~20s
- 5.10 kernel (CONFIG_ARM64_PSEUDO_NMI is not set): avg = ~40s
I have just been learning the arm64 pseudo-NMI code and have a question:
why is the related code not wrapped by CONFIG_ARM64_PSEUDO_NMI?
I wondered whether this brings some performance regression.
So I made this patch first and then ran the test again. Here are the results.
- 5.10 kernel with this patch not applied: avg = ~40s
- 5.10 kernel with this patch applied: avg = ~23s
Amazing! Note that both kernels are built with CONFIG_ARM64_PSEUDO_NMI not
set. It seems the pseudo-NMI feature actually brings some performance
overhead even if CONFIG_ARM64_PSEUDO_NMI is not set.
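For context, here is a minimal sketch of the pattern the unpatched helpers
follow (my own simplification from reading arch/arm64/include/asm/irqflags.h,
so details such as the exact instruction immediates may be off). As far as I
can tell, both the DAIF-based and the PMR-based variants are compiled into a
single ALTERNATIVE and only selected at boot time by runtime patching, so the
PMR input operand and the alternatives metadata are emitted at every call site
even when CONFIG_ARM64_PSEUDO_NMI is not set:

	/*
	 * Simplified sketch of the unpatched helper, not verbatim source.
	 * The compiler must still materialise GIC_PRIO_IRQON into a
	 * register for the PMR alternative, even on kernels where that
	 * alternative can never be selected.
	 */
	static inline void arch_local_irq_enable(void)
	{
		if (system_has_prio_mask_debugging()) {
			u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);

			WARN_ON_ONCE(pmr != GIC_PRIO_IRQON &&
				     pmr != GIC_PRIO_IRQOFF);
		}

		asm volatile(ALTERNATIVE(
			"msr	daifclr, #2		// arch_local_irq_enable",
			__msr_s(SYS_ICC_PMR_EL1, "%0"),
			ARM64_HAS_IRQ_PRIO_MASKING)
			:
			: "r" ((unsigned long) GIC_PRIO_IRQON)
			: "memory");

		pmr_sync();
	}

(system_has_prio_mask_debugging() and pmr_sync() already compile away when
CONFIG_ARM64_PSEUDO_NMI is off, so any remaining cost here would come from the
alternative itself and the operand setup; if I read the alternative_if macros
correctly, the entry.S alternatives also leave a handful of nops on the
entry/exit paths when the capability can never be set.)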
Furthermore, I find the feature also adds some overhead to the vmlinux size.
I built the 5.10 kernel with and without this patch, with
CONFIG_ARM64_PSEUDO_NMI not set in both cases.
- 5.10 kernel with this patch not applied: vmlinux size is 384060600 bytes.
- 5.10 kernel with this patch applied: vmlinux size is 383842936 bytes.
That means the arm64 pseudo-NMI feature may add roughly 200KB
(384060600 - 383842936 = 217664 bytes here) to the vmlinux size.
In summary, the arm64 pseudo-NMI feature adds some overhead to vmlinux size
and performance even if the config option is not set. To avoid this, wrap all
the related code with the CONFIG_ARM64_PSEUDO_NMI macro.
Signed-off-by: He Ying <heying24@huawei.com>
---
arch/arm64/include/asm/irqflags.h | 38 +++++++++++++++++++++++++++++--
arch/arm64/kernel/entry.S         |  4 ++++
2 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index b57b9b1e4344..82f771b41cf5 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -26,6 +26,7 @@
*/
static inline void arch_local_irq_enable(void)
{
+#ifdef CONFIG_ARM64_PSEUDO_NMI
if (system_has_prio_mask_debugging()) {
u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
@@ -41,10 +42,18 @@ static inline void arch_local_irq_enable(void)
: "memory");
pmr_sync();
+#else
+ asm volatile(
+ "msr daifclr, #3 // arch_local_irq_enable"
+ :
+ :
+ : "memory");
+#endif
}
static inline void arch_local_irq_disable(void)
{
+#ifdef CONFIG_ARM64_PSEUDO_NMI
if (system_has_prio_mask_debugging()) {
u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
@@ -58,6 +67,13 @@ static inline void arch_local_irq_disable(void)
:
: "r" ((unsigned long) GIC_PRIO_IRQOFF)
: "memory");
+#else
+ asm volatile(
+ "msr daifset, #3 // arch_local_irq_disable"
+ :
+ :
+ : "memory");
+#endif
}
/*
@@ -66,7 +82,7 @@ static inline void arch_local_irq_disable(void)
static inline unsigned long arch_local_save_flags(void)
{
unsigned long flags;
-
+#ifdef CONFIG_ARM64_PSEUDO_NMI
asm volatile(ALTERNATIVE(
"mrs %0, daif",
__mrs_s("%0", SYS_ICC_PMR_EL1),
@@ -74,12 +90,19 @@ static inline unsigned long arch_local_save_flags(void)
: "=&r" (flags)
:
: "memory");
-
+#else
+ asm volatile(
+ "mrs %0, daif"
+ : "=r" (flags)
+ :
+ : "memory");
+#endif
return flags;
}
static inline int arch_irqs_disabled_flags(unsigned long flags)
{
+#ifdef CONFIG_ARM64_PSEUDO_NMI
int res;
asm volatile(ALTERNATIVE(
@@ -91,6 +114,9 @@ static inline int arch_irqs_disabled_flags(unsigned long flags)
: "memory");
return res;
+#else
+ return flags & PSR_I_BIT;
+#endif
}
static inline int arch_irqs_disabled(void)
@@ -119,6 +145,7 @@ static inline unsigned long arch_local_irq_save(void)
*/
static inline void arch_local_irq_restore(unsigned long flags)
{
+#ifdef CONFIG_ARM64_PSEUDO_NMI
asm volatile(ALTERNATIVE(
"msr daif, %0",
__msr_s(SYS_ICC_PMR_EL1, "%0"),
@@ -128,6 +155,13 @@ static inline void arch_local_irq_restore(unsigned long flags)
: "memory");
pmr_sync();
+#else
+ asm volatile(
+ "msr daif, %0"
+ :
+ : "r" (flags)
+ : "memory");
+#endif
}
#endif /* __ASM_IRQFLAGS_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 2f69ae43941d..ffc32d3d909a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -300,6 +300,7 @@ alternative_else_nop_endif
str w21, [sp, #S_SYSCALLNO]
.endif
+#ifdef CONFIG_ARM64_PSEUDO_NMI
/* Save pmr */
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
mrs_s x20, SYS_ICC_PMR_EL1
@@ -307,6 +308,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET
msr_s SYS_ICC_PMR_EL1, x20
alternative_else_nop_endif
+#endif
/* Re-enable tag checking (TCO set on exception entry) */
#ifdef CONFIG_ARM64_MTE
@@ -330,6 +332,7 @@ alternative_else_nop_endif
disable_daif
.endif
+#ifdef CONFIG_ARM64_PSEUDO_NMI
/* Restore pmr */
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
ldr x20, [sp, #S_PMR_SAVE]
@@ -339,6 +342,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
dsb sy // Ensure priority change is seen by redistributor
.L__skip_pmr_sync\@:
alternative_else_nop_endif
+#endif
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
--
2.17.1