From: Jisheng Zhang <jszhang3@mail.ustc.edu.cn>
To: Paul Walmsley <paul.walmsley@sifive.com>,
Palmer Dabbelt <palmer@dabbelt.com>,
Albert Ou <aou@eecs.berkeley.edu>
Cc: linux-riscv@lists.infradead.org, linux-kernel@vger.kernel.org
Subject: [PATCH] riscv: Turn has_fpu into a static key if FPU
Date: Wed, 12 May 2021 02:01:52 +0800 [thread overview]
Message-ID: <20210512020152.200a182d@xhacker> (raw)
From: Jisheng Zhang <jszhang@kernel.org>
The has_fpu check sits at hot code path: switch_to(). Currently, has_fpu
is a bool variable if FPU, switch_to() checks it each time, we can
optimize out this check by turning the has_fpu into a static key.
Before the patch:
...
ccc7c783 lbu a5,-820(a5) # ffffffff80cab098 <has_fpu>
e3b5 bnez a5,ffffffff8031a438 <__schedule+0x232>
...
After the patch:
If FPU isn't supported:
...
00000013 nop
...
If FPU is supported:
...
0540006f j ffffffff8031a418 <__schedule+0x21a>
...
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
---
arch/riscv/include/asm/switch_to.h | 11 ++++++++---
arch/riscv/kernel/cpufeature.c | 4 ++--
arch/riscv/kernel/process.c | 2 +-
arch/riscv/kernel/signal.c | 4 ++--
4 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index 407bcc96a710..ea25ccb7785e 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -6,6 +6,7 @@
#ifndef _ASM_RISCV_SWITCH_TO_H
#define _ASM_RISCV_SWITCH_TO_H
+#include <linux/jump_label.h>
#include <linux/sched/task_stack.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
@@ -55,9 +56,13 @@ static inline void __switch_to_aux(struct task_struct *prev,
fstate_restore(next, task_pt_regs(next));
}
-extern bool has_fpu;
+extern struct static_key_false cpu_hwcap_fpu;
+static __always_inline bool has_fpu(void)
+{
+ return static_branch_unlikely(&cpu_hwcap_fpu);
+}
#else
-#define has_fpu false
+static __always_inline bool has_fpu(void) { return false; }
#define fstate_save(task, regs) do { } while (0)
#define fstate_restore(task, regs) do { } while (0)
#define __switch_to_aux(__prev, __next) do { } while (0)
@@ -70,7 +75,7 @@ extern struct task_struct *__switch_to(struct task_struct *,
do { \
struct task_struct *__prev = (prev); \
struct task_struct *__next = (next); \
- if (has_fpu) \
+ if (has_fpu()) \
__switch_to_aux(__prev, __next); \
((last) = __switch_to(__prev, __next)); \
} while (0)
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index ac202f44a670..a2848dc36927 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -19,7 +19,7 @@ unsigned long elf_hwcap __read_mostly;
static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
#ifdef CONFIG_FPU
-bool has_fpu __read_mostly;
+__ro_after_init DEFINE_STATIC_KEY_FALSE(cpu_hwcap_fpu);
#endif
/**
@@ -146,6 +146,6 @@ void riscv_fill_hwcap(void)
#ifdef CONFIG_FPU
if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
- has_fpu = true;
+ static_branch_enable(&cpu_hwcap_fpu);
#endif
}
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index f9cd57c9c67d..03ac3aa611f5 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -87,7 +87,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
regs->status = SR_PIE;
- if (has_fpu) {
+ if (has_fpu()) {
regs->status |= SR_FS_INITIAL;
/*
* Restore the initial value to the FP register
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 65942b3748b4..c2d5ecbe5526 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -90,7 +90,7 @@ static long restore_sigcontext(struct pt_regs *regs,
/* sc_regs is structured the same as the start of pt_regs */
err = __copy_from_user(regs, &sc->sc_regs, sizeof(sc->sc_regs));
/* Restore the floating-point state. */
- if (has_fpu)
+ if (has_fpu())
err |= restore_fp_state(regs, &sc->sc_fpregs);
return err;
}
@@ -143,7 +143,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame,
/* sc_regs is structured the same as the start of pt_regs */
err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs));
/* Save the floating-point state. */
- if (has_fpu)
+ if (has_fpu())
err |= save_fp_state(regs, &sc->sc_fpregs);
return err;
}
--
2.31.0
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
WARNING: multiple messages have this Message-ID (diff)
From: Jisheng Zhang <jszhang3@mail.ustc.edu.cn>
To: Paul Walmsley <paul.walmsley@sifive.com>,
Palmer Dabbelt <palmer@dabbelt.com>,
Albert Ou <aou@eecs.berkeley.edu>
Cc: linux-riscv@lists.infradead.org, linux-kernel@vger.kernel.org
Subject: [PATCH] riscv: Turn has_fpu into a static key if FPU
Date: Wed, 12 May 2021 02:01:52 +0800 [thread overview]
Message-ID: <20210512020152.200a182d@xhacker> (raw)
From: Jisheng Zhang <jszhang@kernel.org>
The has_fpu check sits at hot code path: switch_to(). Currently, has_fpu
is a bool variable if FPU, switch_to() checks it each time, we can
optimize out this check by turning the has_fpu into a static key.
Before the patch:
...
ccc7c783 lbu a5,-820(a5) # ffffffff80cab098 <has_fpu>
e3b5 bnez a5,ffffffff8031a438 <__schedule+0x232>
...
After the patch:
If FPU isn't supported:
...
00000013 nop
...
If FPU is supported:
...
0540006f j ffffffff8031a418 <__schedule+0x21a>
...
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
---
arch/riscv/include/asm/switch_to.h | 11 ++++++++---
arch/riscv/kernel/cpufeature.c | 4 ++--
arch/riscv/kernel/process.c | 2 +-
arch/riscv/kernel/signal.c | 4 ++--
4 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index 407bcc96a710..ea25ccb7785e 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -6,6 +6,7 @@
#ifndef _ASM_RISCV_SWITCH_TO_H
#define _ASM_RISCV_SWITCH_TO_H
+#include <linux/jump_label.h>
#include <linux/sched/task_stack.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
@@ -55,9 +56,13 @@ static inline void __switch_to_aux(struct task_struct *prev,
fstate_restore(next, task_pt_regs(next));
}
-extern bool has_fpu;
+extern struct static_key_false cpu_hwcap_fpu;
+static __always_inline bool has_fpu(void)
+{
+ return static_branch_unlikely(&cpu_hwcap_fpu);
+}
#else
-#define has_fpu false
+static __always_inline bool has_fpu(void) { return false; }
#define fstate_save(task, regs) do { } while (0)
#define fstate_restore(task, regs) do { } while (0)
#define __switch_to_aux(__prev, __next) do { } while (0)
@@ -70,7 +75,7 @@ extern struct task_struct *__switch_to(struct task_struct *,
do { \
struct task_struct *__prev = (prev); \
struct task_struct *__next = (next); \
- if (has_fpu) \
+ if (has_fpu()) \
__switch_to_aux(__prev, __next); \
((last) = __switch_to(__prev, __next)); \
} while (0)
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index ac202f44a670..a2848dc36927 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -19,7 +19,7 @@ unsigned long elf_hwcap __read_mostly;
static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
#ifdef CONFIG_FPU
-bool has_fpu __read_mostly;
+__ro_after_init DEFINE_STATIC_KEY_FALSE(cpu_hwcap_fpu);
#endif
/**
@@ -146,6 +146,6 @@ void riscv_fill_hwcap(void)
#ifdef CONFIG_FPU
if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
- has_fpu = true;
+ static_branch_enable(&cpu_hwcap_fpu);
#endif
}
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index f9cd57c9c67d..03ac3aa611f5 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -87,7 +87,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
regs->status = SR_PIE;
- if (has_fpu) {
+ if (has_fpu()) {
regs->status |= SR_FS_INITIAL;
/*
* Restore the initial value to the FP register
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 65942b3748b4..c2d5ecbe5526 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -90,7 +90,7 @@ static long restore_sigcontext(struct pt_regs *regs,
/* sc_regs is structured the same as the start of pt_regs */
err = __copy_from_user(regs, &sc->sc_regs, sizeof(sc->sc_regs));
/* Restore the floating-point state. */
- if (has_fpu)
+ if (has_fpu())
err |= restore_fp_state(regs, &sc->sc_fpregs);
return err;
}
@@ -143,7 +143,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame,
/* sc_regs is structured the same as the start of pt_regs */
err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs));
/* Save the floating-point state. */
- if (has_fpu)
+ if (has_fpu())
err |= save_fp_state(regs, &sc->sc_fpregs);
return err;
}
--
2.31.0
next reply other threads:[~2021-05-11 18:07 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-05-11 18:01 Jisheng Zhang [this message]
2021-05-11 18:01 ` [PATCH] riscv: Turn has_fpu into a static key if FPU Jisheng Zhang
2021-05-26 5:32 ` Palmer Dabbelt
2021-05-26 5:32 ` Palmer Dabbelt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210512020152.200a182d@xhacker \
--to=jszhang3@mail.ustc.edu.cn \
--cc=aou@eecs.berkeley.edu \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=palmer@dabbelt.com \
--cc=paul.walmsley@sifive.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.