* [v5, 1/6] riscv: Add support for kernel mode vector
2023-12-14 15:57 [v5, 0/6] riscv: support kernel-mode Vector Andy Chiu
@ 2023-12-14 15:57 ` Andy Chiu
2023-12-15 6:24 ` Charlie Jenkins
2023-12-14 15:57 ` [v5, 2/6] riscv: vector: make Vector always available for softirq context Andy Chiu
` (4 subsequent siblings)
5 siblings, 1 reply; 16+ messages in thread
From: Andy Chiu @ 2023-12-14 15:57 UTC (permalink / raw)
To: linux-riscv, palmer
Cc: greentime.hu, guoren, bjorn, charlie, ardb, arnd, Vincent Chen,
Andy Chiu, Paul Walmsley, Albert Ou, Heiko Stuebner, Conor Dooley,
Clément Léger, Guo Ren, Xiao Wang,
Björn Töpel, Alexandre Ghiti, Sami Tolvanen,
Sia Jee Heng, Jisheng Zhang, Peter Zijlstra
From: Greentime Hu <greentime.hu@sifive.com>
Add kernel_vector_begin() and kernel_vector_end() function declarations
and corresponding definitions in kernel_mode_vector.c
These are needed to wrap uses of vector in kernel mode.
Co-developed-by: Vincent Chen <vincent.chen@sifive.com>
Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
---
Changelog v4:
- Use kernel_v_flags and helpers to track vector context.
Changelog v3:
- Reorder patch 1 to patch 3 to make use of
{get,put}_cpu_vector_context later.
- Export {get,put}_cpu_vector_context.
- Save V context after disabling preemption. (Guo)
- Fix a build fail. (Conor)
- Remove irqs_disabled() check as it is not needed, fix styling. (Björn)
Changelog v2:
- 's/kernel_rvv/kernel_vector' and return void in kernel_vector_begin
(Conor)
- export may_use_simd to include/asm/simd.h
---
arch/riscv/include/asm/processor.h | 15 +++-
arch/riscv/include/asm/simd.h | 42 ++++++++++++
arch/riscv/include/asm/vector.h | 21 ++++++
arch/riscv/kernel/Makefile | 1 +
arch/riscv/kernel/kernel_mode_vector.c | 95 ++++++++++++++++++++++++++
arch/riscv/kernel/process.c | 2 +-
6 files changed, 174 insertions(+), 2 deletions(-)
create mode 100644 arch/riscv/include/asm/simd.h
create mode 100644 arch/riscv/kernel/kernel_mode_vector.c
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index f19f861cda54..a47763c262e1 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -73,6 +73,18 @@
struct task_struct;
struct pt_regs;
+/*
+ * We use a flag to track in-kernel Vector context. Currently the flag has the
+ * following meaning:
+ *
+ * - bit 0 indicates whether the in-kernel Vector context is active. The
+ * activation of this state disables the preemption.
+ */
+
+#define RISCV_KERNEL_MODE_V_MASK 0x1
+
+#define RISCV_KERNEL_MODE_V 0x1
+
/* CPU-specific state of a task */
struct thread_struct {
/* Callee-saved registers */
@@ -81,7 +93,8 @@ struct thread_struct {
unsigned long s[12]; /* s[0]: frame pointer */
struct __riscv_d_ext_state fstate;
unsigned long bad_cause;
- unsigned long vstate_ctrl;
+ u32 riscv_v_flags;
+ u32 vstate_ctrl;
struct __riscv_v_ext_state vstate;
unsigned long align_ctl;
};
diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h
new file mode 100644
index 000000000000..269752bfa2cc
--- /dev/null
+++ b/arch/riscv/include/asm/simd.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef __ASM_SIMD_H
+#define __ASM_SIMD_H
+
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_RISCV_ISA_V
+/*
+ * may_use_simd - whether it is allowable at this time to issue vector
+ * instructions or access the vector register file
+ *
+ * Callers must not assume that the result remains true beyond the next
+ * preempt_enable() or return from softirq context.
+ */
+static __must_check inline bool may_use_simd(void)
+{
+ /*
+ * RISCV_KERNEL_MODE_V is only set while preemption is disabled,
+ * and is clear whenever preemption is enabled.
+ */
+ return !in_hardirq() && !in_nmi() && !(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
+}
+
+#else /* ! CONFIG_RISCV_ISA_V */
+
+static __must_check inline bool may_use_simd(void)
+{
+ return false;
+}
+
+#endif /* ! CONFIG_RISCV_ISA_V */
+
+#endif
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 87aaef656257..6254830c0668 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -22,6 +22,27 @@
extern unsigned long riscv_v_vsize;
int riscv_v_setup_vsize(void);
bool riscv_v_first_use_handler(struct pt_regs *regs);
+void kernel_vector_begin(void);
+void kernel_vector_end(void);
+void get_cpu_vector_context(void);
+void put_cpu_vector_context(void);
+
+static inline void riscv_v_ctx_cnt_add(u32 offset)
+{
+ current->thread.riscv_v_flags += offset;
+ barrier();
+}
+
+static inline void riscv_v_ctx_cnt_sub(u32 offset)
+{
+ barrier();
+ current->thread.riscv_v_flags -= offset;
+}
+
+static inline u32 riscv_v_ctx_cnt(void)
+{
+ return READ_ONCE(current->thread.riscv_v_flags);
+}
static __always_inline bool has_vector(void)
{
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index fee22a3d1b53..8c58595696b3 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/
obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_RISCV_ISA_V) += vector.o
+obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += cpu_ops.o
diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
new file mode 100644
index 000000000000..c9ccf21dd16c
--- /dev/null
+++ b/arch/riscv/kernel/kernel_mode_vector.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+#include <asm/simd.h>
+
+/*
+ * Claim ownership of the CPU vector context for use by the calling context.
+ *
+ * The caller may freely manipulate the vector context metadata until
+ * put_cpu_vector_context() is called.
+ */
+void get_cpu_vector_context(void)
+{
+ preempt_disable();
+
+ WARN_ON(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
+ riscv_v_ctx_cnt_add(RISCV_KERNEL_MODE_V);
+}
+
+/*
+ * Release the CPU vector context.
+ *
+ * Must be called from a context in which get_cpu_vector_context() was
+ * previously called, with no call to put_cpu_vector_context() in the
+ * meantime.
+ */
+void put_cpu_vector_context(void)
+{
+ WARN_ON(!(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK));
+ riscv_v_ctx_cnt_sub(RISCV_KERNEL_MODE_V);
+
+ preempt_enable();
+}
+
+/*
+ * kernel_vector_begin(): obtain the CPU vector registers for use by the calling
+ * context
+ *
+ * Must not be called unless may_use_simd() returns true.
+ * Task context in the vector registers is saved back to memory as necessary.
+ *
+ * A matching call to kernel_vector_end() must be made before returning from the
+ * calling context.
+ *
+ * The caller may freely use the vector registers until kernel_vector_end() is
+ * called.
+ */
+void kernel_vector_begin(void)
+{
+ if (WARN_ON(!has_vector()))
+ return;
+
+ BUG_ON(!may_use_simd());
+
+ get_cpu_vector_context();
+
+ riscv_v_vstate_save(current, task_pt_regs(current));
+
+ riscv_v_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_vector_begin);
+
+/*
+ * kernel_vector_end(): give the CPU vector registers back to the current task
+ *
+ * Must be called from a context in which kernel_vector_begin() was previously
+ * called, with no call to kernel_vector_end() in the meantime.
+ *
+ * The caller must not use the vector registers after this function is called,
+ * unless kernel_vector_begin() is called again in the meantime.
+ */
+void kernel_vector_end(void)
+{
+ if (WARN_ON(!has_vector()))
+ return;
+
+ riscv_v_vstate_restore(current, task_pt_regs(current));
+
+ riscv_v_disable();
+
+ put_cpu_vector_context();
+}
+EXPORT_SYMBOL_GPL(kernel_vector_end);
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 4f21d970a129..5c4dcf518684 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -187,7 +187,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
*dst = *src;
/* clear entire V context, including datap for a new task */
memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
-
return 0;
}
@@ -221,6 +220,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
childregs->a0 = 0; /* Return value of fork() */
p->thread.s[0] = 0;
}
+ p->thread.riscv_v_flags = 0;
p->thread.ra = (unsigned long)ret_from_fork;
p->thread.sp = (unsigned long)childregs; /* kernel sp */
return 0;
--
2.17.1
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [v5, 1/6] riscv: Add support for kernel mode vector
2023-12-14 15:57 ` [v5, 1/6] riscv: Add support for kernel mode vector Andy Chiu
@ 2023-12-15 6:24 ` Charlie Jenkins
2023-12-15 16:01 ` Andy Chiu
0 siblings, 1 reply; 16+ messages in thread
From: Charlie Jenkins @ 2023-12-15 6:24 UTC (permalink / raw)
To: Andy Chiu
Cc: linux-riscv, palmer, greentime.hu, guoren, bjorn, ardb, arnd,
Vincent Chen, Paul Walmsley, Albert Ou, Heiko Stuebner,
Conor Dooley, Clément Léger, Guo Ren, Xiao Wang,
Björn Töpel, Alexandre Ghiti, Sami Tolvanen,
Sia Jee Heng, Jisheng Zhang, Peter Zijlstra
On Thu, Dec 14, 2023 at 03:57:16PM +0000, Andy Chiu wrote:
> From: Greentime Hu <greentime.hu@sifive.com>
>
> Add kernel_vector_begin() and kernel_vector_end() function declarations
> and corresponding definitions in kernel_mode_vector.c
>
> These are needed to wrap uses of vector in kernel mode.
>
> Co-developed-by: Vincent Chen <vincent.chen@sifive.com>
> Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
> Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
> Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
> ---
> Changelog v4:
> - Use kernel_v_flags and helpers to track vector context.
> Changelog v3:
> - Reorder patch 1 to patch 3 to make use of
> {get,put}_cpu_vector_context later.
> - Export {get,put}_cpu_vector_context.
> - Save V context after disabling preemption. (Guo)
> - Fix a build fail. (Conor)
> - Remove irqs_disabled() check as it is not needed, fix styling. (Björn)
> Changelog v2:
> - 's/kernel_rvv/kernel_vector' and return void in kernel_vector_begin
> (Conor)
> - export may_use_simd to include/asm/simd.h
> ---
> arch/riscv/include/asm/processor.h | 15 +++-
> arch/riscv/include/asm/simd.h | 42 ++++++++++++
> arch/riscv/include/asm/vector.h | 21 ++++++
> arch/riscv/kernel/Makefile | 1 +
> arch/riscv/kernel/kernel_mode_vector.c | 95 ++++++++++++++++++++++++++
> arch/riscv/kernel/process.c | 2 +-
> 6 files changed, 174 insertions(+), 2 deletions(-)
> create mode 100644 arch/riscv/include/asm/simd.h
> create mode 100644 arch/riscv/kernel/kernel_mode_vector.c
>
> diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> index f19f861cda54..a47763c262e1 100644
> --- a/arch/riscv/include/asm/processor.h
> +++ b/arch/riscv/include/asm/processor.h
> @@ -73,6 +73,18 @@
> struct task_struct;
> struct pt_regs;
>
> +/*
> + * We use a flag to track in-kernel Vector context. Currently the flag has the
> + * following meaning:
> + *
> + * - bit 0 indicates whether the in-kernel Vector context is active. The
> + * activation of this state disables the preemption.
> + */
> +
> +#define RISCV_KERNEL_MODE_V_MASK 0x1
> +
> +#define RISCV_KERNEL_MODE_V 0x1
> +
> /* CPU-specific state of a task */
> struct thread_struct {
> /* Callee-saved registers */
> @@ -81,7 +93,8 @@ struct thread_struct {
> unsigned long s[12]; /* s[0]: frame pointer */
> struct __riscv_d_ext_state fstate;
> unsigned long bad_cause;
> - unsigned long vstate_ctrl;
> + u32 riscv_v_flags;
> + u32 vstate_ctrl;
> struct __riscv_v_ext_state vstate;
> unsigned long align_ctl;
> };
> diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h
> new file mode 100644
> index 000000000000..269752bfa2cc
> --- /dev/null
> +++ b/arch/riscv/include/asm/simd.h
> @@ -0,0 +1,42 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
> + * Copyright (C) 2023 SiFive
> + */
> +
> +#ifndef __ASM_SIMD_H
> +#define __ASM_SIMD_H
> +
> +#include <linux/compiler.h>
> +#include <linux/irqflags.h>
> +#include <linux/percpu.h>
> +#include <linux/preempt.h>
> +#include <linux/types.h>
> +
> +#ifdef CONFIG_RISCV_ISA_V
> +/*
> + * may_use_simd - whether it is allowable at this time to issue vector
> + * instructions or access the vector register file
> + *
> + * Callers must not assume that the result remains true beyond the next
> + * preempt_enable() or return from softirq context.
> + */
> +static __must_check inline bool may_use_simd(void)
> +{
> + /*
> + * RISCV_KERNEL_MODE_V is only set while preemption is disabled,
> + * and is clear whenever preemption is enabled.
> + */
> + return !in_hardirq() && !in_nmi() && !(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
> +}
> +
> +#else /* ! CONFIG_RISCV_ISA_V */
> +
> +static __must_check inline bool may_use_simd(void)
> +{
> + return false;
> +}
> +
> +#endif /* ! CONFIG_RISCV_ISA_V */
> +
> +#endif
> diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
> index 87aaef656257..6254830c0668 100644
> --- a/arch/riscv/include/asm/vector.h
> +++ b/arch/riscv/include/asm/vector.h
> @@ -22,6 +22,27 @@
> extern unsigned long riscv_v_vsize;
> int riscv_v_setup_vsize(void);
> bool riscv_v_first_use_handler(struct pt_regs *regs);
> +void kernel_vector_begin(void);
> +void kernel_vector_end(void);
> +void get_cpu_vector_context(void);
> +void put_cpu_vector_context(void);
> +
> +static inline void riscv_v_ctx_cnt_add(u32 offset)
> +{
> + current->thread.riscv_v_flags += offset;
> + barrier();
> +}
> +
> +static inline void riscv_v_ctx_cnt_sub(u32 offset)
> +{
> + barrier();
> + current->thread.riscv_v_flags -= offset;
> +}
> +
> +static inline u32 riscv_v_ctx_cnt(void)
> +{
> + return READ_ONCE(current->thread.riscv_v_flags);
> +}
>
> static __always_inline bool has_vector(void)
> {
> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> index fee22a3d1b53..8c58595696b3 100644
> --- a/arch/riscv/kernel/Makefile
> +++ b/arch/riscv/kernel/Makefile
> @@ -63,6 +63,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/
> obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
> obj-$(CONFIG_FPU) += fpu.o
> obj-$(CONFIG_RISCV_ISA_V) += vector.o
> +obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o
> obj-$(CONFIG_SMP) += smpboot.o
> obj-$(CONFIG_SMP) += smp.o
> obj-$(CONFIG_SMP) += cpu_ops.o
> diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
> new file mode 100644
> index 000000000000..c9ccf21dd16c
> --- /dev/null
> +++ b/arch/riscv/kernel/kernel_mode_vector.c
> @@ -0,0 +1,95 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2012 ARM Ltd.
> + * Author: Catalin Marinas <catalin.marinas@arm.com>
> + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
> + * Copyright (C) 2021 SiFive
> + */
> +#include <linux/compiler.h>
> +#include <linux/irqflags.h>
> +#include <linux/percpu.h>
> +#include <linux/preempt.h>
> +#include <linux/types.h>
> +
> +#include <asm/vector.h>
> +#include <asm/switch_to.h>
> +#include <asm/simd.h>
> +
> +/*
> + * Claim ownership of the CPU vector context for use by the calling context.
> + *
> + * The caller may freely manipulate the vector context metadata until
> + * put_cpu_vector_context() is called.
> + */
> +void get_cpu_vector_context(void)
> +{
> + preempt_disable();
> +
> + WARN_ON(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
This is a bigger issue than a warn. Calling riscv_v_ctx_cnt_add with
the same flag an even number of times will cause (riscv_v_ctx_cnt() &
RISCV_KERNEL_MODE_V_MASK) to return 0, even though vector is being used.
This could be solved by using a bitwise or instead of addition when
setting the flag.
> + riscv_v_ctx_cnt_add(RISCV_KERNEL_MODE_V);
> +}
> +
> +/*
> + * Release the CPU vector context.
> + *
> + * Must be called from a context in which get_cpu_vector_context() was
> + * previously called, with no call to put_cpu_vector_context() in the
> + * meantime.
> + */
> +void put_cpu_vector_context(void)
> +{
> + WARN_ON(!(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK));
> + riscv_v_ctx_cnt_sub(RISCV_KERNEL_MODE_V);
> +
> + preempt_enable();
> +}
> +
> +/*
> + * kernel_vector_begin(): obtain the CPU vector registers for use by the calling
> + * context
> + *
> + * Must not be called unless may_use_simd() returns true.
> + * Task context in the vector registers is saved back to memory as necessary.
> + *
> + * A matching call to kernel_vector_end() must be made before returning from the
> + * calling context.
> + *
> + * The caller may freely use the vector registers until kernel_vector_end() is
> + * called.
> + */
> +void kernel_vector_begin(void)
> +{
> + if (WARN_ON(!has_vector()))
Should this be WARN_ONCE? If somebody runs a kernel compiled with vector
on hardware without vector, this warning has the potential to be thrown
an excessive amount of times.
> + return;
> +
> + BUG_ON(!may_use_simd());
> +
> + get_cpu_vector_context();
> +
> + riscv_v_vstate_save(current, task_pt_regs(current));
> +
> + riscv_v_enable();
> +}
> +EXPORT_SYMBOL_GPL(kernel_vector_begin);
> +
> +/*
> + * kernel_vector_end(): give the CPU vector registers back to the current task
> + *
> + * Must be called from a context in which kernel_vector_begin() was previously
> + * called, with no call to kernel_vector_end() in the meantime.
> + *
> + * The caller must not use the vector registers after this function is called,
> + * unless kernel_vector_begin() is called again in the meantime.
> + */
> +void kernel_vector_end(void)
> +{
> + if (WARN_ON(!has_vector()))
Same as above.
- Charlie
> > + return;
> +
> + riscv_v_vstate_restore(current, task_pt_regs(current));
> +
> + riscv_v_disable();
> +
> + put_cpu_vector_context();
> +}
> +EXPORT_SYMBOL_GPL(kernel_vector_end);
> diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
> index 4f21d970a129..5c4dcf518684 100644
> --- a/arch/riscv/kernel/process.c
> +++ b/arch/riscv/kernel/process.c
> @@ -187,7 +187,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
> *dst = *src;
> /* clear entire V context, including datap for a new task */
> memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
> -
> return 0;
> }
>
> @@ -221,6 +220,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> childregs->a0 = 0; /* Return value of fork() */
> p->thread.s[0] = 0;
> }
> + p->thread.riscv_v_flags = 0;
> p->thread.ra = (unsigned long)ret_from_fork;
> p->thread.sp = (unsigned long)childregs; /* kernel sp */
> return 0;
> --
> 2.17.1
>
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [v5, 1/6] riscv: Add support for kernel mode vector
2023-12-15 6:24 ` Charlie Jenkins
@ 2023-12-15 16:01 ` Andy Chiu
2023-12-15 18:41 ` Charlie Jenkins
0 siblings, 1 reply; 16+ messages in thread
From: Andy Chiu @ 2023-12-15 16:01 UTC (permalink / raw)
To: Charlie Jenkins
Cc: linux-riscv, palmer, greentime.hu, guoren, bjorn, ardb, arnd,
Vincent Chen, Paul Walmsley, Albert Ou, Heiko Stuebner,
Conor Dooley, Clément Léger, Guo Ren, Xiao Wang,
Björn Töpel, Alexandre Ghiti, Sami Tolvanen,
Sia Jee Heng, Jisheng Zhang, Peter Zijlstra
On Fri, Dec 15, 2023 at 2:24 PM Charlie Jenkins <charlie@rivosinc.com> wrote:
>
> On Thu, Dec 14, 2023 at 03:57:16PM +0000, Andy Chiu wrote:
> > From: Greentime Hu <greentime.hu@sifive.com>
> >
> > Add kernel_vector_begin() and kernel_vector_end() function declarations
> > and corresponding definitions in kernel_mode_vector.c
> >
> > These are needed to wrap uses of vector in kernel mode.
> >
> > Co-developed-by: Vincent Chen <vincent.chen@sifive.com>
> > Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
> > Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
> > Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
> > ---
> > Changelog v4:
> > - Use kernel_v_flags and helpers to track vector context.
> > Changelog v3:
> > - Reorder patch 1 to patch 3 to make use of
> > {get,put}_cpu_vector_context later.
> > - Export {get,put}_cpu_vector_context.
> > - Save V context after disabling preemption. (Guo)
> > - Fix a build fail. (Conor)
> > - Remove irqs_disabled() check as it is not needed, fix styling. (Björn)
> > Changelog v2:
> > - 's/kernel_rvv/kernel_vector' and return void in kernel_vector_begin
> > (Conor)
> > - export may_use_simd to include/asm/simd.h
> > ---
> > arch/riscv/include/asm/processor.h | 15 +++-
> > arch/riscv/include/asm/simd.h | 42 ++++++++++++
> > arch/riscv/include/asm/vector.h | 21 ++++++
> > arch/riscv/kernel/Makefile | 1 +
> > arch/riscv/kernel/kernel_mode_vector.c | 95 ++++++++++++++++++++++++++
> > arch/riscv/kernel/process.c | 2 +-
> > 6 files changed, 174 insertions(+), 2 deletions(-)
> > create mode 100644 arch/riscv/include/asm/simd.h
> > create mode 100644 arch/riscv/kernel/kernel_mode_vector.c
> >
> > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> > index f19f861cda54..a47763c262e1 100644
> > --- a/arch/riscv/include/asm/processor.h
> > +++ b/arch/riscv/include/asm/processor.h
> > @@ -73,6 +73,18 @@
> > struct task_struct;
> > struct pt_regs;
> >
> > +/*
> > + * We use a flag to track in-kernel Vector context. Currently the flag has the
> > + * following meaning:
> > + *
> > + * - bit 0 indicates whether the in-kernel Vector context is active. The
> > + * activation of this state disables the preemption.
> > + */
> > +
> > +#define RISCV_KERNEL_MODE_V_MASK 0x1
> > +
> > +#define RISCV_KERNEL_MODE_V 0x1
> > +
> > /* CPU-specific state of a task */
> > struct thread_struct {
> > /* Callee-saved registers */
> > @@ -81,7 +93,8 @@ struct thread_struct {
> > unsigned long s[12]; /* s[0]: frame pointer */
> > struct __riscv_d_ext_state fstate;
> > unsigned long bad_cause;
> > - unsigned long vstate_ctrl;
> > + u32 riscv_v_flags;
> > + u32 vstate_ctrl;
> > struct __riscv_v_ext_state vstate;
> > unsigned long align_ctl;
> > };
> > diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h
> > new file mode 100644
> > index 000000000000..269752bfa2cc
> > --- /dev/null
> > +++ b/arch/riscv/include/asm/simd.h
> > @@ -0,0 +1,42 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
> > + * Copyright (C) 2023 SiFive
> > + */
> > +
> > +#ifndef __ASM_SIMD_H
> > +#define __ASM_SIMD_H
> > +
> > +#include <linux/compiler.h>
> > +#include <linux/irqflags.h>
> > +#include <linux/percpu.h>
> > +#include <linux/preempt.h>
> > +#include <linux/types.h>
> > +
> > +#ifdef CONFIG_RISCV_ISA_V
> > +/*
> > + * may_use_simd - whether it is allowable at this time to issue vector
> > + * instructions or access the vector register file
> > + *
> > + * Callers must not assume that the result remains true beyond the next
> > + * preempt_enable() or return from softirq context.
> > + */
> > +static __must_check inline bool may_use_simd(void)
> > +{
> > + /*
> > + * RISCV_KERNEL_MODE_V is only set while preemption is disabled,
> > + * and is clear whenever preemption is enabled.
> > + */
> > + return !in_hardirq() && !in_nmi() && !(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
> > +}
> > +
> > +#else /* ! CONFIG_RISCV_ISA_V */
> > +
> > +static __must_check inline bool may_use_simd(void)
> > +{
> > + return false;
> > +}
> > +
> > +#endif /* ! CONFIG_RISCV_ISA_V */
> > +
> > +#endif
> > diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
> > index 87aaef656257..6254830c0668 100644
> > --- a/arch/riscv/include/asm/vector.h
> > +++ b/arch/riscv/include/asm/vector.h
> > @@ -22,6 +22,27 @@
> > extern unsigned long riscv_v_vsize;
> > int riscv_v_setup_vsize(void);
> > bool riscv_v_first_use_handler(struct pt_regs *regs);
> > +void kernel_vector_begin(void);
> > +void kernel_vector_end(void);
> > +void get_cpu_vector_context(void);
> > +void put_cpu_vector_context(void);
> > +
> > +static inline void riscv_v_ctx_cnt_add(u32 offset)
> > +{
> > + current->thread.riscv_v_flags += offset;
> > + barrier();
> > +}
> > +
> > +static inline void riscv_v_ctx_cnt_sub(u32 offset)
> > +{
> > + barrier();
> > + current->thread.riscv_v_flags -= offset;
> > +}
> > +
> > +static inline u32 riscv_v_ctx_cnt(void)
> > +{
> > + return READ_ONCE(current->thread.riscv_v_flags);
> > +}
> >
> > static __always_inline bool has_vector(void)
> > {
> > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> > index fee22a3d1b53..8c58595696b3 100644
> > --- a/arch/riscv/kernel/Makefile
> > +++ b/arch/riscv/kernel/Makefile
> > @@ -63,6 +63,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/
> > obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
> > obj-$(CONFIG_FPU) += fpu.o
> > obj-$(CONFIG_RISCV_ISA_V) += vector.o
> > +obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o
> > obj-$(CONFIG_SMP) += smpboot.o
> > obj-$(CONFIG_SMP) += smp.o
> > obj-$(CONFIG_SMP) += cpu_ops.o
> > diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
> > new file mode 100644
> > index 000000000000..c9ccf21dd16c
> > --- /dev/null
> > +++ b/arch/riscv/kernel/kernel_mode_vector.c
> > @@ -0,0 +1,95 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * Copyright (C) 2012 ARM Ltd.
> > + * Author: Catalin Marinas <catalin.marinas@arm.com>
> > + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
> > + * Copyright (C) 2021 SiFive
> > + */
> > +#include <linux/compiler.h>
> > +#include <linux/irqflags.h>
> > +#include <linux/percpu.h>
> > +#include <linux/preempt.h>
> > +#include <linux/types.h>
> > +
> > +#include <asm/vector.h>
> > +#include <asm/switch_to.h>
> > +#include <asm/simd.h>
> > +
> > +/*
> > + * Claim ownership of the CPU vector context for use by the calling context.
> > + *
> > + * The caller may freely manipulate the vector context metadata until
> > + * put_cpu_vector_context() is called.
> > + */
> > +void get_cpu_vector_context(void)
> > +{
> > + preempt_disable();
> > +
> > + WARN_ON(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
>
> This is a bigger issue than a warn. Calling riscv_v_ctx_cnt_add with
> the same flag an even number of times will cause (riscv_v_ctx_cnt() &
> RISCV_KERNEL_MODE_V_MASK) to return 0, even though vector is being used.
> This could be solved by using a bitwise or instead of addition when
> setting the flag.
Yes, we should use bitwise to operate it. At the same time, I am
thinking if we should allow calling kernel_vector_begin() multiple
times on a call chain. Or provide a fast check (in_kernel_vector()) in
a vectorized function to reduce the cost of calling multiple
kernel_vector_begin(). For example,
kernel_vector_begin()
memcpy()<- allow calling kernel_vector_begin/end() again
- or prevent calling it again with in_kernel_vector()
do_something_with_vector()
kernel_vector_end()
>
> > + riscv_v_ctx_cnt_add(RISCV_KERNEL_MODE_V);
> > +}
> > +
> > +/*
> > + * Release the CPU vector context.
> > + *
> > + * Must be called from a context in which get_cpu_vector_context() was
> > + * previously called, with no call to put_cpu_vector_context() in the
> > + * meantime.
> > + */
> > +void put_cpu_vector_context(void)
> > +{
> > + WARN_ON(!(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK));
> > + riscv_v_ctx_cnt_sub(RISCV_KERNEL_MODE_V);
> > +
> > + preempt_enable();
> > +}
> > +
> > +/*
> > + * kernel_vector_begin(): obtain the CPU vector registers for use by the calling
> > + * context
> > + *
> > + * Must not be called unless may_use_simd() returns true.
> > + * Task context in the vector registers is saved back to memory as necessary.
> > + *
> > + * A matching call to kernel_vector_end() must be made before returning from the
> > + * calling context.
> > + *
> > + * The caller may freely use the vector registers until kernel_vector_end() is
> > + * called.
> > + */
> > +void kernel_vector_begin(void)
> > +{
> > + if (WARN_ON(!has_vector()))
>
> Should this be WARN_ONCE? If somebody runs a kernel compiled with vector
> on hardware without vector, this warning has the potential to be thrown
> an excessive amount of times.
Callers of this function should check with may_use_simd() and only
proceed to call this function if it returns true.
>
> > + return;
> > +
> > + BUG_ON(!may_use_simd());
> > +
> > + get_cpu_vector_context();
> > +
> > + riscv_v_vstate_save(current, task_pt_regs(current));
> > +
> > + riscv_v_enable();
> > +}
> > +EXPORT_SYMBOL_GPL(kernel_vector_begin);
> > +
> > +/*
> > + * kernel_vector_end(): give the CPU vector registers back to the current task
> > + *
> > + * Must be called from a context in which kernel_vector_begin() was previously
> > + * called, with no call to kernel_vector_end() in the meantime.
> > + *
> > + * The caller must not use the vector registers after this function is called,
> > + * unless kernel_vector_begin() is called again in the meantime.
> > + */
> > +void kernel_vector_end(void)
> > +{
> > + if (WARN_ON(!has_vector()))
>
> Same as above.
>
> - Charlie
>
> > > + return;
> > +
> > + riscv_v_vstate_restore(current, task_pt_regs(current));
> > +
> > + riscv_v_disable();
> > +
> > + put_cpu_vector_context();
> > +}
> > +EXPORT_SYMBOL_GPL(kernel_vector_end);
> > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
> > index 4f21d970a129..5c4dcf518684 100644
> > --- a/arch/riscv/kernel/process.c
> > +++ b/arch/riscv/kernel/process.c
> > @@ -187,7 +187,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
> > *dst = *src;
> > /* clear entire V context, including datap for a new task */
> > memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
> > -
> > return 0;
> > }
> >
> > @@ -221,6 +220,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> > childregs->a0 = 0; /* Return value of fork() */
> > p->thread.s[0] = 0;
> > }
> > + p->thread.riscv_v_flags = 0;
> > p->thread.ra = (unsigned long)ret_from_fork;
> > p->thread.sp = (unsigned long)childregs; /* kernel sp */
> > return 0;
> > --
> > 2.17.1
> >
Regards,
Andy
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [v5, 1/6] riscv: Add support for kernel mode vector
2023-12-15 16:01 ` Andy Chiu
@ 2023-12-15 18:41 ` Charlie Jenkins
2023-12-19 6:04 ` Andy Chiu
0 siblings, 1 reply; 16+ messages in thread
From: Charlie Jenkins @ 2023-12-15 18:41 UTC (permalink / raw)
To: Andy Chiu
Cc: linux-riscv, palmer, greentime.hu, guoren, bjorn, ardb, arnd,
Vincent Chen, Paul Walmsley, Albert Ou, Heiko Stuebner,
Conor Dooley, Clément Léger, Guo Ren, Xiao Wang,
Björn Töpel, Alexandre Ghiti, Sami Tolvanen,
Sia Jee Heng, Jisheng Zhang, Peter Zijlstra
On Sat, Dec 16, 2023 at 12:01:53AM +0800, Andy Chiu wrote:
> On Fri, Dec 15, 2023 at 2:24 PM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >
> > On Thu, Dec 14, 2023 at 03:57:16PM +0000, Andy Chiu wrote:
> > > From: Greentime Hu <greentime.hu@sifive.com>
> > >
> > > Add kernel_vector_begin() and kernel_vector_end() function declarations
> > > and corresponding definitions in kernel_mode_vector.c
> > >
> > > These are needed to wrap uses of vector in kernel mode.
> > >
> > > Co-developed-by: Vincent Chen <vincent.chen@sifive.com>
> > > Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
> > > Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
> > > Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
> > > ---
> > > Changelog v4:
> > > - Use kernel_v_flags and helpers to track vector context.
> > > Changelog v3:
> > > - Reorder patch 1 to patch 3 to make use of
> > > {get,put}_cpu_vector_context later.
> > > - Export {get,put}_cpu_vector_context.
> > > - Save V context after disabling preemption. (Guo)
> > > - Fix a build fail. (Conor)
> > > - Remove irqs_disabled() check as it is not needed, fix styling. (Björn)
> > > Changelog v2:
> > > - 's/kernel_rvv/kernel_vector' and return void in kernel_vector_begin
> > > (Conor)
> > > - export may_use_simd to include/asm/simd.h
> > > ---
> > > arch/riscv/include/asm/processor.h | 15 +++-
> > > arch/riscv/include/asm/simd.h | 42 ++++++++++++
> > > arch/riscv/include/asm/vector.h | 21 ++++++
> > > arch/riscv/kernel/Makefile | 1 +
> > > arch/riscv/kernel/kernel_mode_vector.c | 95 ++++++++++++++++++++++++++
> > > arch/riscv/kernel/process.c | 2 +-
> > > 6 files changed, 174 insertions(+), 2 deletions(-)
> > > create mode 100644 arch/riscv/include/asm/simd.h
> > > create mode 100644 arch/riscv/kernel/kernel_mode_vector.c
> > >
> > > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> > > index f19f861cda54..a47763c262e1 100644
> > > --- a/arch/riscv/include/asm/processor.h
> > > +++ b/arch/riscv/include/asm/processor.h
> > > @@ -73,6 +73,18 @@
> > > struct task_struct;
> > > struct pt_regs;
> > >
> > > +/*
> > > + * We use a flag to track in-kernel Vector context. Currently the flag has the
> > > + * following meaning:
> > > + *
> > > + * - bit 0 indicates whether the in-kernel Vector context is active. The
> > > + * activation of this state disables the preemption.
> > > + */
> > > +
> > > +#define RISCV_KERNEL_MODE_V_MASK 0x1
> > > +
> > > +#define RISCV_KERNEL_MODE_V 0x1
> > > +
> > > /* CPU-specific state of a task */
> > > struct thread_struct {
> > > /* Callee-saved registers */
> > > @@ -81,7 +93,8 @@ struct thread_struct {
> > > unsigned long s[12]; /* s[0]: frame pointer */
> > > struct __riscv_d_ext_state fstate;
> > > unsigned long bad_cause;
> > > - unsigned long vstate_ctrl;
> > > + u32 riscv_v_flags;
> > > + u32 vstate_ctrl;
> > > struct __riscv_v_ext_state vstate;
> > > unsigned long align_ctl;
> > > };
> > > diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h
> > > new file mode 100644
> > > index 000000000000..269752bfa2cc
> > > --- /dev/null
> > > +++ b/arch/riscv/include/asm/simd.h
> > > @@ -0,0 +1,42 @@
> > > +/* SPDX-License-Identifier: GPL-2.0-only */
> > > +/*
> > > + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
> > > + * Copyright (C) 2023 SiFive
> > > + */
> > > +
> > > +#ifndef __ASM_SIMD_H
> > > +#define __ASM_SIMD_H
> > > +
> > > +#include <linux/compiler.h>
> > > +#include <linux/irqflags.h>
> > > +#include <linux/percpu.h>
> > > +#include <linux/preempt.h>
> > > +#include <linux/types.h>
> > > +
> > > +#ifdef CONFIG_RISCV_ISA_V
> > > +/*
> > > + * may_use_simd - whether it is allowable at this time to issue vector
> > > + * instructions or access the vector register file
> > > + *
> > > + * Callers must not assume that the result remains true beyond the next
> > > + * preempt_enable() or return from softirq context.
> > > + */
> > > +static __must_check inline bool may_use_simd(void)
> > > +{
> > > + /*
> > > + * RISCV_KERNEL_MODE_V is only set while preemption is disabled,
> > > + * and is clear whenever preemption is enabled.
> > > + */
> > > + return !in_hardirq() && !in_nmi() && !(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
> > > +}
> > > +
> > > +#else /* ! CONFIG_RISCV_ISA_V */
> > > +
> > > +static __must_check inline bool may_use_simd(void)
> > > +{
> > > + return false;
> > > +}
> > > +
> > > +#endif /* ! CONFIG_RISCV_ISA_V */
> > > +
> > > +#endif
> > > diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
> > > index 87aaef656257..6254830c0668 100644
> > > --- a/arch/riscv/include/asm/vector.h
> > > +++ b/arch/riscv/include/asm/vector.h
> > > @@ -22,6 +22,27 @@
> > > extern unsigned long riscv_v_vsize;
> > > int riscv_v_setup_vsize(void);
> > > bool riscv_v_first_use_handler(struct pt_regs *regs);
> > > +void kernel_vector_begin(void);
> > > +void kernel_vector_end(void);
> > > +void get_cpu_vector_context(void);
> > > +void put_cpu_vector_context(void);
> > > +
> > > +static inline void riscv_v_ctx_cnt_add(u32 offset)
> > > +{
> > > + current->thread.riscv_v_flags += offset;
> > > + barrier();
> > > +}
> > > +
> > > +static inline void riscv_v_ctx_cnt_sub(u32 offset)
> > > +{
> > > + barrier();
> > > + current->thread.riscv_v_flags -= offset;
> > > +}
> > > +
> > > +static inline u32 riscv_v_ctx_cnt(void)
> > > +{
> > > + return READ_ONCE(current->thread.riscv_v_flags);
> > > +}
> > >
> > > static __always_inline bool has_vector(void)
> > > {
> > > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> > > index fee22a3d1b53..8c58595696b3 100644
> > > --- a/arch/riscv/kernel/Makefile
> > > +++ b/arch/riscv/kernel/Makefile
> > > @@ -63,6 +63,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/
> > > obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
> > > obj-$(CONFIG_FPU) += fpu.o
> > > obj-$(CONFIG_RISCV_ISA_V) += vector.o
> > > +obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o
> > > obj-$(CONFIG_SMP) += smpboot.o
> > > obj-$(CONFIG_SMP) += smp.o
> > > obj-$(CONFIG_SMP) += cpu_ops.o
> > > diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
> > > new file mode 100644
> > > index 000000000000..c9ccf21dd16c
> > > --- /dev/null
> > > +++ b/arch/riscv/kernel/kernel_mode_vector.c
> > > @@ -0,0 +1,95 @@
> > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > +/*
> > > + * Copyright (C) 2012 ARM Ltd.
> > > + * Author: Catalin Marinas <catalin.marinas@arm.com>
> > > + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
> > > + * Copyright (C) 2021 SiFive
> > > + */
> > > +#include <linux/compiler.h>
> > > +#include <linux/irqflags.h>
> > > +#include <linux/percpu.h>
> > > +#include <linux/preempt.h>
> > > +#include <linux/types.h>
> > > +
> > > +#include <asm/vector.h>
> > > +#include <asm/switch_to.h>
> > > +#include <asm/simd.h>
> > > +
> > > +/*
> > > + * Claim ownership of the CPU vector context for use by the calling context.
> > > + *
> > > + * The caller may freely manipulate the vector context metadata until
> > > + * put_cpu_vector_context() is called.
> > > + */
> > > +void get_cpu_vector_context(void)
> > > +{
> > > + preempt_disable();
> > > +
> > > + WARN_ON(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
> >
> > This is a bigger issue than a warn. Calling riscv_v_ctx_cnt_add with
> > the same flag an even number of times will cause (riscv_v_ctx_cnt() &
> > RISCV_KERNEL_MODE_V_MASK) to return 0, even though vector is being used.
> > This could be solved by using a bitwise or instead of addition when
> > setting the flag.
>
> Yes, we should use bitwise to operate it. At the same time, I am
> thinking if we should allow calling kernel_vector_begin() multiple
> times on a call chain. Or provide a fast check (in_kernel_vector()) in
> a vectorized function to reduce the cost of calling multiple
> kernel_vector_begin(). For example,
>
> kernel_vector_begin()
> memcpy()<- allow calling kernel_vector_begin/end() again
> - or prevent calling it again with in_kernel_vector()
> do_something_with_vector()
> kernel_vector_end()
>
Yes that seems like that could be useful.
> >
> > > + riscv_v_ctx_cnt_add(RISCV_KERNEL_MODE_V);
> > > +}
> > > +
> > > +/*
> > > + * Release the CPU vector context.
> > > + *
> > > + * Must be called from a context in which get_cpu_vector_context() was
> > > + * previously called, with no call to put_cpu_vector_context() in the
> > > + * meantime.
> > > + */
> > > +void put_cpu_vector_context(void)
> > > +{
> > > + WARN_ON(!(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK));
> > > + riscv_v_ctx_cnt_sub(RISCV_KERNEL_MODE_V);
> > > +
> > > + preempt_enable();
> > > +}
> > > +
> > > +/*
> > > + * kernel_vector_begin(): obtain the CPU vector registers for use by the calling
> > > + * context
> > > + *
> > > + * Must not be called unless may_use_simd() returns true.
> > > + * Task context in the vector registers is saved back to memory as necessary.
> > > + *
> > > + * A matching call to kernel_vector_end() must be made before returning from the
> > > + * calling context.
> > > + *
> > > + * The caller may freely use the vector registers until kernel_vector_end() is
> > > + * called.
> > > + */
> > > +void kernel_vector_begin(void)
> > > +{
> > > + if (WARN_ON(!has_vector()))
> >
> > Should this be WARN_ONCE? If somebody runs a kernel compiled with vector
> > on hardware without vector, this warning has the potential to be thrown
> > an excessive amount of times.
>
> Callers of this function should check with may_use_simd() and only
> proceed to call this function if it returns true.
>
Yes it is a bug if they don't call may_use_simd() first, but I was more
concerned about the number of logs that are generated with WARN_ON.
A single log seems like it would be sufficient.
- Charlie
> >
> > > + return;
> > > +
> > > + BUG_ON(!may_use_simd());
> > > +
> > > + get_cpu_vector_context();
> > > +
> > > + riscv_v_vstate_save(current, task_pt_regs(current));
> > > +
> > > + riscv_v_enable();
> > > +}
> > > +EXPORT_SYMBOL_GPL(kernel_vector_begin);
> > > +
> > > +/*
> > > + * kernel_vector_end(): give the CPU vector registers back to the current task
> > > + *
> > > + * Must be called from a context in which kernel_vector_begin() was previously
> > > + * called, with no call to kernel_vector_end() in the meantime.
> > > + *
> > > + * The caller must not use the vector registers after this function is called,
> > > + * unless kernel_vector_begin() is called again in the meantime.
> > > + */
> > > +void kernel_vector_end(void)
> > > +{
> > > + if (WARN_ON(!has_vector()))
> >
> > Same as above.
> >
> > - Charlie
> >
> > >+ return;
> > > +
> > > + riscv_v_vstate_restore(current, task_pt_regs(current));
> > > +
> > > + riscv_v_disable();
> > > +
> > > + put_cpu_vector_context();
> > > +}
> > > +EXPORT_SYMBOL_GPL(kernel_vector_end);
> > > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
> > > index 4f21d970a129..5c4dcf518684 100644
> > > --- a/arch/riscv/kernel/process.c
> > > +++ b/arch/riscv/kernel/process.c
> > > @@ -187,7 +187,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
> > > *dst = *src;
> > > /* clear entire V context, including datap for a new task */
> > > memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
> > > -
> > > return 0;
> > > }
> > >
> > > @@ -221,6 +220,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> > > childregs->a0 = 0; /* Return value of fork() */
> > > p->thread.s[0] = 0;
> > > }
> > > + p->thread.riscv_v_flags = 0;
> > > p->thread.ra = (unsigned long)ret_from_fork;
> > > p->thread.sp = (unsigned long)childregs; /* kernel sp */
> > > return 0;
> > > --
> > > 2.17.1
> > >
>
> Regards,
> Andy
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [v5, 1/6] riscv: Add support for kernel mode vector
2023-12-15 18:41 ` Charlie Jenkins
@ 2023-12-19 6:04 ` Andy Chiu
0 siblings, 0 replies; 16+ messages in thread
From: Andy Chiu @ 2023-12-19 6:04 UTC (permalink / raw)
To: Charlie Jenkins
Cc: linux-riscv, palmer, greentime.hu, guoren, bjorn, ardb, arnd,
Vincent Chen, Paul Walmsley, Albert Ou, Heiko Stuebner,
Conor Dooley, Clément Léger, Guo Ren, Xiao Wang,
Björn Töpel, Alexandre Ghiti, Sami Tolvanen,
Sia Jee Heng, Jisheng Zhang, Peter Zijlstra
On Sat, Dec 16, 2023 at 2:41 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>
> On Sat, Dec 16, 2023 at 12:01:53AM +0800, Andy Chiu wrote:
> > On Fri, Dec 15, 2023 at 2:24 PM Charlie Jenkins <charlie@rivosinc.com> wrote:
> > >
> > > On Thu, Dec 14, 2023 at 03:57:16PM +0000, Andy Chiu wrote:
> > > > From: Greentime Hu <greentime.hu@sifive.com>
> > > >
> > > > Add kernel_vector_begin() and kernel_vector_end() function declarations
> > > > and corresponding definitions in kernel_mode_vector.c
> > > >
> > > > These are needed to wrap uses of vector in kernel mode.
> > > >
> > > > Co-developed-by: Vincent Chen <vincent.chen@sifive.com>
> > > > Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
> > > > Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
> > > > Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
> > > > ---
> > > > Changelog v4:
> > > > - Use kernel_v_flags and helpers to track vector context.
> > > > Changelog v3:
> > > > - Reorder patch 1 to patch 3 to make use of
> > > > {get,put}_cpu_vector_context later.
> > > > - Export {get,put}_cpu_vector_context.
> > > > - Save V context after disabling preemption. (Guo)
> > > > - Fix a build fail. (Conor)
> > > > - Remove irqs_disabled() check as it is not needed, fix styling. (Björn)
> > > > Changelog v2:
> > > > - 's/kernel_rvv/kernel_vector' and return void in kernel_vector_begin
> > > > (Conor)
> > > > - export may_use_simd to include/asm/simd.h
> > > > ---
> > > > arch/riscv/include/asm/processor.h | 15 +++-
> > > > arch/riscv/include/asm/simd.h | 42 ++++++++++++
> > > > arch/riscv/include/asm/vector.h | 21 ++++++
> > > > arch/riscv/kernel/Makefile | 1 +
> > > > arch/riscv/kernel/kernel_mode_vector.c | 95 ++++++++++++++++++++++++++
> > > > arch/riscv/kernel/process.c | 2 +-
> > > > 6 files changed, 174 insertions(+), 2 deletions(-)
> > > > create mode 100644 arch/riscv/include/asm/simd.h
> > > > create mode 100644 arch/riscv/kernel/kernel_mode_vector.c
> > > >
> > > > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> > > > index f19f861cda54..a47763c262e1 100644
> > > > --- a/arch/riscv/include/asm/processor.h
> > > > +++ b/arch/riscv/include/asm/processor.h
> > > > @@ -73,6 +73,18 @@
> > > > struct task_struct;
> > > > struct pt_regs;
> > > >
> > > > +/*
> > > > + * We use a flag to track in-kernel Vector context. Currently the flag has the
> > > > + * following meaning:
> > > > + *
> > > > + * - bit 0 indicates whether the in-kernel Vector context is active. The
> > > > + * activation of this state disables the preemption.
> > > > + */
> > > > +
> > > > +#define RISCV_KERNEL_MODE_V_MASK 0x1
> > > > +
> > > > +#define RISCV_KERNEL_MODE_V 0x1
> > > > +
> > > > /* CPU-specific state of a task */
> > > > struct thread_struct {
> > > > /* Callee-saved registers */
> > > > @@ -81,7 +93,8 @@ struct thread_struct {
> > > > unsigned long s[12]; /* s[0]: frame pointer */
> > > > struct __riscv_d_ext_state fstate;
> > > > unsigned long bad_cause;
> > > > - unsigned long vstate_ctrl;
> > > > + u32 riscv_v_flags;
> > > > + u32 vstate_ctrl;
> > > > struct __riscv_v_ext_state vstate;
> > > > unsigned long align_ctl;
> > > > };
> > > > diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h
> > > > new file mode 100644
> > > > index 000000000000..269752bfa2cc
> > > > --- /dev/null
> > > > +++ b/arch/riscv/include/asm/simd.h
> > > > @@ -0,0 +1,42 @@
> > > > +/* SPDX-License-Identifier: GPL-2.0-only */
> > > > +/*
> > > > + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
> > > > + * Copyright (C) 2023 SiFive
> > > > + */
> > > > +
> > > > +#ifndef __ASM_SIMD_H
> > > > +#define __ASM_SIMD_H
> > > > +
> > > > +#include <linux/compiler.h>
> > > > +#include <linux/irqflags.h>
> > > > +#include <linux/percpu.h>
> > > > +#include <linux/preempt.h>
> > > > +#include <linux/types.h>
> > > > +
> > > > +#ifdef CONFIG_RISCV_ISA_V
> > > > +/*
> > > > + * may_use_simd - whether it is allowable at this time to issue vector
> > > > + * instructions or access the vector register file
> > > > + *
> > > > + * Callers must not assume that the result remains true beyond the next
> > > > + * preempt_enable() or return from softirq context.
> > > > + */
> > > > +static __must_check inline bool may_use_simd(void)
> > > > +{
> > > > + /*
> > > > + * RISCV_KERNEL_MODE_V is only set while preemption is disabled,
> > > > + * and is clear whenever preemption is enabled.
> > > > + */
> > > > + return !in_hardirq() && !in_nmi() && !(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
> > > > +}
> > > > +
> > > > +#else /* ! CONFIG_RISCV_ISA_V */
> > > > +
> > > > +static __must_check inline bool may_use_simd(void)
> > > > +{
> > > > + return false;
> > > > +}
> > > > +
> > > > +#endif /* ! CONFIG_RISCV_ISA_V */
> > > > +
> > > > +#endif
> > > > diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
> > > > index 87aaef656257..6254830c0668 100644
> > > > --- a/arch/riscv/include/asm/vector.h
> > > > +++ b/arch/riscv/include/asm/vector.h
> > > > @@ -22,6 +22,27 @@
> > > > extern unsigned long riscv_v_vsize;
> > > > int riscv_v_setup_vsize(void);
> > > > bool riscv_v_first_use_handler(struct pt_regs *regs);
> > > > +void kernel_vector_begin(void);
> > > > +void kernel_vector_end(void);
> > > > +void get_cpu_vector_context(void);
> > > > +void put_cpu_vector_context(void);
> > > > +
> > > > +static inline void riscv_v_ctx_cnt_add(u32 offset)
> > > > +{
> > > > + current->thread.riscv_v_flags += offset;
> > > > + barrier();
> > > > +}
> > > > +
> > > > +static inline void riscv_v_ctx_cnt_sub(u32 offset)
> > > > +{
> > > > + barrier();
> > > > + current->thread.riscv_v_flags -= offset;
> > > > +}
> > > > +
> > > > +static inline u32 riscv_v_ctx_cnt(void)
> > > > +{
> > > > + return READ_ONCE(current->thread.riscv_v_flags);
> > > > +}
> > > >
> > > > static __always_inline bool has_vector(void)
> > > > {
> > > > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> > > > index fee22a3d1b53..8c58595696b3 100644
> > > > --- a/arch/riscv/kernel/Makefile
> > > > +++ b/arch/riscv/kernel/Makefile
> > > > @@ -63,6 +63,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/
> > > > obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
> > > > obj-$(CONFIG_FPU) += fpu.o
> > > > obj-$(CONFIG_RISCV_ISA_V) += vector.o
> > > > +obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o
> > > > obj-$(CONFIG_SMP) += smpboot.o
> > > > obj-$(CONFIG_SMP) += smp.o
> > > > obj-$(CONFIG_SMP) += cpu_ops.o
> > > > diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
> > > > new file mode 100644
> > > > index 000000000000..c9ccf21dd16c
> > > > --- /dev/null
> > > > +++ b/arch/riscv/kernel/kernel_mode_vector.c
> > > > @@ -0,0 +1,95 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > +/*
> > > > + * Copyright (C) 2012 ARM Ltd.
> > > > + * Author: Catalin Marinas <catalin.marinas@arm.com>
> > > > + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
> > > > + * Copyright (C) 2021 SiFive
> > > > + */
> > > > +#include <linux/compiler.h>
> > > > +#include <linux/irqflags.h>
> > > > +#include <linux/percpu.h>
> > > > +#include <linux/preempt.h>
> > > > +#include <linux/types.h>
> > > > +
> > > > +#include <asm/vector.h>
> > > > +#include <asm/switch_to.h>
> > > > +#include <asm/simd.h>
> > > > +
> > > > +/*
> > > > + * Claim ownership of the CPU vector context for use by the calling context.
> > > > + *
> > > > + * The caller may freely manipulate the vector context metadata until
> > > > + * put_cpu_vector_context() is called.
> > > > + */
> > > > +void get_cpu_vector_context(void)
> > > > +{
> > > > + preempt_disable();
> > > > +
> > > > + WARN_ON(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
> > >
> > > This is a bigger issue than a warn. Calling riscv_v_ctx_cnt_add with
> > > the same flag an even number of times will cause (riscv_v_ctx_cnt() &
> > > RISCV_KERNEL_MODE_V_MASK) to return 0, even though vector is being used.
> > > This could be solved by using a bitwise or instead of addition when
> > > setting the flag.
> >
> > Yes, we should use bitwise to operate it. At the same time, I am
> > thinking if we should allow calling kernel_vector_begin() multiple
> > times on a call chain. Or provide a fast check (in_kernel_vector()) in
> > a vectorized function to reduce the cost of calling multiple
> > kernel_vector_begin(). For example,
> >
> > kernel_vector_begin()
> > memcpy()<- allow calling kernel_vector_begin/end() again
> > - or prevent calling it again with in_kernel_vector()
> > do_something_with_vector()
> > kernel_vector_end()
> >
>
> Yes that seems like that could be useful.
>
> > >
> > > > + riscv_v_ctx_cnt_add(RISCV_KERNEL_MODE_V);
> > > > +}
> > > > +
> > > > +/*
> > > > + * Release the CPU vector context.
> > > > + *
> > > > + * Must be called from a context in which get_cpu_vector_context() was
> > > > + * previously called, with no call to put_cpu_vector_context() in the
> > > > + * meantime.
> > > > + */
> > > > +void put_cpu_vector_context(void)
> > > > +{
> > > > + WARN_ON(!(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK));
> > > > + riscv_v_ctx_cnt_sub(RISCV_KERNEL_MODE_V);
> > > > +
> > > > + preempt_enable();
> > > > +}
> > > > +
> > > > +/*
> > > > + * kernel_vector_begin(): obtain the CPU vector registers for use by the calling
> > > > + * context
> > > > + *
> > > > + * Must not be called unless may_use_simd() returns true.
> > > > + * Task context in the vector registers is saved back to memory as necessary.
> > > > + *
> > > > + * A matching call to kernel_vector_end() must be made before returning from the
> > > > + * calling context.
> > > > + *
> > > > + * The caller may freely use the vector registers until kernel_vector_end() is
> > > > + * called.
> > > > + */
> > > > +void kernel_vector_begin(void)
> > > > +{
> > > > + if (WARN_ON(!has_vector()))
> > >
> > > Should this be WARN_ONCE? If somebody runs a kernel compiled with vector
> > > on hardware without vector, this warning has the potential to be thrown
> > > an excessive amount of times.
> >
> > Callers of this function should check with may_use_simd() and only
> > proceed to call this function if it returns true.
> >
>
> Yes it is a bug if they don't call may_use_simd() first, but I was more
> concerned about the number of logs that are generated with WARN_ON.
> A single log seems like it would be sufficient.
In fact this will just panic the kernel if a user proceeds to call
kernel_vector_begin() regardless of may_use_simd(). To demonstrate,
has_vector() returns false under these conditions:
1. The kernel is compiled without CONFIG_RISCV_ISA_V.
2. The kernel has CONFIG_RISCV_ISA_V but the hardware doesn't support V
3. The kernel has CONFIG_RISCV_ISA_V. The hardware supports V, but in
a way where it is not recognized by the kernel, e.g uneven VLEN.
For case 1, kernel_vector_begin() won't even exist so the user will
experience a compile failure.
For case 2 and 3, the kernel will panic, hitting an illegal
instruction error when it starts executing V because VS will be off by
then. Unless the user goes very off and manually enables V by calling
riscv_v_enable() on case 3.
Please tell me if you still have any concerns about this.
>
> - Charlie
>
> > >
> > > > + return;
> > > > +
> > > > + BUG_ON(!may_use_simd());
> > > > +
> > > > + get_cpu_vector_context();
> > > > +
> > > > + riscv_v_vstate_save(current, task_pt_regs(current));
> > > > +
> > > > + riscv_v_enable();
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(kernel_vector_begin);
> > > > +
> > > > +/*
> > > > + * kernel_vector_end(): give the CPU vector registers back to the current task
> > > > + *
> > > > + * Must be called from a context in which kernel_vector_begin() was previously
> > > > + * called, with no call to kernel_vector_end() in the meantime.
> > > > + *
> > > > + * The caller must not use the vector registers after this function is called,
> > > > + * unless kernel_vector_begin() is called again in the meantime.
> > > > + */
> > > > +void kernel_vector_end(void)
> > > > +{
> > > > + if (WARN_ON(!has_vector()))
> > >
> > > Same as above.
> > >
> > > - Charlie
> > >
> > > >+ return;
> > > > +
> > > > + riscv_v_vstate_restore(current, task_pt_regs(current));
> > > > +
> > > > + riscv_v_disable();
> > > > +
> > > > + put_cpu_vector_context();
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(kernel_vector_end);
> > > > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
> > > > index 4f21d970a129..5c4dcf518684 100644
> > > > --- a/arch/riscv/kernel/process.c
> > > > +++ b/arch/riscv/kernel/process.c
> > > > @@ -187,7 +187,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
> > > > *dst = *src;
> > > > /* clear entire V context, including datap for a new task */
> > > > memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
> > > > -
> > > > return 0;
> > > > }
> > > >
> > > > @@ -221,6 +220,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> > > > childregs->a0 = 0; /* Return value of fork() */
> > > > p->thread.s[0] = 0;
> > > > }
> > > > + p->thread.riscv_v_flags = 0;
> > > > p->thread.ra = (unsigned long)ret_from_fork;
> > > > p->thread.sp = (unsigned long)childregs; /* kernel sp */
> > > > return 0;
> > > > --
> > > > 2.17.1
> > > >
> >
> > Regards,
> > Andy
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 16+ messages in thread
* [v5, 2/6] riscv: vector: make Vector always available for softirq context
2023-12-14 15:57 [v5, 0/6] riscv: support kernel-mode Vector Andy Chiu
2023-12-14 15:57 ` [v5, 1/6] riscv: Add support for kernel mode vector Andy Chiu
@ 2023-12-14 15:57 ` Andy Chiu
2023-12-14 15:57 ` [v5, 3/6] riscv: Add vector extension XOR implementation Andy Chiu
` (3 subsequent siblings)
5 siblings, 0 replies; 16+ messages in thread
From: Andy Chiu @ 2023-12-14 15:57 UTC (permalink / raw)
To: linux-riscv, palmer
Cc: greentime.hu, guoren, bjorn, charlie, ardb, arnd, Andy Chiu,
Paul Walmsley, Albert Ou, Vincent Chen, Conor Dooley
By disabling bottom halves in active kernel-mode Vector, softirq will not
be able to nest on top of any kernel-mode Vector.
After this patch, Vector context cannot start with irqs disabled.
Otherwise local_bh_enable() may run in a wrong context.
Disabling bh is not enough for RT-kernel to prevent preemption. So
we must disable preemption, which also implies disabling bh on RT.
Related-to: commit 696207d4258b ("arm64/sve: Make kernel FPU protection RT friendly")
Related-to: commit 66c3ec5a7120 ("arm64: neon: Forbid when irqs are disabled")
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
---
Changelog v4:
- new patch since v4
---
arch/riscv/include/asm/simd.h | 6 +++++-
arch/riscv/kernel/kernel_mode_vector.c | 10 ++++++++--
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h
index 269752bfa2cc..cd6180fe37c0 100644
--- a/arch/riscv/include/asm/simd.h
+++ b/arch/riscv/include/asm/simd.h
@@ -26,8 +26,12 @@ static __must_check inline bool may_use_simd(void)
/*
* RISCV_KERNEL_MODE_V is only set while preemption is disabled,
* and is clear whenever preemption is enabled.
+ *
+ * Kernel-mode Vector temperarily disables bh. So we must not return
+ * true on irq_disabled(). Otherwise we would fail the lockdep check
+ * calling local_bh_enable()
*/
- return !in_hardirq() && !in_nmi() && !(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
+ return !in_hardirq() && !in_nmi() && !irqs_disabled() && !(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
}
#else /* ! CONFIG_RISCV_ISA_V */
diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
index c9ccf21dd16c..52e42f74ec9a 100644
--- a/arch/riscv/kernel/kernel_mode_vector.c
+++ b/arch/riscv/kernel/kernel_mode_vector.c
@@ -23,7 +23,10 @@
*/
void get_cpu_vector_context(void)
{
- preempt_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_disable();
+ else
+ preempt_disable();
WARN_ON(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
riscv_v_ctx_cnt_add(RISCV_KERNEL_MODE_V);
@@ -41,7 +44,10 @@ void put_cpu_vector_context(void)
WARN_ON(!(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK));
riscv_v_ctx_cnt_sub(RISCV_KERNEL_MODE_V);
- preempt_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_enable();
+ else
+ preempt_enable();
}
/*
--
2.17.1
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply related [flat|nested] 16+ messages in thread* [v5, 3/6] riscv: Add vector extension XOR implementation
2023-12-14 15:57 [v5, 0/6] riscv: support kernel-mode Vector Andy Chiu
2023-12-14 15:57 ` [v5, 1/6] riscv: Add support for kernel mode vector Andy Chiu
2023-12-14 15:57 ` [v5, 2/6] riscv: vector: make Vector always available for softirq context Andy Chiu
@ 2023-12-14 15:57 ` Andy Chiu
2023-12-14 15:57 ` [v5, 4/6] riscv: sched: defer restoring Vector context for user Andy Chiu
` (2 subsequent siblings)
5 siblings, 0 replies; 16+ messages in thread
From: Andy Chiu @ 2023-12-14 15:57 UTC (permalink / raw)
To: linux-riscv, palmer
Cc: greentime.hu, guoren, bjorn, charlie, ardb, arnd, Han-Kuan Chen,
Andy Chiu, Paul Walmsley, Albert Ou, Conor Dooley, Andrew Jones,
Heiko Stuebner
From: Greentime Hu <greentime.hu@sifive.com>
This patch adds support for vector optimized XOR and it is tested in
qemu.
Co-developed-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
---
Changelog v2:
- 's/rvv/vector/' (Conor)
---
arch/riscv/include/asm/xor.h | 82 ++++++++++++++++++++++++++++++++++++
arch/riscv/lib/Makefile | 1 +
arch/riscv/lib/xor.S | 81 +++++++++++++++++++++++++++++++++++
3 files changed, 164 insertions(+)
create mode 100644 arch/riscv/include/asm/xor.h
create mode 100644 arch/riscv/lib/xor.S
diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
new file mode 100644
index 000000000000..903c3275f8d0
--- /dev/null
+++ b/arch/riscv/include/asm/xor.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#ifdef CONFIG_RISCV_ISA_V
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+
+void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2);
+void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3);
+void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3,
+ const unsigned long *__restrict p4);
+void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3,
+ const unsigned long *__restrict p4,
+ const unsigned long *__restrict p5);
+
+static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2)
+{
+ kernel_vector_begin();
+ xor_regs_2_(bytes, p1, p2);
+ kernel_vector_end();
+}
+
+static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3)
+{
+ kernel_vector_begin();
+ xor_regs_3_(bytes, p1, p2, p3);
+ kernel_vector_end();
+}
+
+static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3,
+ const unsigned long *__restrict p4)
+{
+ kernel_vector_begin();
+ xor_regs_4_(bytes, p1, p2, p3, p4);
+ kernel_vector_end();
+}
+
+static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3,
+ const unsigned long *__restrict p4,
+ const unsigned long *__restrict p5)
+{
+ kernel_vector_begin();
+ xor_regs_5_(bytes, p1, p2, p3, p4, p5);
+ kernel_vector_end();
+}
+
+static struct xor_block_template xor_block_rvv = {
+ .name = "rvv",
+ .do_2 = xor_vector_2,
+ .do_3 = xor_vector_3,
+ .do_4 = xor_vector_4,
+ .do_5 = xor_vector_5
+};
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+ do { \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_32regs); \
+ if (has_vector()) { \
+ xor_speed(&xor_block_rvv);\
+ } \
+ } while (0)
+#endif
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 26cb2502ecf8..494f9cd1a00c 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -11,3 +11,4 @@ lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+lib-$(CONFIG_RISCV_ISA_V) += xor.o
diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S
new file mode 100644
index 000000000000..3bc059e18171
--- /dev/null
+++ b/arch/riscv/lib/xor.S
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+
+ENTRY(xor_regs_2_)
+ vsetvli a3, a0, e8, m8, ta, ma
+ vle8.v v0, (a1)
+ vle8.v v8, (a2)
+ sub a0, a0, a3
+ vxor.vv v16, v0, v8
+ add a2, a2, a3
+ vse8.v v16, (a1)
+ add a1, a1, a3
+ bnez a0, xor_regs_2_
+ ret
+END(xor_regs_2_)
+EXPORT_SYMBOL(xor_regs_2_)
+
+ENTRY(xor_regs_3_)
+ vsetvli a4, a0, e8, m8, ta, ma
+ vle8.v v0, (a1)
+ vle8.v v8, (a2)
+ sub a0, a0, a4
+ vxor.vv v0, v0, v8
+ vle8.v v16, (a3)
+ add a2, a2, a4
+ vxor.vv v16, v0, v16
+ add a3, a3, a4
+ vse8.v v16, (a1)
+ add a1, a1, a4
+ bnez a0, xor_regs_3_
+ ret
+END(xor_regs_3_)
+EXPORT_SYMBOL(xor_regs_3_)
+
+ENTRY(xor_regs_4_)
+ vsetvli a5, a0, e8, m8, ta, ma
+ vle8.v v0, (a1)
+ vle8.v v8, (a2)
+ sub a0, a0, a5
+ vxor.vv v0, v0, v8
+ vle8.v v16, (a3)
+ add a2, a2, a5
+ vxor.vv v0, v0, v16
+ vle8.v v24, (a4)
+ add a3, a3, a5
+ vxor.vv v16, v0, v24
+ add a4, a4, a5
+ vse8.v v16, (a1)
+ add a1, a1, a5
+ bnez a0, xor_regs_4_
+ ret
+END(xor_regs_4_)
+EXPORT_SYMBOL(xor_regs_4_)
+
+ENTRY(xor_regs_5_)
+ vsetvli a6, a0, e8, m8, ta, ma
+ vle8.v v0, (a1)
+ vle8.v v8, (a2)
+ sub a0, a0, a6
+ vxor.vv v0, v0, v8
+ vle8.v v16, (a3)
+ add a2, a2, a6
+ vxor.vv v0, v0, v16
+ vle8.v v24, (a4)
+ add a3, a3, a6
+ vxor.vv v0, v0, v24
+ vle8.v v8, (a5)
+ add a4, a4, a6
+ vxor.vv v16, v0, v8
+ add a5, a5, a6
+ vse8.v v16, (a1)
+ add a1, a1, a6
+ bnez a0, xor_regs_5_
+ ret
+END(xor_regs_5_)
+EXPORT_SYMBOL(xor_regs_5_)
--
2.17.1
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply related [flat|nested] 16+ messages in thread* [v5, 4/6] riscv: sched: defer restoring Vector context for user
2023-12-14 15:57 [v5, 0/6] riscv: support kernel-mode Vector Andy Chiu
` (2 preceding siblings ...)
2023-12-14 15:57 ` [v5, 3/6] riscv: Add vector extension XOR implementation Andy Chiu
@ 2023-12-14 15:57 ` Andy Chiu
2023-12-14 15:57 ` [v5, 5/6] riscv: lib: vectorize copy_to_user/copy_from_user Andy Chiu
2023-12-14 15:57 ` [v5, 6/6] riscv: lib: add vectorized mem* routines Andy Chiu
5 siblings, 0 replies; 16+ messages in thread
From: Andy Chiu @ 2023-12-14 15:57 UTC (permalink / raw)
To: linux-riscv, palmer
Cc: greentime.hu, guoren, bjorn, charlie, ardb, arnd, Andy Chiu,
Paul Walmsley, Albert Ou, Oleg Nesterov, Björn Töpel,
Guo Ren, Clément Léger, Conor Dooley, Sami Tolvanen,
Jisheng Zhang, Deepak Gupta, Vincent Chen, Heiko Stuebner,
Xiao Wang, Peter Zijlstra, Mathis Salmen, Haorong Lu,
Joel Granados
User will use its Vector registers only after the kernel really returns
to the userspace. So we can delay restoring Vector registers as long as
we are still running in kernel mode. So, add a thread flag to indicate
the need of restoring Vector and do the restore at the last
arch-specific exit-to-user hook. This saves the context restoring cost
when we switch over multiple processes that run V in kernel mode. For
example, if the kernel performs a context switch from A->B->C, and
returns to C's userspace, then there is no need to restore B's
V-register.
Besides, this also prevents us from repeatedly restoring V context when
executing kernel-mode Vector multiple times.
The cost of this is that we must disable preemption and mark vector as
busy during vstate_{save,restore}. Because then the V context will not
get restored back immediately when a trap-causing context switch happens
in the middle of vstate_{save,restore}.
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
---
Changelog v4:
- fix typos and re-add Conor's A-b.
Changelog v3:
- Guard {get,put}_cpu_vector_context between vstate_* operation and
explain it in the commit msg.
- Drop R-b from Björn and A-b from Conor.
Changelog v2:
- rename and add comment for the new thread flag (Conor)
---
arch/riscv/include/asm/entry-common.h | 17 +++++++++++++++++
arch/riscv/include/asm/thread_info.h | 2 ++
arch/riscv/include/asm/vector.h | 11 ++++++++++-
arch/riscv/kernel/kernel_mode_vector.c | 2 +-
arch/riscv/kernel/process.c | 2 ++
arch/riscv/kernel/ptrace.c | 5 ++++-
arch/riscv/kernel/signal.c | 5 ++++-
arch/riscv/kernel/vector.c | 2 +-
8 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h
index 7ab5e34318c8..6361a8488642 100644
--- a/arch/riscv/include/asm/entry-common.h
+++ b/arch/riscv/include/asm/entry-common.h
@@ -4,6 +4,23 @@
#define _ASM_RISCV_ENTRY_COMMON_H
#include <asm/stacktrace.h>
+#include <asm/thread_info.h>
+#include <asm/vector.h>
+
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+ unsigned long ti_work)
+{
+ if (ti_work & _TIF_RISCV_V_DEFER_RESTORE) {
+ clear_thread_flag(TIF_RISCV_V_DEFER_RESTORE);
+ /*
+ * We are already called with irq disabled, so go without
+ * keeping track of vector_context_busy.
+ */
+ riscv_v_vstate_restore(current, regs);
+ }
+}
+
+#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
void handle_page_fault(struct pt_regs *regs);
void handle_break(struct pt_regs *regs);
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 574779900bfb..1047a97ddbc8 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -103,12 +103,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */
#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
#define TIF_32BIT 11 /* compat-mode 32bit process */
+#define TIF_RISCV_V_DEFER_RESTORE 12 /* restore Vector before returing to user */
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
#define _TIF_UPROBE (1 << TIF_UPROBE)
+#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE)
#define _TIF_WORK_MASK \
(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 6254830c0668..e706613aae2c 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -205,6 +205,15 @@ static inline void riscv_v_vstate_restore(struct task_struct *task,
}
}
+static inline void riscv_v_vstate_set_restore(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ if ((regs->status & SR_VS) != SR_VS_OFF) {
+ set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
+ riscv_v_vstate_on(regs);
+ }
+}
+
static inline void __switch_to_vector(struct task_struct *prev,
struct task_struct *next)
{
@@ -212,7 +221,7 @@ static inline void __switch_to_vector(struct task_struct *prev,
regs = task_pt_regs(prev);
riscv_v_vstate_save(prev, regs);
- riscv_v_vstate_restore(next, task_pt_regs(next));
+ riscv_v_vstate_set_restore(next, task_pt_regs(next));
}
void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
index 52e42f74ec9a..c5b86b554d1a 100644
--- a/arch/riscv/kernel/kernel_mode_vector.c
+++ b/arch/riscv/kernel/kernel_mode_vector.c
@@ -92,7 +92,7 @@ void kernel_vector_end(void)
if (WARN_ON(!has_vector()))
return;
- riscv_v_vstate_restore(current, task_pt_regs(current));
+ riscv_v_vstate_set_restore(current, task_pt_regs(current));
riscv_v_disable();
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 5c4dcf518684..58127b1c6c71 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -171,6 +171,7 @@ void flush_thread(void)
riscv_v_vstate_off(task_pt_regs(current));
kfree(current->thread.vstate.datap);
memset(¤t->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+ clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE);
#endif
}
@@ -187,6 +188,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
*dst = *src;
/* clear entire V context, including datap for a new task */
memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+ clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE);
return 0;
}
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 2afe460de16a..7b93bcbdf9fa 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -99,8 +99,11 @@ static int riscv_vr_get(struct task_struct *target,
* Ensure the vector registers have been saved to the memory before
* copying them to membuf.
*/
- if (target == current)
+ if (target == current) {
+ get_cpu_vector_context();
riscv_v_vstate_save(current, task_pt_regs(current));
+ put_cpu_vector_context();
+ }
ptrace_vstate.vstart = vstate->vstart;
ptrace_vstate.vl = vstate->vl;
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 88b6220b2608..aca4a12c8416 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -86,7 +86,10 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec)
/* datap is designed to be 16 byte aligned for better performance */
WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16)));
+ get_cpu_vector_context();
riscv_v_vstate_save(current, regs);
+ put_cpu_vector_context();
+
/* Copy everything of vstate but datap. */
err = __copy_to_user(&state->v_state, ¤t->thread.vstate,
offsetof(struct __riscv_v_ext_state, datap));
@@ -134,7 +137,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
if (unlikely(err))
return err;
- riscv_v_vstate_restore(current, regs);
+ riscv_v_vstate_set_restore(current, regs);
return err;
}
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
index 578b6292487e..66e8c6ab09d2 100644
--- a/arch/riscv/kernel/vector.c
+++ b/arch/riscv/kernel/vector.c
@@ -167,7 +167,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
return true;
}
riscv_v_vstate_on(regs);
- riscv_v_vstate_restore(current, regs);
+ riscv_v_vstate_set_restore(current, regs);
return true;
}
--
2.17.1
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply related [flat|nested] 16+ messages in thread* [v5, 5/6] riscv: lib: vectorize copy_to_user/copy_from_user
2023-12-14 15:57 [v5, 0/6] riscv: support kernel-mode Vector Andy Chiu
` (3 preceding siblings ...)
2023-12-14 15:57 ` [v5, 4/6] riscv: sched: defer restoring Vector context for user Andy Chiu
@ 2023-12-14 15:57 ` Andy Chiu
2023-12-15 6:25 ` Charlie Jenkins
2023-12-14 15:57 ` [v5, 6/6] riscv: lib: add vectorized mem* routines Andy Chiu
5 siblings, 1 reply; 16+ messages in thread
From: Andy Chiu @ 2023-12-14 15:57 UTC (permalink / raw)
To: linux-riscv, palmer
Cc: greentime.hu, guoren, bjorn, charlie, ardb, arnd, Andy Chiu,
Paul Walmsley, Albert Ou, Conor Dooley, Andrew Jones,
Han-Kuan Chen, Heiko Stuebner, Aurelien Jarno, Bo YU,
Alexandre Ghiti, Clément Léger
This patch utilizes Vector to perform copy_to_user/copy_from_user. If
Vector is available and the size of copy is large enough for Vector to
perform better than scalar, then direct the kernel to do Vector copies
for userspace. Though the best programming practice for users is to
reduce the copy, this provides a faster variant when copies are
inevitable.
The optimal size for using Vector, copy_to_user_thres, is only a
heuristic for now. We can add DT parsing if people feel the need of
customizing it.
The exception fixup code of the __asm_vector_usercopy must fallback to
the scalar one because accessing user pages might fault, and must be
sleepable. Current kernel-mode Vector does not allow tasks to be
preemptible, so we must deactivate Vector and perform a scalar fallback
in such case.
The original implementation of Vector operations comes from
https://github.com/sifive/sifive-libc, which we agree to contribute to
Linux kernel.
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
---
Changelog v4:
- new patch since v4
---
arch/riscv/lib/Makefile | 2 ++
arch/riscv/lib/riscv_v_helpers.c | 38 ++++++++++++++++++++++
arch/riscv/lib/uaccess.S | 11 +++++++
arch/riscv/lib/uaccess_vector.S | 55 ++++++++++++++++++++++++++++++++
4 files changed, 106 insertions(+)
create mode 100644 arch/riscv/lib/riscv_v_helpers.c
create mode 100644 arch/riscv/lib/uaccess_vector.S
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 494f9cd1a00c..1fe8d797e0f2 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -12,3 +12,5 @@ lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
+lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
+lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o
diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
new file mode 100644
index 000000000000..d763b9c69fb7
--- /dev/null
+++ b/arch/riscv/lib/riscv_v_helpers.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 SiFive
+ * Author: Andy Chiu <andy.chiu@sifive.com>
+ */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+#include <asm/vector.h>
+#include <asm/simd.h>
+
+size_t riscv_v_usercopy_thres = 768;
+int __asm_vector_usercopy(void *dst, void *src, size_t n);
+int fallback_scalar_usercopy(void *dst, void *src, size_t n);
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
+{
+ size_t remain, copied;
+
+ /* skip has_vector() check because it has been done by the asm */
+ if (!may_use_simd())
+ goto fallback;
+
+ kernel_vector_begin();
+ remain = __asm_vector_usercopy(dst, src, n);
+ kernel_vector_end();
+
+ if (remain) {
+ copied = n - remain;
+ dst += copied;
+ src += copied;
+ goto fallback;
+ }
+
+ return remain;
+
+fallback:
+ return fallback_scalar_usercopy(dst, src, n);
+}
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 3ab438f30d13..ae8c1453cfcf 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -3,6 +3,8 @@
#include <asm/asm.h>
#include <asm/asm-extable.h>
#include <asm/csr.h>
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
.macro fixup op reg addr lbl
100:
@@ -11,6 +13,14 @@
.endm
SYM_FUNC_START(__asm_copy_to_user)
+#ifdef CONFIG_RISCV_ISA_V
+ ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V)
+ la t0, riscv_v_usercopy_thres
+ REG_L t0, (t0)
+ bltu a2, t0, fallback_scalar_usercopy
+ tail enter_vector_usercopy
+#endif
+SYM_FUNC_START(fallback_scalar_usercopy)
/* Enable access to user memory */
li t6, SR_SUM
@@ -181,6 +191,7 @@ SYM_FUNC_START(__asm_copy_to_user)
sub a0, t5, a0
ret
SYM_FUNC_END(__asm_copy_to_user)
+SYM_FUNC_END(fallback_scalar_usercopy)
EXPORT_SYMBOL(__asm_copy_to_user)
SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
EXPORT_SYMBOL(__asm_copy_from_user)
diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
new file mode 100644
index 000000000000..5bebcb1276a2
--- /dev/null
+++ b/arch/riscv/lib/uaccess_vector.S
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+#include <asm/asm-extable.h>
+#include <asm/csr.h>
+
+#define pDst a0
+#define pSrc a1
+#define iNum a2
+
+#define iVL a3
+#define pDstPtr a4
+
+#define ELEM_LMUL_SETTING m8
+#define vData v0
+
+ .macro fixup op reg addr lbl
+100:
+ \op \reg, \addr
+ _asm_extable 100b, \lbl
+ .endm
+
+SYM_FUNC_START(__asm_vector_usercopy)
+ /* Enable access to user memory */
+ li t6, SR_SUM
+ csrs CSR_STATUS, t6
+
+ /* Save for return value */
+ mv t5, a2
+
+ mv pDstPtr, pDst
+loop:
+ vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+ fixup vle8.v vData, (pSrc), 10f
+ fixup vse8.v vData, (pDstPtr), 10f
+ sub iNum, iNum, iVL
+ add pSrc, pSrc, iVL
+ add pDstPtr, pDstPtr, iVL
+ bnez iNum, loop
+
+.Lout_copy_user:
+ /* Disable access to user memory */
+ csrc CSR_STATUS, t6
+ li a0, 0
+ ret
+
+ /* Exception fixup code */
+10:
+ /* Disable access to user memory */
+ csrc CSR_STATUS, t6
+ mv a0, iNum
+ ret
+SYM_FUNC_END(__asm_vector_usercopy)
--
2.17.1
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [v5, 5/6] riscv: lib: vectorize copy_to_user/copy_from_user
2023-12-14 15:57 ` [v5, 5/6] riscv: lib: vectorize copy_to_user/copy_from_user Andy Chiu
@ 2023-12-15 6:25 ` Charlie Jenkins
2023-12-15 13:52 ` Andrew Jones
2023-12-19 9:58 ` Andy Chiu
0 siblings, 2 replies; 16+ messages in thread
From: Charlie Jenkins @ 2023-12-15 6:25 UTC (permalink / raw)
To: Andy Chiu
Cc: linux-riscv, palmer, greentime.hu, guoren, bjorn, ardb, arnd,
Paul Walmsley, Albert Ou, Conor Dooley, Andrew Jones,
Han-Kuan Chen, Heiko Stuebner, Aurelien Jarno, Bo YU,
Alexandre Ghiti, Clément Léger
On Thu, Dec 14, 2023 at 03:57:20PM +0000, Andy Chiu wrote:
> This patch utilizes Vector to perform copy_to_user/copy_from_user. If
> Vector is available and the size of copy is large enough for Vector to
> perform better than scalar, then direct the kernel to do Vector copies
> for userspace. Though the best programming practice for users is to
> reduce the copy, this provides a faster variant when copies are
> inevitable.
>
> The optimal size for using Vector, copy_to_user_thres, is only a
> heuristic for now. We can add DT parsing if people feel the need of
> customizing it.
>
> The exception fixup code of the __asm_vector_usercopy must fallback to
> the scalar one because accessing user pages might fault, and must be
> sleepable. Current kernel-mode Vector does not allow tasks to be
> preemptible, so we must disactivate Vector and perform a scalar fallback
> in such case.
>
> The original implementation of Vector operations comes from
> https://github.com/sifive/sifive-libc, which we agree to contribute to
> Linux kernel.
>
> Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
> ---
> Changelog v4:
> - new patch since v4
> ---
> arch/riscv/lib/Makefile | 2 ++
> arch/riscv/lib/riscv_v_helpers.c | 38 ++++++++++++++++++++++
> arch/riscv/lib/uaccess.S | 11 +++++++
> arch/riscv/lib/uaccess_vector.S | 55 ++++++++++++++++++++++++++++++++
> 4 files changed, 106 insertions(+)
> create mode 100644 arch/riscv/lib/riscv_v_helpers.c
> create mode 100644 arch/riscv/lib/uaccess_vector.S
>
> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
> index 494f9cd1a00c..1fe8d797e0f2 100644
> --- a/arch/riscv/lib/Makefile
> +++ b/arch/riscv/lib/Makefile
> @@ -12,3 +12,5 @@ lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
>
> obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
> lib-$(CONFIG_RISCV_ISA_V) += xor.o
> +lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
> +lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o
> diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
> new file mode 100644
> index 000000000000..d763b9c69fb7
> --- /dev/null
> +++ b/arch/riscv/lib/riscv_v_helpers.c
> @@ -0,0 +1,38 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2023 SiFive
> + * Author: Andy Chiu <andy.chiu@sifive.com>
> + */
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +#include <asm/vector.h>
> +#include <asm/simd.h>
> +
> +size_t riscv_v_usercopy_thres = 768;
> +int __asm_vector_usercopy(void *dst, void *src, size_t n);
> +int fallback_scalar_usercopy(void *dst, void *src, size_t n);
> +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
> +{
> + size_t remain, copied;
> +
> + /* skip has_vector() check because it has been done by the asm */
> + if (!may_use_simd())
> + goto fallback;
> +
> + kernel_vector_begin();
> + remain = __asm_vector_usercopy(dst, src, n);
> + kernel_vector_end();
> +
> + if (remain) {
> + copied = n - remain;
> + dst += copied;
> + src += copied;
> + goto fallback;
> + }
> +
> + return remain;
> +
> +fallback:
> + return fallback_scalar_usercopy(dst, src, n);
> +}
> diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
> index 3ab438f30d13..ae8c1453cfcf 100644
> --- a/arch/riscv/lib/uaccess.S
> +++ b/arch/riscv/lib/uaccess.S
> @@ -3,6 +3,8 @@
> #include <asm/asm.h>
> #include <asm/asm-extable.h>
> #include <asm/csr.h>
> +#include <asm/hwcap.h>
> +#include <asm/alternative-macros.h>
>
> .macro fixup op reg addr lbl
> 100:
> @@ -11,6 +13,14 @@
> .endm
>
> SYM_FUNC_START(__asm_copy_to_user)
> +#ifdef CONFIG_RISCV_ISA_V
> + ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V)
has_vector uses riscv_has_extension_unlikely, but this is the equivalent
of riscv_has_extension_likely. It seems like this should be consistent
across all call sites. Since has_vector uses the unlikely version, this
should probably be rearranged so that the nop is in the non-vector
version and the jump is for the vector version.
A neat optimization you can do here is replace the "nop" with the
instruction that will be executed first. With how it's written right now
you could replace the nop with the la instruction. It's just a nop so
the performance difference is probably not going to be noticable but
it's theoretically better without the nop. The downside of doing this is
that it seems like alternatives do not work with macros so you couldn't
replace the nop with a REG_L instruction, unless there is some trick to
make it work.
> + la t0, riscv_v_usercopy_thres
> + REG_L t0, (t0)
The assembler does something really silly here it seems. With both
binutils 2.41 and clang 18 the following is generated:
6: 00000297 auipc t0,0x0
a: 00028293 mv t0,t0
e: 0002b283 ld t0,0(t0) # 6 <__asm_copy_from_user+0x4>
However, this la is not needed. You can replace the la + REG_L with just
a REG_L as follows:
REG_L t0, riscv_v_usercopy_thres
This then generates the following code:
6: 00000297 auipc t0,0x0
a: 0002b283 ld t0,0(t0) # 6 <__asm_copy_from_user+0x4>
> + bltu a2, t0, fallback_scalar_usercopy
> + tail enter_vector_usercopy
> +#endif
> +SYM_FUNC_START(fallback_scalar_usercopy)
>
> /* Enable access to user memory */
> li t6, SR_SUM
> @@ -181,6 +191,7 @@ SYM_FUNC_START(__asm_copy_to_user)
> sub a0, t5, a0
> ret
> SYM_FUNC_END(__asm_copy_to_user)
> +SYM_FUNC_END(fallback_scalar_usercopy)
> EXPORT_SYMBOL(__asm_copy_to_user)
> SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
> EXPORT_SYMBOL(__asm_copy_from_user)
> diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
> new file mode 100644
> index 000000000000..5bebcb1276a2
> --- /dev/null
> +++ b/arch/riscv/lib/uaccess_vector.S
> @@ -0,0 +1,55 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#include <linux/linkage.h>
> +#include <asm-generic/export.h>
> +#include <asm/asm.h>
> +#include <asm/asm-extable.h>
> +#include <asm/csr.h>
> +
> +#define pDst a0
> +#define pSrc a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define pDstPtr a4
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> + .macro fixup op reg addr lbl
> +100:
> + \op \reg, \addr
> + _asm_extable 100b, \lbl
> + .endm
> +
> +SYM_FUNC_START(__asm_vector_usercopy)
> + /* Enable access to user memory */
> + li t6, SR_SUM
> + csrs CSR_STATUS, t6
> +
> + /* Save for return value */
> + mv t5, a2
What's the point of this?
> +
> + mv pDstPtr, pDst
Why do this move? pDst isn't used anywhere else so you can safely
continue to use pDst everywhere that pDstPtr is used.
- Charlie
> +loop:
> + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> + fixup vle8.v vData, (pSrc), 10f
> + fixup vse8.v vData, (pDstPtr), 10f
> + sub iNum, iNum, iVL
> + add pSrc, pSrc, iVL
> + add pDstPtr, pDstPtr, iVL
> + bnez iNum, loop
> +
> +.Lout_copy_user:
> + /* Disable access to user memory */
> + csrc CSR_STATUS, t6
> + li a0, 0
> + ret
> +
> + /* Exception fixup code */
> +10:
> + /* Disable access to user memory */
> + csrc CSR_STATUS, t6
> + mv a0, iNum
> + ret
> +SYM_FUNC_END(__asm_vector_usercopy)
> --
> 2.17.1
>
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [v5, 5/6] riscv: lib: vectorize copy_to_user/copy_from_user
2023-12-15 6:25 ` Charlie Jenkins
@ 2023-12-15 13:52 ` Andrew Jones
2023-12-19 14:43 ` Andy Chiu
2023-12-19 9:58 ` Andy Chiu
1 sibling, 1 reply; 16+ messages in thread
From: Andrew Jones @ 2023-12-15 13:52 UTC (permalink / raw)
To: Charlie Jenkins
Cc: Andy Chiu, linux-riscv, palmer, greentime.hu, guoren, bjorn, ardb,
arnd, Paul Walmsley, Albert Ou, Conor Dooley, Han-Kuan Chen,
Heiko Stuebner, Aurelien Jarno, Bo YU, Alexandre Ghiti,
Clément Léger
On Thu, Dec 14, 2023 at 10:25:49PM -0800, Charlie Jenkins wrote:
> On Thu, Dec 14, 2023 at 03:57:20PM +0000, Andy Chiu wrote:
...
> > SYM_FUNC_START(__asm_copy_to_user)
> > +#ifdef CONFIG_RISCV_ISA_V
> > + ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V)
>
> has_vector uses riscv_has_extension_unlikely, but this is the equivalent
> of riscv_has_extension_likely. It seems like this should be consistent
> across all call sites. Since has_vector uses the unlikely version, this
> should probably be rearranged so that the nop is in the non-vector
> version and the jump is for the vector version.
I think I prefer it the way it is, where the optimized path is fully
optimized and the fallback path also suffers the jump. (I've also
taken that approach for clear_page()). Also, as extensions are adopted
by more an more platforms, and we start to consider switching unlikelys
to likelys, then it would be easy to miss stuff like this.
>
> A neat optimization you can do here is replace the "nop" with the
> instruction that will be executed first. With how it's written right now
> you could replace the nop with the la instruction. It's just a nop so
> the performance difference is probably not going to be noticable but
> it's theoretically better without the nop. The downside of doing this is
I think I prefer the nop, because it's easier to read and maintain the
assembly function when the ALTERNATIVE doesn't do anything other than
choose the entry point.
> that it seems like alternatives do not work with macros so you couldn't
> replace the nop with a REG_L instruction, unless there is some trick to
> make it work.
One should be able to use REG_L in an alternative since macro expansion
will result in the string "ld" or "lw", which can then be concatenated
with its parameters, e.g.
ALTERNATIVE(REG_L " a1, 0(a2)", "nop", 0, 0, 0)
(But note the space before the a1. Without it, we'd get "lda1,")
Thanks,
drew
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [v5, 5/6] riscv: lib: vectorize copy_to_user/copy_from_user
2023-12-15 13:52 ` Andrew Jones
@ 2023-12-19 14:43 ` Andy Chiu
0 siblings, 0 replies; 16+ messages in thread
From: Andy Chiu @ 2023-12-19 14:43 UTC (permalink / raw)
To: Andrew Jones
Cc: Charlie Jenkins, linux-riscv, palmer, greentime.hu, guoren, bjorn,
ardb, arnd, Paul Walmsley, Albert Ou, Conor Dooley, Han-Kuan Chen,
Heiko Stuebner, Aurelien Jarno, Bo YU, Alexandre Ghiti,
Clément Léger
On Fri, Dec 15, 2023 at 9:52 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Thu, Dec 14, 2023 at 10:25:49PM -0800, Charlie Jenkins wrote:
> > On Thu, Dec 14, 2023 at 03:57:20PM +0000, Andy Chiu wrote:
> ...
> > > SYM_FUNC_START(__asm_copy_to_user)
> > > +#ifdef CONFIG_RISCV_ISA_V
> > > + ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V)
> >
> > has_vector uses riscv_has_extension_unlikely, but this is the equivalent
> > of riscv_has_extension_likely. It seems like this should be consistent
> > across all call sites. Since has_vector uses the unlikely version, this
> > should probably be rearranged so that the nop is in the non-vector
> > version and the jump is for the vector version.
>
> I think I prefer it the way it is, where the optimized path is fully
> optimized and the fallback path also suffers the jump. (I've also
> taken that approach for clear_page()). Also, as extensions are adopted
> by more an more platforms, and we start to consider switching unlikelys
> to likelys, then it would be easy to miss stuff like this.
>
> >
> > A neat optimization you can do here is replace the "nop" with the
> > instruction that will be executed first. With how it's written right now
> > you could replace the nop with the la instruction. It's just a nop so
> > the performance difference is probably not going to be noticable but
> > it's theoretically better without the nop. The downside of doing this is
>
> I think I prefer the nop, because it's easier to read and maintain the
> assembly function when the ALTERNATIVE doesn't do anything other than
> choose the entry point.
Good point. I would prefer this approach as well. Loading from a
symbol can take 2 instructions, so we will have to insert a nop
padding for the default path. Though the nop will never execute, it
will make assembly code a bit harder to read. Maybe we could leave it
for future optimization.
>
> > that it seems like alternatives do not work with macros so you couldn't
> > replace the nop with a REG_L instruction, unless there is some trick to
> > make it work.
>
> One should be able to use REG_L in an alternative since macro expansion
> will result in the string "ld" or "lw", which can then be concatenated
> with its parameters, e.g.
>
> ALTERNATIVE(REG_L " a1, 0(a2)", "nop", 0, 0, 0)
>
> (But note the space before the a1. Without it, we'd get "lda1,")
>
Umm, perhaps I am using an older toolchain. it reports:
arch/riscv/lib/uaccess.S:17: Error: too many positional arguments
on binutil 2.38
> Thanks,
> drew
Thanks,
Andy
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [v5, 5/6] riscv: lib: vectorize copy_to_user/copy_from_user
2023-12-15 6:25 ` Charlie Jenkins
2023-12-15 13:52 ` Andrew Jones
@ 2023-12-19 9:58 ` Andy Chiu
1 sibling, 0 replies; 16+ messages in thread
From: Andy Chiu @ 2023-12-19 9:58 UTC (permalink / raw)
To: Charlie Jenkins
Cc: linux-riscv, palmer, greentime.hu, guoren, bjorn, ardb, arnd,
Paul Walmsley, Albert Ou, Conor Dooley, Andrew Jones,
Han-Kuan Chen, Heiko Stuebner, Aurelien Jarno, Bo YU,
Alexandre Ghiti, Clément Léger
On Fri, Dec 15, 2023 at 2:25 PM Charlie Jenkins <charlie@rivosinc.com> wrote:
> On Thu, Dec 14, 2023 at 03:57:20PM +0000, Andy Chiu wrote:
> > + la t0, riscv_v_usercopy_thres
> > + REG_L t0, (t0)
>
> The assembler does something really silly here it seems. With both
> binutils 2.41 and clang 18 the following is generated:
>
> 6: 00000297 auipc t0,0x0
> a: 00028293 mv t0,t0
> e: 0002b283 ld t0,0(t0) # 6 <__asm_copy_from_user+0x4>
>
> However, this la is not needed. You can replace the la + REG_L with just
> a REG_L as follows:
>
> REG_L t0, riscv_v_usercopy_thres
>
> This then generates the following code:
>
> 6: 00000297 auipc t0,0x0
> a: 0002b283 ld t0,0(t0) # 6 <__asm_copy_from_user+0x4>
>
Thanks, this will be fixed in v5
> > + bltu a2, t0, fallback_scalar_usercopy
> > + tail enter_vector_usercopy
> > +#endif
> > +SYM_FUNC_START(fallback_scalar_usercopy)
> >
> > /* Enable access to user memory */
> > li t6, SR_SUM
> > @@ -181,6 +191,7 @@ SYM_FUNC_START(__asm_copy_to_user)
> > sub a0, t5, a0
> > ret
> > SYM_FUNC_END(__asm_copy_to_user)
> > +SYM_FUNC_END(fallback_scalar_usercopy)
> > EXPORT_SYMBOL(__asm_copy_to_user)
> > SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
> > EXPORT_SYMBOL(__asm_copy_from_user)
> > diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
> > new file mode 100644
> > index 000000000000..5bebcb1276a2
> > --- /dev/null
> > +++ b/arch/riscv/lib/uaccess_vector.S
> > @@ -0,0 +1,55 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +
> > +#include <linux/linkage.h>
> > +#include <asm-generic/export.h>
> > +#include <asm/asm.h>
> > +#include <asm/asm-extable.h>
> > +#include <asm/csr.h>
> > +
> > +#define pDst a0
> > +#define pSrc a1
> > +#define iNum a2
> > +
> > +#define iVL a3
> > +#define pDstPtr a4
> > +
> > +#define ELEM_LMUL_SETTING m8
> > +#define vData v0
> > +
> > + .macro fixup op reg addr lbl
> > +100:
> > + \op \reg, \addr
> > + _asm_extable 100b, \lbl
> > + .endm
> > +
> > +SYM_FUNC_START(__asm_vector_usercopy)
> > + /* Enable access to user memory */
> > + li t6, SR_SUM
> > + csrs CSR_STATUS, t6
> > +
> > + /* Save for return value */
> > + mv t5, a2
>
> What's the point of this?
Oops, I will remove it
>
> > +
> > + mv pDstPtr, pDst
>
> Why do this move? pDst isn't used anywhere else so you can safely
> continue to use pDst everywhere that pDstPtr is used.
Yes, it makes more sense to remove pDstPtr and use just pDst.
Thanks,
Andy
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 16+ messages in thread
* [v5, 6/6] riscv: lib: add vectorized mem* routines
2023-12-14 15:57 [v5, 0/6] riscv: support kernel-mode Vector Andy Chiu
` (4 preceding siblings ...)
2023-12-14 15:57 ` [v5, 5/6] riscv: lib: vectorize copy_to_user/copy_from_user Andy Chiu
@ 2023-12-14 15:57 ` Andy Chiu
2023-12-15 19:56 ` Charlie Jenkins
5 siblings, 1 reply; 16+ messages in thread
From: Andy Chiu @ 2023-12-14 15:57 UTC (permalink / raw)
To: linux-riscv, palmer
Cc: greentime.hu, guoren, bjorn, charlie, ardb, arnd, Andy Chiu,
Paul Walmsley, Albert Ou, Conor Dooley, Andrew Jones,
Han-Kuan Chen, Heiko Stuebner
Provide vectorized memcpy/memset/memmove to accelerate common memory
operations. Also, group them into the V_OPT_TEMPLATE3 macro because their
setup/tear-down and fallback logic is the same.
The original implementation of Vector operations comes from
https://github.com/sifive/sifive-libc, which we agree to contribute to
Linux kernel.
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
---
Changelog v4:
- new patch since v4
---
arch/riscv/lib/Makefile | 3 ++
arch/riscv/lib/memcpy_vector.S | 29 +++++++++++++++++++
arch/riscv/lib/memmove_vector.S | 49 ++++++++++++++++++++++++++++++++
arch/riscv/lib/memset_vector.S | 33 +++++++++++++++++++++
arch/riscv/lib/riscv_v_helpers.c | 21 ++++++++++++++
5 files changed, 135 insertions(+)
create mode 100644 arch/riscv/lib/memcpy_vector.S
create mode 100644 arch/riscv/lib/memmove_vector.S
create mode 100644 arch/riscv/lib/memset_vector.S
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 1fe8d797e0f2..3111863afd2e 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -14,3 +14,6 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o
+lib-$(CONFIG_RISCV_ISA_V) += memset_vector.o
+lib-$(CONFIG_RISCV_ISA_V) += memcpy_vector.o
+lib-$(CONFIG_RISCV_ISA_V) += memmove_vector.o
diff --git a/arch/riscv/lib/memcpy_vector.S b/arch/riscv/lib/memcpy_vector.S
new file mode 100644
index 000000000000..4176b6e0a53c
--- /dev/null
+++ b/arch/riscv/lib/memcpy_vector.S
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+#define pDst a0
+#define pSrc a1
+#define iNum a2
+
+#define iVL a3
+#define pDstPtr a4
+
+#define ELEM_LMUL_SETTING m8
+#define vData v0
+
+
+/* void *memcpy(void *, const void *, size_t) */
+SYM_FUNC_START(__asm_memcpy_vector)
+ mv pDstPtr, pDst
+loop:
+ vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+ vle8.v vData, (pSrc)
+ sub iNum, iNum, iVL
+ add pSrc, pSrc, iVL
+ vse8.v vData, (pDstPtr)
+ add pDstPtr, pDstPtr, iVL
+ bnez iNum, loop
+ ret
+SYM_FUNC_END(__asm_memcpy_vector)
diff --git a/arch/riscv/lib/memmove_vector.S b/arch/riscv/lib/memmove_vector.S
new file mode 100644
index 000000000000..4cea9d244dc9
--- /dev/null
+++ b/arch/riscv/lib/memmove_vector.S
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+#define pDst a0
+#define pSrc a1
+#define iNum a2
+
+#define iVL a3
+#define pDstPtr a4
+#define pSrcBackwardPtr a5
+#define pDstBackwardPtr a6
+
+#define ELEM_LMUL_SETTING m8
+#define vData v0
+
+SYM_FUNC_START(__asm_memmove_vector)
+
+ mv pDstPtr, pDst
+
+ bgeu pSrc, pDst, forward_copy_loop
+ add pSrcBackwardPtr, pSrc, iNum
+ add pDstBackwardPtr, pDst, iNum
+ bltu pDst, pSrcBackwardPtr, backward_copy_loop
+
+forward_copy_loop:
+ vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+
+ vle8.v vData, (pSrc)
+ sub iNum, iNum, iVL
+ add pSrc, pSrc, iVL
+ vse8.v vData, (pDstPtr)
+ add pDstPtr, pDstPtr, iVL
+
+ bnez iNum, forward_copy_loop
+ ret
+
+backward_copy_loop:
+ vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+
+ sub pSrcBackwardPtr, pSrcBackwardPtr, iVL
+ vle8.v vData, (pSrcBackwardPtr)
+ sub iNum, iNum, iVL
+ sub pDstBackwardPtr, pDstBackwardPtr, iVL
+ vse8.v vData, (pDstBackwardPtr)
+ bnez iNum, backward_copy_loop
+ ret
+
+SYM_FUNC_END(__asm_memmove_vector)
diff --git a/arch/riscv/lib/memset_vector.S b/arch/riscv/lib/memset_vector.S
new file mode 100644
index 000000000000..4611feed72ac
--- /dev/null
+++ b/arch/riscv/lib/memset_vector.S
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+#define pDst a0
+#define iValue a1
+#define iNum a2
+
+#define iVL a3
+#define iTemp a4
+#define pDstPtr a5
+
+#define ELEM_LMUL_SETTING m8
+#define vData v0
+
+/* void *memset(void *, int, size_t) */
+SYM_FUNC_START(__asm_memset_vector)
+
+ mv pDstPtr, pDst
+
+ vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+ vmv.v.x vData, iValue
+
+loop:
+ vse8.v vData, (pDstPtr)
+ sub iNum, iNum, iVL
+ add pDstPtr, pDstPtr, iVL
+ vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+ bnez iNum, loop
+
+ ret
+
+SYM_FUNC_END(__asm_memset_vector)
diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
index d763b9c69fb7..12e8c5deb013 100644
--- a/arch/riscv/lib/riscv_v_helpers.c
+++ b/arch/riscv/lib/riscv_v_helpers.c
@@ -36,3 +36,24 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
fallback:
return fallback_scalar_usercopy(dst, src, n);
}
+
+#define V_OPT_TEMPLATE3(prefix, type_r, type_0, type_1) \
+extern type_r __asm_##prefix##_vector(type_0, type_1, size_t n); \
+type_r prefix(type_0 a0, type_1 a1, size_t n) \
+{ \
+ type_r ret; \
+ if (has_vector() && may_use_simd() && n > riscv_v_##prefix##_thres) { \
+ kernel_vector_begin(); \
+ ret = __asm_##prefix##_vector(a0, a1, n); \
+ kernel_vector_end(); \
+ return ret; \
+ } \
+ return __##prefix(a0, a1, n); \
+}
+
+static size_t riscv_v_memset_thres = 1280;
+V_OPT_TEMPLATE3(memset, void *, void*, int)
+static size_t riscv_v_memcpy_thres = 768;
+V_OPT_TEMPLATE3(memcpy, void *, void*, const void *)
+static size_t riscv_v_memmove_thres = 512;
+V_OPT_TEMPLATE3(memmove, void *, void*, const void *)
--
2.17.1
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [v5, 6/6] riscv: lib: add vectorized mem* routines
2023-12-14 15:57 ` [v5, 6/6] riscv: lib: add vectorized mem* routines Andy Chiu
@ 2023-12-15 19:56 ` Charlie Jenkins
0 siblings, 0 replies; 16+ messages in thread
From: Charlie Jenkins @ 2023-12-15 19:56 UTC (permalink / raw)
To: Andy Chiu
Cc: linux-riscv, palmer, greentime.hu, guoren, bjorn, ardb, arnd,
Paul Walmsley, Albert Ou, Conor Dooley, Andrew Jones,
Han-Kuan Chen, Heiko Stuebner
On Thu, Dec 14, 2023 at 03:57:21PM +0000, Andy Chiu wrote:
> Provide vectorized memcpy/memset/memmove to accelerate common memory
> operations. Also, group them into V_OPT_TEMPLATE3 macro because their
> setup/tear-down and fallback logics are the same.
>
> The original implementation of Vector operations comes from
> https://github.com/sifive/sifive-libc, which we agree to contribute to
> Linux kernel.
>
> Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
> ---
> Changelog v4:
> - new patch since v4
> ---
> arch/riscv/lib/Makefile | 3 ++
> arch/riscv/lib/memcpy_vector.S | 29 +++++++++++++++++++
> arch/riscv/lib/memmove_vector.S | 49 ++++++++++++++++++++++++++++++++
> arch/riscv/lib/memset_vector.S | 33 +++++++++++++++++++++
> arch/riscv/lib/riscv_v_helpers.c | 21 ++++++++++++++
> 5 files changed, 135 insertions(+)
> create mode 100644 arch/riscv/lib/memcpy_vector.S
> create mode 100644 arch/riscv/lib/memmove_vector.S
> create mode 100644 arch/riscv/lib/memset_vector.S
>
> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
> index 1fe8d797e0f2..3111863afd2e 100644
> --- a/arch/riscv/lib/Makefile
> +++ b/arch/riscv/lib/Makefile
> @@ -14,3 +14,6 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
> lib-$(CONFIG_RISCV_ISA_V) += xor.o
> lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
> lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o
> +lib-$(CONFIG_RISCV_ISA_V) += memset_vector.o
> +lib-$(CONFIG_RISCV_ISA_V) += memcpy_vector.o
> +lib-$(CONFIG_RISCV_ISA_V) += memmove_vector.o
> diff --git a/arch/riscv/lib/memcpy_vector.S b/arch/riscv/lib/memcpy_vector.S
> new file mode 100644
> index 000000000000..4176b6e0a53c
> --- /dev/null
> +++ b/arch/riscv/lib/memcpy_vector.S
> @@ -0,0 +1,29 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +#define pDst a0
> +#define pSrc a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define pDstPtr a4
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +
> +/* void *memcpy(void *, const void *, size_t) */
> +SYM_FUNC_START(__asm_memcpy_vector)
> + mv pDstPtr, pDst
> +loop:
> + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> + vle8.v vData, (pSrc)
> + sub iNum, iNum, iVL
> + add pSrc, pSrc, iVL
> + vse8.v vData, (pDstPtr)
> + add pDstPtr, pDstPtr, iVL
> + bnez iNum, loop
> + ret
> +SYM_FUNC_END(__asm_memcpy_vector)
> diff --git a/arch/riscv/lib/memmove_vector.S b/arch/riscv/lib/memmove_vector.S
> new file mode 100644
> index 000000000000..4cea9d244dc9
> --- /dev/null
> +++ b/arch/riscv/lib/memmove_vector.S
> @@ -0,0 +1,49 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +#define pDst a0
> +#define pSrc a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define pDstPtr a4
> +#define pSrcBackwardPtr a5
> +#define pDstBackwardPtr a6
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +SYM_FUNC_START(__asm_memmove_vector)
> +
> + mv pDstPtr, pDst
> +
> + bgeu pSrc, pDst, forward_copy_loop
> + add pSrcBackwardPtr, pSrc, iNum
> + add pDstBackwardPtr, pDst, iNum
> + bltu pDst, pSrcBackwardPtr, backward_copy_loop
> +
> +forward_copy_loop:
> + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +
> + vle8.v vData, (pSrc)
> + sub iNum, iNum, iVL
> + add pSrc, pSrc, iVL
> + vse8.v vData, (pDstPtr)
> + add pDstPtr, pDstPtr, iVL
> +
> + bnez iNum, forward_copy_loop
> + ret
> +
> +backward_copy_loop:
> + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +
> + sub pSrcBackwardPtr, pSrcBackwardPtr, iVL
> + vle8.v vData, (pSrcBackwardPtr)
> + sub iNum, iNum, iVL
> + sub pDstBackwardPtr, pDstBackwardPtr, iVL
> + vse8.v vData, (pDstBackwardPtr)
> + bnez iNum, backward_copy_loop
> + ret
> +
> +SYM_FUNC_END(__asm_memmove_vector)
> diff --git a/arch/riscv/lib/memset_vector.S b/arch/riscv/lib/memset_vector.S
> new file mode 100644
> index 000000000000..4611feed72ac
> --- /dev/null
> +++ b/arch/riscv/lib/memset_vector.S
> @@ -0,0 +1,33 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +#define pDst a0
> +#define iValue a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define iTemp a4
> +#define pDstPtr a5
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +/* void *memset(void *, int, size_t) */
> +SYM_FUNC_START(__asm_memset_vector)
> +
> + mv pDstPtr, pDst
> +
> + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> + vmv.v.x vData, iValue
> +
> +loop:
> + vse8.v vData, (pDstPtr)
> + sub iNum, iNum, iVL
> + add pDstPtr, pDstPtr, iVL
> + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> + bnez iNum, loop
> +
> + ret
> +
> +SYM_FUNC_END(__asm_memset_vector)
> diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
> index d763b9c69fb7..12e8c5deb013 100644
> --- a/arch/riscv/lib/riscv_v_helpers.c
> +++ b/arch/riscv/lib/riscv_v_helpers.c
> @@ -36,3 +36,24 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
> fallback:
> return fallback_scalar_usercopy(dst, src, n);
> }
> +
> +#define V_OPT_TEMPLATE3(prefix, type_r, type_0, type_1) \
> +extern type_r __asm_##prefix##_vector(type_0, type_1, size_t n); \
> +type_r prefix(type_0 a0, type_1 a1, size_t n) \
> +{ \
> + type_r ret; \
> + if (has_vector() && may_use_simd() && n > riscv_v_##prefix##_thres) { \
I forgot to bring it up on the other patch, but the phrase "thres" is
not intuitive to me. I think spelling threshold out is better, or using
"thresh" instead would make this much more clear.
> + kernel_vector_begin(); \
> + ret = __asm_##prefix##_vector(a0, a1, n); \
> + kernel_vector_end(); \
> + return ret; \
> + } \
> + return __##prefix(a0, a1, n); \
> +}
> +
> +static size_t riscv_v_memset_thres = 1280;
> +V_OPT_TEMPLATE3(memset, void *, void*, int)
> +static size_t riscv_v_memcpy_thres = 768;
> +V_OPT_TEMPLATE3(memcpy, void *, void*, const void *)
> +static size_t riscv_v_memmove_thres = 512;
How were these values selected? I would imagine that this could be
different for different vector hardware and it might be valuable to make
these the default values but allow a kconfig option to change it.
- Charlie
> +V_OPT_TEMPLATE3(memmove, void *, void*, const void *)
> --
> 2.17.1
>
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 16+ messages in thread