* [RFC PATCH 01/12] arch: Add ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 [RFC PATCH 00/12] Unified cross-architecture kernel-mode FPU API Samuel Holland
@ 2023-12-08 5:54 ` Samuel Holland
2023-12-11 16:02 ` Christoph Hellwig
2023-12-08 5:54 ` [RFC PATCH 02/12] ARM: Implement ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
` (10 subsequent siblings)
11 siblings, 1 reply; 33+ messages in thread
From: Samuel Holland @ 2023-12-08 5:54 UTC (permalink / raw)
To: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig
Cc: linux-kernel, amd-gfx, linux-arch, Samuel Holland
Several architectures provide an API to enable the FPU and run
floating-point SIMD code in kernel space. However, the function names,
header locations, and semantics are inconsistent across architectures,
and FPU support may be gated behind other Kconfig options.
Provide a standard way for architectures to declare that kernel space
FPU support is available. Architectures selecting this option must
implement what is currently the most common API (kernel_fpu_begin() and
kernel_fpu_end(), plus a new function kernel_fpu_available()) and
provide the appropriate CFLAGS for compiling floating-point C code.
Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---
Makefile | 4 ++++
arch/Kconfig | 9 +++++++++
2 files changed, 13 insertions(+)
diff --git a/Makefile b/Makefile
index 511b5616aa41..e65c186cf2c9 100644
--- a/Makefile
+++ b/Makefile
@@ -969,6 +969,10 @@ KBUILD_CFLAGS += $(CC_FLAGS_CFI)
export CC_FLAGS_CFI
endif
+# Architectures can define flags to add/remove for floating-point support
+export CC_FLAGS_FPU
+export CC_FLAGS_NO_FPU
+
ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0)
KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT)
endif
diff --git a/arch/Kconfig b/arch/Kconfig
index f4b210ab0612..6df834e18e9c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1478,6 +1478,15 @@ config ARCH_HAS_NONLEAF_PMD_YOUNG
address translations. Page table walkers that clear the accessed bit
may use this capability to reduce their search space.
+config ARCH_HAS_KERNEL_FPU_SUPPORT
+ bool
+ help
+ An architecture should select this option if it supports running
+ floating-point code in kernel space. It must export the functions
+ kernel_fpu_available(), kernel_fpu_begin(), and kernel_fpu_end() from
+ <asm/fpu.h>, and define CC_FLAGS_FPU and/or CC_FLAGS_NO_FPU as
+ necessary in its Makefile.
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
--
2.42.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* [RFC PATCH 02/12] ARM: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 [RFC PATCH 00/12] Unified cross-architecture kernel-mode FPU API Samuel Holland
2023-12-08 5:54 ` [RFC PATCH 01/12] arch: Add ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
@ 2023-12-08 5:54 ` Samuel Holland
2023-12-11 16:04 ` Christoph Hellwig
2023-12-08 5:54 ` [RFC PATCH 03/12] ARM: crypto: Use CC_FLAGS_FPU for NEON CFLAGS Samuel Holland
` (9 subsequent siblings)
11 siblings, 1 reply; 33+ messages in thread
From: Samuel Holland @ 2023-12-08 5:54 UTC (permalink / raw)
To: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig
Cc: linux-kernel, amd-gfx, linux-arch, Samuel Holland
ARM provides an equivalent to the common kernel-mode FPU API, but in a
different header and using different function names. Add a wrapper
header, and export CFLAGS adjustments as found in lib/raid6/Makefile.
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---
arch/arm/Kconfig | 1 +
arch/arm/Makefile | 7 +++++++
arch/arm/include/asm/fpu.h | 17 +++++++++++++++++
3 files changed, 25 insertions(+)
create mode 100644 arch/arm/include/asm/fpu.h
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index f8567e95f98b..92e21a4a2903 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -14,6 +14,7 @@ config ARM
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_KEEPINITRD
select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if KERNEL_MODE_NEON
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 5ba42f69f8ce..1dd860dba5f5 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -130,6 +130,13 @@ endif
# Accept old syntax despite ".syntax unified"
AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
+# The GCC option -ffreestanding is required in order to compile code containing
+# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel)
+CC_FLAGS_FPU := -ffreestanding
+# Enable <arm_neon.h>
+CC_FLAGS_FPU += -isystem $(shell $(CC) -print-file-name=include)
+CC_FLAGS_FPU += -march=armv7-a -mfloat-abi=softfp -mfpu=neon
+
ifeq ($(CONFIG_THUMB2_KERNEL),y)
CFLAGS_ISA :=-Wa,-mimplicit-it=always $(AFLAGS_NOWARN)
AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb
diff --git a/arch/arm/include/asm/fpu.h b/arch/arm/include/asm/fpu.h
new file mode 100644
index 000000000000..d01ca06e700a
--- /dev/null
+++ b/arch/arm/include/asm/fpu.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/include/asm/fpu.h
+ *
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef __ASM_FPU_H
+#define __ASM_FPU_H
+
+#include <asm/neon.h>
+
+#define kernel_fpu_available() cpu_has_neon()
+#define kernel_fpu_begin() kernel_neon_begin()
+#define kernel_fpu_end() kernel_neon_end()
+
+#endif /* ! __ASM_FPU_H */
--
2.42.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [RFC PATCH 02/12] ARM: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 ` [RFC PATCH 02/12] ARM: Implement ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
@ 2023-12-11 16:04 ` Christoph Hellwig
0 siblings, 0 replies; 33+ messages in thread
From: Christoph Hellwig @ 2023-12-11 16:04 UTC (permalink / raw)
To: Samuel Holland
Cc: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig, linux-kernel, amd-gfx, linux-arch
> --- /dev/null
> +++ b/arch/arm/include/asm/fpu.h
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * linux/arch/arm/include/asm/fpu.h
Please don't add the file name to top of the file comments. It serves
no purpose and easily gets out of date.
Except for that:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 33+ messages in thread
* [RFC PATCH 03/12] ARM: crypto: Use CC_FLAGS_FPU for NEON CFLAGS
2023-12-08 5:54 [RFC PATCH 00/12] Unified cross-architecture kernel-mode FPU API Samuel Holland
2023-12-08 5:54 ` [RFC PATCH 01/12] arch: Add ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
2023-12-08 5:54 ` [RFC PATCH 02/12] ARM: Implement ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
@ 2023-12-08 5:54 ` Samuel Holland
2023-12-11 16:04 ` Christoph Hellwig
2023-12-08 5:54 ` [RFC PATCH 04/12] arm64: Implement ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
` (8 subsequent siblings)
11 siblings, 1 reply; 33+ messages in thread
From: Samuel Holland @ 2023-12-08 5:54 UTC (permalink / raw)
To: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig
Cc: linux-kernel, amd-gfx, linux-arch, Samuel Holland
Now that CC_FLAGS_FPU is exported and can be used anywhere in the source
tree, use it instead of duplicating the flags here.
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---
arch/arm/lib/Makefile | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 650404be6768..0ca5aae1bcc3 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -40,8 +40,7 @@ $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S
$(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S
ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
- NEON_FLAGS := -march=armv7-a -mfloat-abi=softfp -mfpu=neon
- CFLAGS_xor-neon.o += $(NEON_FLAGS)
+ CFLAGS_xor-neon.o += $(CC_FLAGS_FPU)
obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
endif
--
2.42.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [RFC PATCH 03/12] ARM: crypto: Use CC_FLAGS_FPU for NEON CFLAGS
2023-12-08 5:54 ` [RFC PATCH 03/12] ARM: crypto: Use CC_FLAGS_FPU for NEON CFLAGS Samuel Holland
@ 2023-12-11 16:04 ` Christoph Hellwig
0 siblings, 0 replies; 33+ messages in thread
From: Christoph Hellwig @ 2023-12-11 16:04 UTC (permalink / raw)
To: Samuel Holland
Cc: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig, linux-kernel, amd-gfx, linux-arch
On Thu, Dec 07, 2023 at 09:54:33PM -0800, Samuel Holland wrote:
> Now that CC_FLAGS_FPU is exported and can be used anywhere in the source
> tree, use it instead of duplicating the flags here.
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 33+ messages in thread
* [RFC PATCH 04/12] arm64: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 [RFC PATCH 00/12] Unified cross-architecture kernel-mode FPU API Samuel Holland
` (2 preceding siblings ...)
2023-12-08 5:54 ` [RFC PATCH 03/12] ARM: crypto: Use CC_FLAGS_FPU for NEON CFLAGS Samuel Holland
@ 2023-12-08 5:54 ` Samuel Holland
2023-12-11 16:05 ` Christoph Hellwig
2023-12-13 16:19 ` Will Deacon
2023-12-08 5:54 ` [RFC PATCH 05/12] lib/raid6: Use CC_FLAGS_FPU for NEON CFLAGS Samuel Holland
` (7 subsequent siblings)
11 siblings, 2 replies; 33+ messages in thread
From: Samuel Holland @ 2023-12-08 5:54 UTC (permalink / raw)
To: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig
Cc: linux-kernel, amd-gfx, linux-arch, Samuel Holland
arm64 provides an equivalent to the common kernel-mode FPU API, but in a
different header and using different function names. Add a wrapper
header, and export CFLAGS adjustments as found in lib/raid6/Makefile.
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---
arch/arm64/Kconfig | 1 +
arch/arm64/Makefile | 9 ++++++++-
arch/arm64/include/asm/fpu.h | 17 +++++++++++++++++
3 files changed, 26 insertions(+), 1 deletion(-)
create mode 100644 arch/arm64/include/asm/fpu.h
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b071a00425d..485ac389ac11 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -30,6 +30,7 @@ config ARM64
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if KERNEL_MODE_NEON
select ARCH_HAS_KEEPINITRD
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 9a2d3723cd0f..4a65f24c7998 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -36,7 +36,14 @@ ifeq ($(CONFIG_BROKEN_GAS_INST),y)
$(warning Detected assembler with broken .inst; disassembly will be unreliable)
endif
-KBUILD_CFLAGS += -mgeneral-regs-only \
+# The GCC option -ffreestanding is required in order to compile code containing
+# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel)
+CC_FLAGS_FPU := -ffreestanding
+# Enable <arm_neon.h>
+CC_FLAGS_FPU += -isystem $(shell $(CC) -print-file-name=include)
+CC_FLAGS_NO_FPU := -mgeneral-regs-only
+
+KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) \
$(compat_vdso) $(cc_has_k_constraint)
KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
KBUILD_AFLAGS += $(compat_vdso)
diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h
new file mode 100644
index 000000000000..664c0a192ab1
--- /dev/null
+++ b/arch/arm64/include/asm/fpu.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm64/include/asm/fpu.h
+ *
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef __ASM_FPU_H
+#define __ASM_FPU_H
+
+#include <asm/neon.h>
+
+#define kernel_fpu_available() cpu_has_neon()
+#define kernel_fpu_begin() kernel_neon_begin()
+#define kernel_fpu_end() kernel_neon_end()
+
+#endif /* ! __ASM_FPU_H */
--
2.42.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [RFC PATCH 04/12] arm64: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 ` [RFC PATCH 04/12] arm64: Implement ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
@ 2023-12-11 16:05 ` Christoph Hellwig
2023-12-13 16:19 ` Will Deacon
1 sibling, 0 replies; 33+ messages in thread
From: Christoph Hellwig @ 2023-12-11 16:05 UTC (permalink / raw)
To: Samuel Holland
Cc: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig, linux-kernel, amd-gfx, linux-arch
> + * linux/arch/arm64/include/asm/fpu.h
Same comment as for arm here. Except for that:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [RFC PATCH 04/12] arm64: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 ` [RFC PATCH 04/12] arm64: Implement ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
2023-12-11 16:05 ` Christoph Hellwig
@ 2023-12-13 16:19 ` Will Deacon
1 sibling, 0 replies; 33+ messages in thread
From: Will Deacon @ 2023-12-13 16:19 UTC (permalink / raw)
To: Samuel Holland, ardb
Cc: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig, linux-kernel, amd-gfx, linux-arch
On Thu, Dec 07, 2023 at 09:54:34PM -0800, Samuel Holland wrote:
> arm64 provides an equivalent to the common kernel-mode FPU API, but in a
> different header and using different function names. Add a wrapper
> header, and export CFLAGS adjustments as found in lib/raid6/Makefile.
>
> Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
> ---
>
> arch/arm64/Kconfig | 1 +
> arch/arm64/Makefile | 9 ++++++++-
> arch/arm64/include/asm/fpu.h | 17 +++++++++++++++++
> 3 files changed, 26 insertions(+), 1 deletion(-)
> create mode 100644 arch/arm64/include/asm/fpu.h
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 7b071a00425d..485ac389ac11 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -30,6 +30,7 @@ config ARM64
> select ARCH_HAS_GCOV_PROFILE_ALL
> select ARCH_HAS_GIGANTIC_PAGE
> select ARCH_HAS_KCOV
> + select ARCH_HAS_KERNEL_FPU_SUPPORT if KERNEL_MODE_NEON
> select ARCH_HAS_KEEPINITRD
> select ARCH_HAS_MEMBARRIER_SYNC_CORE
> select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
> diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> index 9a2d3723cd0f..4a65f24c7998 100644
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -36,7 +36,14 @@ ifeq ($(CONFIG_BROKEN_GAS_INST),y)
> $(warning Detected assembler with broken .inst; disassembly will be unreliable)
> endif
>
> -KBUILD_CFLAGS += -mgeneral-regs-only \
> +# The GCC option -ffreestanding is required in order to compile code containing
> +# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel)
> +CC_FLAGS_FPU := -ffreestanding
> +# Enable <arm_neon.h>
> +CC_FLAGS_FPU += -isystem $(shell $(CC) -print-file-name=include)
> +CC_FLAGS_NO_FPU := -mgeneral-regs-only
> +
> +KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) \
> $(compat_vdso) $(cc_has_k_constraint)
> KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
> KBUILD_AFLAGS += $(compat_vdso)
Can you use this to replace the same logic in arch/arm64/lib/Makefile,
like you do for arch/arm/?
Will
^ permalink raw reply [flat|nested] 33+ messages in thread
* [RFC PATCH 05/12] lib/raid6: Use CC_FLAGS_FPU for NEON CFLAGS
2023-12-08 5:54 [RFC PATCH 00/12] Unified cross-architecture kernel-mode FPU API Samuel Holland
` (3 preceding siblings ...)
2023-12-08 5:54 ` [RFC PATCH 04/12] arm64: Implement ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
@ 2023-12-08 5:54 ` Samuel Holland
2023-12-11 16:07 ` Christoph Hellwig
2023-12-08 5:54 ` [RFC PATCH 06/12] LoongArch: Implement ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
` (6 subsequent siblings)
11 siblings, 1 reply; 33+ messages in thread
From: Samuel Holland @ 2023-12-08 5:54 UTC (permalink / raw)
To: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig
Cc: linux-kernel, amd-gfx, linux-arch, Samuel Holland
Now that CC_FLAGS_FPU is exported and can be used anywhere in the source
tree, use it instead of duplicating the flags here.
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---
lib/raid6/Makefile | 31 ++++++++-----------------------
1 file changed, 8 insertions(+), 23 deletions(-)
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 1c5420ff254e..309fea97efc6 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -33,25 +33,6 @@ CFLAGS_REMOVE_vpermxor8.o += -msoft-float
endif
endif
-# The GCC option -ffreestanding is required in order to compile code containing
-# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel)
-ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
-NEON_FLAGS := -ffreestanding
-# Enable <arm_neon.h>
-NEON_FLAGS += -isystem $(shell $(CC) -print-file-name=include)
-ifeq ($(ARCH),arm)
-NEON_FLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=neon
-endif
-CFLAGS_recov_neon_inner.o += $(NEON_FLAGS)
-ifeq ($(ARCH),arm64)
-CFLAGS_REMOVE_recov_neon_inner.o += -mgeneral-regs-only
-CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only
-CFLAGS_REMOVE_neon2.o += -mgeneral-regs-only
-CFLAGS_REMOVE_neon4.o += -mgeneral-regs-only
-CFLAGS_REMOVE_neon8.o += -mgeneral-regs-only
-endif
-endif
-
quiet_cmd_unroll = UNROLL $@
cmd_unroll = $(AWK) -v N=$* -f $(srctree)/$(src)/unroll.awk < $< > $@
@@ -75,10 +56,14 @@ targets += vpermxor1.c vpermxor2.c vpermxor4.c vpermxor8.c
$(obj)/vpermxor%.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
-CFLAGS_neon1.o += $(NEON_FLAGS)
-CFLAGS_neon2.o += $(NEON_FLAGS)
-CFLAGS_neon4.o += $(NEON_FLAGS)
-CFLAGS_neon8.o += $(NEON_FLAGS)
+CFLAGS_neon1.o += $(CC_FLAGS_FPU)
+CFLAGS_neon2.o += $(CC_FLAGS_FPU)
+CFLAGS_neon4.o += $(CC_FLAGS_FPU)
+CFLAGS_neon8.o += $(CC_FLAGS_FPU)
+CFLAGS_REMOVE_neon1.o += $(CC_FLAGS_NO_FPU)
+CFLAGS_REMOVE_neon2.o += $(CC_FLAGS_NO_FPU)
+CFLAGS_REMOVE_neon4.o += $(CC_FLAGS_NO_FPU)
+CFLAGS_REMOVE_neon8.o += $(CC_FLAGS_NO_FPU)
targets += neon1.c neon2.c neon4.c neon8.c
$(obj)/neon%.c: $(src)/neon.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
--
2.42.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [RFC PATCH 05/12] lib/raid6: Use CC_FLAGS_FPU for NEON CFLAGS
2023-12-08 5:54 ` [RFC PATCH 05/12] lib/raid6: Use CC_FLAGS_FPU for NEON CFLAGS Samuel Holland
@ 2023-12-11 16:07 ` Christoph Hellwig
2023-12-11 16:12 ` Samuel Holland
0 siblings, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2023-12-11 16:07 UTC (permalink / raw)
To: Samuel Holland
Cc: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig, linux-kernel, amd-gfx, linux-arch
> +CFLAGS_REMOVE_neon1.o += $(CC_FLAGS_NO_FPU)
> +CFLAGS_REMOVE_neon2.o += $(CC_FLAGS_NO_FPU)
> +CFLAGS_REMOVE_neon4.o += $(CC_FLAGS_NO_FPU)
> +CFLAGS_REMOVE_neon8.o += $(CC_FLAGS_NO_FPU)
Btw, do we even really need the extra variables for compiler flags
to remove? Don't gcc/clang options work so that if you add a
no-prefixed version of the option later it transparently gets removed?
Except for that:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [RFC PATCH 05/12] lib/raid6: Use CC_FLAGS_FPU for NEON CFLAGS
2023-12-11 16:07 ` Christoph Hellwig
@ 2023-12-11 16:12 ` Samuel Holland
2023-12-12 7:11 ` Christoph Hellwig
0 siblings, 1 reply; 33+ messages in thread
From: Samuel Holland @ 2023-12-11 16:12 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
linux-kernel, amd-gfx, linux-arch
On 2023-12-11 10:07 AM, Christoph Hellwig wrote:
>> +CFLAGS_REMOVE_neon1.o += $(CC_FLAGS_NO_FPU)
>> +CFLAGS_REMOVE_neon2.o += $(CC_FLAGS_NO_FPU)
>> +CFLAGS_REMOVE_neon4.o += $(CC_FLAGS_NO_FPU)
>> +CFLAGS_REMOVE_neon8.o += $(CC_FLAGS_NO_FPU)
>
> Btw, do we even really need the extra variables for compiler flags
> to remove? Don't gcc/clang options work so that if you add a
> no-prefixed version of the option later it transparently gets removed?
Unfortunately, not all of the relevant options can be no-prefixed:
$ cat float.c
int main(void) { volatile float f = 123.456; return f / 10; }
$ aarch64-linux-musl-gcc float.c
$ aarch64-linux-musl-gcc -mgeneral-regs-only float.c
float.c: In function 'main':
float.c:1:33: error: '-mgeneral-regs-only' is incompatible with the use of floating-point types
1 | int main(void) { volatile float f = 123.456; return f / 10; }
| ^
float.c:1:55: error: '-mgeneral-regs-only' is incompatible with the use of floating-point types
1 | int main(void) { volatile float f = 123.456; return f / 10; }
| ~~^~~~
float.c:1:55: error: '-mgeneral-regs-only' is incompatible with the use of floating-point types
float.c:1:55: error: '-mgeneral-regs-only' is incompatible with the use of floating-point types
float.c:1:55: error: '-mgeneral-regs-only' is incompatible with the use of floating-point types
float.c:1:55: error: '-mgeneral-regs-only' is incompatible with the use of floating-point types
float.c:1:55: error: '-mgeneral-regs-only' is incompatible with the use of floating-point types
float.c:1:55: error: '-mgeneral-regs-only' is incompatible with the use of floating-point types
float.c:1:55: error: '-mgeneral-regs-only' is incompatible with the use of floating-point types
float.c:1:55: error: '-mgeneral-regs-only' is incompatible with the use of floating-point types
$ aarch64-linux-musl-gcc -mgeneral-regs-only -mno-general-regs-only float.c
aarch64-linux-musl-gcc: error: unrecognized command-line option '-mno-general-regs-only'; did you mean '-mgeneral-regs-only'?
$
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [RFC PATCH 05/12] lib/raid6: Use CC_FLAGS_FPU for NEON CFLAGS
2023-12-11 16:12 ` Samuel Holland
@ 2023-12-12 7:11 ` Christoph Hellwig
0 siblings, 0 replies; 33+ messages in thread
From: Christoph Hellwig @ 2023-12-12 7:11 UTC (permalink / raw)
To: Samuel Holland
Cc: Christoph Hellwig, linux-arm-kernel, loongarch, linuxppc-dev, x86,
linux-riscv, linux-kernel, amd-gfx, linux-arch
On Mon, Dec 11, 2023 at 10:12:27AM -0600, Samuel Holland wrote:
> On 2023-12-11 10:07 AM, Christoph Hellwig wrote:
>
> Unfortunately, not all of the relevant options can be no-prefixed:
Ok. That is another good argument for having the obj-fpu += syntax
I proposed. You might need help from the kbuild maintainers from that
as trying to understand the kbuild magic isn't something I'd expect
from a normal contributor (including myself..).
^ permalink raw reply [flat|nested] 33+ messages in thread
* [RFC PATCH 06/12] LoongArch: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 [RFC PATCH 00/12] Unified cross-architecture kernel-mode FPU API Samuel Holland
` (4 preceding siblings ...)
2023-12-08 5:54 ` [RFC PATCH 05/12] lib/raid6: Use CC_FLAGS_FPU for NEON CFLAGS Samuel Holland
@ 2023-12-08 5:54 ` Samuel Holland
2023-12-11 16:07 ` Christoph Hellwig
2023-12-13 16:13 ` WANG Xuerui
2023-12-08 5:54 ` [RFC PATCH 07/12] powerpc: " Samuel Holland
` (5 subsequent siblings)
11 siblings, 2 replies; 33+ messages in thread
From: Samuel Holland @ 2023-12-08 5:54 UTC (permalink / raw)
To: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig
Cc: linux-kernel, amd-gfx, linux-arch, Samuel Holland
LoongArch already provides kernel_fpu_begin() and kernel_fpu_end() in
asm/fpu.h, so it only needs to add kernel_fpu_available() and export
the CFLAGS adjustments.
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---
arch/loongarch/Kconfig | 1 +
arch/loongarch/Makefile | 5 ++++-
arch/loongarch/include/asm/fpu.h | 1 +
3 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index ee123820a476..65d4475565b8 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -15,6 +15,7 @@ config LOONGARCH
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if CPU_HAS_FPU
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_SPECIAL
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 204b94b2e6aa..f5c4f7e921db 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -25,6 +25,9 @@ endif
32bit-emul = elf32loongarch
64bit-emul = elf64loongarch
+CC_FLAGS_FPU := -mfpu=64
+CC_FLAGS_NO_FPU := -msoft-float
+
ifdef CONFIG_DYNAMIC_FTRACE
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
CC_FLAGS_FTRACE := -fpatchable-function-entry=2
@@ -46,7 +49,7 @@ ld-emul = $(64bit-emul)
cflags-y += -mabi=lp64s
endif
-cflags-y += -pipe -msoft-float
+cflags-y += -pipe $(CC_FLAGS_NO_FPU)
LDFLAGS_vmlinux += -static -n -nostdlib
# When the assembler supports explicit relocation hint, we must use it.
diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h
index c2d8962fda00..3177674228f8 100644
--- a/arch/loongarch/include/asm/fpu.h
+++ b/arch/loongarch/include/asm/fpu.h
@@ -21,6 +21,7 @@
struct sigcontext;
+#define kernel_fpu_available() cpu_has_fpu
extern void kernel_fpu_begin(void);
extern void kernel_fpu_end(void);
--
2.42.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [RFC PATCH 06/12] LoongArch: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 ` [RFC PATCH 06/12] LoongArch: Implement ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
@ 2023-12-11 16:07 ` Christoph Hellwig
2023-12-13 16:13 ` WANG Xuerui
1 sibling, 0 replies; 33+ messages in thread
From: Christoph Hellwig @ 2023-12-11 16:07 UTC (permalink / raw)
To: Samuel Holland
Cc: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig, linux-kernel, amd-gfx, linux-arch
On Thu, Dec 07, 2023 at 09:54:36PM -0800, Samuel Holland wrote:
> LoongArch already provides kernel_fpu_begin() and kernel_fpu_end() in
> asm/fpu.h, so it only needs to add kernel_fpu_available() and export
> the CFLAGS adjustments.
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [RFC PATCH 06/12] LoongArch: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 ` [RFC PATCH 06/12] LoongArch: Implement ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
2023-12-11 16:07 ` Christoph Hellwig
@ 2023-12-13 16:13 ` WANG Xuerui
1 sibling, 0 replies; 33+ messages in thread
From: WANG Xuerui @ 2023-12-13 16:13 UTC (permalink / raw)
To: Samuel Holland, linux-arm-kernel, loongarch, linuxppc-dev, x86,
linux-riscv, Christoph Hellwig
Cc: linux-kernel, amd-gfx, linux-arch
On 12/8/23 13:54, Samuel Holland wrote:
> LoongArch already provides kernel_fpu_begin() and kernel_fpu_end() in
> asm/fpu.h, so it only needs to add kernel_fpu_available() and export
> the CFLAGS adjustments.
>
> Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
> ---
>
> arch/loongarch/Kconfig | 1 +
> arch/loongarch/Makefile | 5 ++++-
> arch/loongarch/include/asm/fpu.h | 1 +
> 3 files changed, 6 insertions(+), 1 deletion(-)
This is all intuitive wrapping, so:
Acked-by: WANG Xuerui <git@xen0n.name>
Thanks!
--
WANG "xen0n" Xuerui
Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/
^ permalink raw reply [flat|nested] 33+ messages in thread
* [RFC PATCH 07/12] powerpc: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 [RFC PATCH 00/12] Unified cross-architecture kernel-mode FPU API Samuel Holland
` (5 preceding siblings ...)
2023-12-08 5:54 ` [RFC PATCH 06/12] LoongArch: Implement ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
@ 2023-12-08 5:54 ` Samuel Holland
2023-12-11 16:08 ` Christoph Hellwig
2023-12-08 5:54 ` [RFC PATCH 08/12] x86: " Samuel Holland
` (4 subsequent siblings)
11 siblings, 1 reply; 33+ messages in thread
From: Samuel Holland @ 2023-12-08 5:54 UTC (permalink / raw)
To: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig
Cc: linux-kernel, amd-gfx, linux-arch, Samuel Holland
PowerPC provides an equivalent to the common kernel-mode FPU API, but in
a different header and using different function names. The PowerPC API
also requires a non-preemptible context. Add a wrapper header, and
export the CFLAGS adjustments.
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---
arch/powerpc/Kconfig | 1 +
arch/powerpc/Makefile | 5 ++++-
arch/powerpc/include/asm/fpu.h | 28 ++++++++++++++++++++++++++++
3 files changed, 33 insertions(+), 1 deletion(-)
create mode 100644 arch/powerpc/include/asm/fpu.h
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6f105ee4f3cf..e96cb5b7c571 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -137,6 +137,7 @@ config PPC
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC_FPU
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MEMREMAP_COMPAT_ALIGN if PPC_64S_HASH_MMU
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index f19dbaa1d541..2d5f21baf6ff 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -142,6 +142,9 @@ CFLAGS-$(CONFIG_PPC32) += $(call cc-option, $(MULTIPLEWORD))
CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata)
+CC_FLAGS_FPU := $(call cc-option,-mhard-float)
+CC_FLAGS_NO_FPU += $(call cc-option,-msoft-float)
+
ifdef CONFIG_FUNCTION_TRACER
ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
@@ -163,7 +166,7 @@ asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)
KBUILD_CPPFLAGS += -I $(srctree)/arch/$(ARCH) $(asinstr)
KBUILD_AFLAGS += $(AFLAGS-y)
-KBUILD_CFLAGS += $(call cc-option,-msoft-float)
+KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU)
KBUILD_CFLAGS += $(CFLAGS-y)
CPP = $(CC) -E $(KBUILD_CFLAGS)
diff --git a/arch/powerpc/include/asm/fpu.h b/arch/powerpc/include/asm/fpu.h
new file mode 100644
index 000000000000..ca584e4bc40f
--- /dev/null
+++ b/arch/powerpc/include/asm/fpu.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef _ASM_POWERPC_FPU_H
+#define _ASM_POWERPC_FPU_H
+
+#include <linux/preempt.h>
+
+#include <asm/cpu_has_feature.h>
+#include <asm/switch_to.h>
+
+#define kernel_fpu_available() (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
+
+static inline void kernel_fpu_begin(void)
+{
+ preempt_disable();
+ enable_kernel_fp();
+}
+
+static inline void kernel_fpu_end(void)
+{
+ disable_kernel_fp();
+ preempt_enable();
+}
+
+#endif /* ! _ASM_POWERPC_FPU_H */
--
2.42.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [RFC PATCH 07/12] powerpc: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 ` [RFC PATCH 07/12] powerpc: " Samuel Holland
@ 2023-12-11 16:08 ` Christoph Hellwig
0 siblings, 0 replies; 33+ messages in thread
From: Christoph Hellwig @ 2023-12-11 16:08 UTC (permalink / raw)
To: Samuel Holland
Cc: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig, linux-kernel, amd-gfx, linux-arch
On Thu, Dec 07, 2023 at 09:54:37PM -0800, Samuel Holland wrote:
> PowerPC provides an equivalent to the common kernel-mode FPU API, but in
> a different header and using different function names. The PowerPC API
> also requires a non-preemptible context. Add a wrapper header, and
> export the CFLAGS adjustments.
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 33+ messages in thread
* [RFC PATCH 08/12] x86: Implement ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 [RFC PATCH 00/12] Unified cross-architecture kernel-mode FPU API Samuel Holland
` (6 preceding siblings ...)
2023-12-08 5:54 ` [RFC PATCH 07/12] powerpc: " Samuel Holland
@ 2023-12-08 5:54 ` Samuel Holland
2023-12-11 16:08 ` Christoph Hellwig
2023-12-08 5:54 ` [RFC PATCH 09/12] riscv: Add support for kernel-mode FPU Samuel Holland
` (3 subsequent siblings)
11 siblings, 1 reply; 33+ messages in thread
From: Samuel Holland @ 2023-12-08 5:54 UTC (permalink / raw)
To: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig
Cc: linux-kernel, amd-gfx, linux-arch, Samuel Holland
x86 already provides kernel_fpu_begin() and kernel_fpu_end(), but in a
different header. Add a wrapper header, and export the CFLAGS
adjustments as found in lib/Makefile.
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---
arch/x86/Kconfig | 1 +
arch/x86/Makefile | 20 ++++++++++++++++++++
arch/x86/include/asm/fpu.h | 13 +++++++++++++
3 files changed, 34 insertions(+)
create mode 100644 arch/x86/include/asm/fpu.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3762f41bb092..1fe7f2d8d017 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -81,6 +81,7 @@ config X86
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64
+ select ARCH_HAS_KERNEL_FPU_SUPPORT
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1a068de12a56..71576c8dbe79 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -70,6 +70,26 @@ export BITS
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
+#
+# CFLAGS for compiling floating point code inside the kernel.
+#
+CC_FLAGS_FPU := -msse -msse2
+ifdef CONFIG_CC_IS_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
+#
+# The "-msse" in the first argument is there so that the
+# -mpreferred-stack-boundary=3 build error:
+#
+# -mpreferred-stack-boundary=3 is not between 4 and 12
+#
+# can be triggered. Otherwise gcc doesn't complain.
+CC_FLAGS_FPU += -mhard-float
+CC_FLAGS_FPU += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4)
+endif
+
ifeq ($(CONFIG_X86_KERNEL_IBT),y)
#
# Kernel IBT has S_CET.NOTRACK_EN=0, as such the compilers must not generate
diff --git a/arch/x86/include/asm/fpu.h b/arch/x86/include/asm/fpu.h
new file mode 100644
index 000000000000..b2743fe19339
--- /dev/null
+++ b/arch/x86/include/asm/fpu.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef _ASM_X86_FPU_H
+#define _ASM_X86_FPU_H
+
+#include <asm/fpu/api.h>
+
+#define kernel_fpu_available() true
+
+#endif /* ! _ASM_X86_FPU_H */
--
2.42.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* [RFC PATCH 09/12] riscv: Add support for kernel-mode FPU
2023-12-08 5:54 [RFC PATCH 00/12] Unified cross-architecture kernel-mode FPU API Samuel Holland
` (7 preceding siblings ...)
2023-12-08 5:54 ` [RFC PATCH 08/12] x86: " Samuel Holland
@ 2023-12-08 5:54 ` Samuel Holland
2023-12-11 16:11 ` Christoph Hellwig
2023-12-08 5:54 ` [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
` (2 subsequent siblings)
11 siblings, 1 reply; 33+ messages in thread
From: Samuel Holland @ 2023-12-08 5:54 UTC (permalink / raw)
To: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig
Cc: linux-kernel, amd-gfx, linux-arch, Samuel Holland
This is motivated by the amdgpu DRM driver, which needs floating-point
code to support recent hardware. That code is not performance-critical,
so only provide a minimal non-preemptible implementation for now.
Use a similar trick as ARM to force placing floating-point code in a
separate translation unit, so it is not possible for compiler-generated
floating-point code to appear outside kernel_fpu_{begin,end}().
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---
arch/riscv/Kconfig | 1 +
arch/riscv/Makefile | 3 +++
arch/riscv/include/asm/fpu.h | 26 ++++++++++++++++++++++++++
arch/riscv/kernel/Makefile | 1 +
arch/riscv/kernel/kernel_mode_fpu.c | 28 ++++++++++++++++++++++++++++
5 files changed, 59 insertions(+)
create mode 100644 arch/riscv/include/asm/fpu.h
create mode 100644 arch/riscv/kernel/kernel_mode_fpu.c
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 95a2a06acc6a..cf0967928e6d 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -27,6 +27,7 @@ config RISCV
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if FPU
select ARCH_HAS_MMIOWB
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PMEM_API
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index a74be78678eb..2e719c369210 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -81,6 +81,9 @@ KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64i
KBUILD_AFLAGS += -march=$(riscv-march-y)
+# For C code built with floating-point support, exclude V but keep F and D.
+CC_FLAGS_FPU := -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)([^v_]*)v?/\1\2/')
+
KBUILD_CFLAGS += -mno-save-restore
KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET)
diff --git a/arch/riscv/include/asm/fpu.h b/arch/riscv/include/asm/fpu.h
new file mode 100644
index 000000000000..8cd027acc015
--- /dev/null
+++ b/arch/riscv/include/asm/fpu.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef _ASM_RISCV_FPU_H
+#define _ASM_RISCV_FPU_H
+
+#include <asm/switch_to.h>
+
+#define kernel_fpu_available() has_fpu()
+
+#ifdef __riscv_f
+
+#define kernel_fpu_begin() \
+ static_assert(false, "floating-point code must use a separate translation unit")
+#define kernel_fpu_end() kernel_fpu_begin()
+
+#else
+
+void kernel_fpu_begin(void);
+void kernel_fpu_end(void);
+
+#endif
+
+#endif /* ! _ASM_RISCV_FPU_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index fee22a3d1b53..662c483e338d 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/
obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
obj-$(CONFIG_FPU) += fpu.o
+obj-$(CONFIG_FPU) += kernel_mode_fpu.o
obj-$(CONFIG_RISCV_ISA_V) += vector.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/riscv/kernel/kernel_mode_fpu.c b/arch/riscv/kernel/kernel_mode_fpu.c
new file mode 100644
index 000000000000..9b2024cc056b
--- /dev/null
+++ b/arch/riscv/kernel/kernel_mode_fpu.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#include <linux/export.h>
+#include <linux/preempt.h>
+
+#include <asm/csr.h>
+#include <asm/fpu.h>
+#include <asm/processor.h>
+#include <asm/switch_to.h>
+
+void kernel_fpu_begin(void)
+{
+ preempt_disable();
+ fstate_save(current, task_pt_regs(current));
+ csr_set(CSR_SSTATUS, SR_FS);
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+ csr_clear(CSR_SSTATUS, SR_FS);
+ fstate_restore(current, task_pt_regs(current));
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_end);
--
2.42.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [RFC PATCH 09/12] riscv: Add support for kernel-mode FPU
2023-12-08 5:54 ` [RFC PATCH 09/12] riscv: Add support for kernel-mode FPU Samuel Holland
@ 2023-12-11 16:11 ` Christoph Hellwig
2023-12-11 16:16 ` Samuel Holland
0 siblings, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2023-12-11 16:11 UTC (permalink / raw)
To: Samuel Holland
Cc: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig, linux-kernel, amd-gfx, linux-arch
> +#ifdef __riscv_f
> +
> +#define kernel_fpu_begin() \
> + static_assert(false, "floating-point code must use a separate translation unit")
> +#define kernel_fpu_end() kernel_fpu_begin()
> +
> +#else
> +
> +void kernel_fpu_begin(void);
> +void kernel_fpu_end(void);
> +
> +#endif
I'll assume this is related to trick that places code in a separate
translation unit, but I fail to understand it. Can you add a comment
explaining it?
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [RFC PATCH 09/12] riscv: Add support for kernel-mode FPU
2023-12-11 16:11 ` Christoph Hellwig
@ 2023-12-11 16:16 ` Samuel Holland
0 siblings, 0 replies; 33+ messages in thread
From: Samuel Holland @ 2023-12-11 16:16 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
linux-kernel, amd-gfx, linux-arch
On 2023-12-11 10:11 AM, Christoph Hellwig wrote:
>> +#ifdef __riscv_f
>> +
>> +#define kernel_fpu_begin() \
>> + static_assert(false, "floating-point code must use a separate translation unit")
>> +#define kernel_fpu_end() kernel_fpu_begin()
>> +
>> +#else
>> +
>> +void kernel_fpu_begin(void);
>> +void kernel_fpu_end(void);
>> +
>> +#endif
>
> I'll assume this is related to trick that places code in a separate
> translation unit, but I fail to understand it. Can you add a comment
> explaining it?
Yes, I can add a comment. Here, __riscv_f refers to RISC-V's F extension for
single-precision floating point, which is enabled by CC_FLAGS_FPU.
^ permalink raw reply [flat|nested] 33+ messages in thread
* [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 [RFC PATCH 00/12] Unified cross-architecture kernel-mode FPU API Samuel Holland
` (8 preceding siblings ...)
2023-12-08 5:54 ` [RFC PATCH 09/12] riscv: Add support for kernel-mode FPU Samuel Holland
@ 2023-12-08 5:54 ` Samuel Holland
2023-12-11 12:23 ` Michael Ellerman
2023-12-08 5:54 ` [RFC PATCH 11/12] selftests/fpu: Move FP code to a separate translation unit Samuel Holland
2023-12-08 5:54 ` [RFC PATCH 12/12] selftests/fpu: Allow building on other architectures Samuel Holland
11 siblings, 1 reply; 33+ messages in thread
From: Samuel Holland @ 2023-12-08 5:54 UTC (permalink / raw)
To: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig
Cc: linux-kernel, amd-gfx, linux-arch, Samuel Holland
Now that all previously-supported architectures select
ARCH_HAS_KERNEL_FPU_SUPPORT, this code can depend on that symbol instead
of the existing list of architectures. It can also take advantage of the
common kernel-mode FPU API and method of adjusting CFLAGS.
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---
drivers/gpu/drm/amd/display/Kconfig | 2 +-
.../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 33 +----------------
drivers/gpu/drm/amd/display/dc/dml/Makefile | 36 ++-----------------
drivers/gpu/drm/amd/display/dc/dml2/Makefile | 36 ++-----------------
4 files changed, 6 insertions(+), 101 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 901d1961b739..5fcd4f778dc3 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -8,7 +8,7 @@ config DRM_AMD_DC
depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64
select SND_HDA_COMPONENT if SND_HDA_CORE
# !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752
- select DRM_AMD_DC_FP if (X86 || LOONGARCH || (PPC64 && ALTIVEC) || (ARM64 && KERNEL_MODE_NEON && !CC_IS_CLANG))
+ select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && (!ARM64 || !CC_IS_CLANG)
help
Choose this option if you want to use the new display engine
support for AMDGPU. This adds required support for Vega and
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index 4ae4720535a5..b64f917174ca 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -26,16 +26,7 @@
#include "dc_trace.h"
-#if defined(CONFIG_X86)
-#include <asm/fpu/api.h>
-#elif defined(CONFIG_PPC64)
-#include <asm/switch_to.h>
-#include <asm/cputable.h>
-#elif defined(CONFIG_ARM64)
-#include <asm/neon.h>
-#elif defined(CONFIG_LOONGARCH)
#include <asm/fpu.h>
-#endif
/**
* DOC: DC FPU manipulation overview
@@ -87,20 +78,9 @@ void dc_fpu_begin(const char *function_name, const int line)
WARN_ON_ONCE(!in_task());
preempt_disable();
depth = __this_cpu_inc_return(fpu_recursion_depth);
-
if (depth == 1) {
-#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
+ BUG_ON(!kernel_fpu_available());
kernel_fpu_begin();
-#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- enable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- enable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
- enable_kernel_fp();
-#elif defined(CONFIG_ARM64)
- kernel_neon_begin();
-#endif
}
TRACE_DCN_FPU(true, function_name, line, depth);
@@ -122,18 +102,7 @@ void dc_fpu_end(const char *function_name, const int line)
depth = __this_cpu_dec_return(fpu_recursion_depth);
if (depth == 0) {
-#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
kernel_fpu_end();
-#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- disable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- disable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
- disable_kernel_fp();
-#elif defined(CONFIG_ARM64)
- kernel_neon_end();
-#endif
} else {
WARN_ON_ONCE(depth < 0);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index ea7d60f9a9b4..5aad0f572ba3 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -25,40 +25,8 @@
# It provides the general basic services required by other DAL
# subcomponents.
-ifdef CONFIG_X86
-dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
-dml_ccflags := $(dml_ccflags-y) -msse
-endif
-
-ifdef CONFIG_PPC64
-dml_ccflags := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_ARM64
-dml_rcflags := -mgeneral-regs-only
-endif
-
-ifdef CONFIG_LOONGARCH
-dml_ccflags := -mfpu=64
-dml_rcflags := -msoft-float
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifneq ($(call gcc-min-version, 70100),y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-dml_ccflags += -mpreferred-stack-boundary=4
-else
-dml_ccflags += -msse2
-endif
-endif
+dml_ccflags := $(CC_FLAGS_FPU)
+dml_rcflags := $(CC_FLAGS_NO_FPU)
ifneq ($(CONFIG_FRAME_WARN),0)
frame_warn_flag := -Wframe-larger-than=2048
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index acff3449b8d7..4f6c804a26ad 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -24,40 +24,8 @@
#
# Makefile for dml2.
-ifdef CONFIG_X86
-dml2_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
-dml2_ccflags := $(dml2_ccflags-y) -msse
-endif
-
-ifdef CONFIG_PPC64
-dml2_ccflags := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_ARM64
-dml2_rcflags := -mgeneral-regs-only
-endif
-
-ifdef CONFIG_LOONGARCH
-dml2_ccflags := -mfpu=64
-dml2_rcflags := -msoft-float
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-dml2_ccflags += -mpreferred-stack-boundary=4
-else
-dml2_ccflags += -msse2
-endif
-endif
+dml2_ccflags := $(CC_FLAGS_FPU)
+dml2_rcflags := $(CC_FLAGS_NO_FPU)
ifneq ($(CONFIG_FRAME_WARN),0)
ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
--
2.42.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-08 5:54 ` [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
@ 2023-12-11 12:23 ` Michael Ellerman
2023-12-14 1:03 ` Samuel Holland
0 siblings, 1 reply; 33+ messages in thread
From: Michael Ellerman @ 2023-12-11 12:23 UTC (permalink / raw)
To: Samuel Holland, linux-arm-kernel, loongarch, linuxppc-dev, x86,
linux-riscv, Christoph Hellwig, Timothy Pearson
Cc: linux-kernel, amd-gfx, linux-arch, Samuel Holland
Hi Samuel,
Thanks for trying to clean all this up.
One problem below.
Samuel Holland <samuel.holland@sifive.com> writes:
> Now that all previously-supported architectures select
> ARCH_HAS_KERNEL_FPU_SUPPORT, this code can depend on that symbol instead
> of the existing list of architectures. It can also take advantage of the
> common kernel-mode FPU API and method of adjusting CFLAGS.
>
> Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
...
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> index 4ae4720535a5..b64f917174ca 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> @@ -87,20 +78,9 @@ void dc_fpu_begin(const char *function_name, const int line)
> WARN_ON_ONCE(!in_task());
> preempt_disable();
> depth = __this_cpu_inc_return(fpu_recursion_depth);
> -
> if (depth == 1) {
> -#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
> + BUG_ON(!kernel_fpu_available());
> kernel_fpu_begin();
> -#elif defined(CONFIG_PPC64)
> - if (cpu_has_feature(CPU_FTR_VSX_COMP))
> - enable_kernel_vsx();
> - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
> - enable_kernel_altivec();
Note altivec.
> - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
> - enable_kernel_fp();
> -#elif defined(CONFIG_ARM64)
> - kernel_neon_begin();
> -#endif
> }
>
> TRACE_DCN_FPU(true, function_name, line, depth);
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> index ea7d60f9a9b4..5aad0f572ba3 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> @@ -25,40 +25,8 @@
> # It provides the general basic services required by other DAL
> # subcomponents.
>
> -ifdef CONFIG_X86
> -dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
> -dml_ccflags := $(dml_ccflags-y) -msse
> -endif
> -
> -ifdef CONFIG_PPC64
> -dml_ccflags := -mhard-float -maltivec
> -endif
And altivec is enabled in the flags there.
That doesn't match your implementation for powerpc in patch 7, which
only deals with float.
I suspect the AMD driver actually doesn't need altivec enabled, but I
don't know that for sure. It compiles without it, but I don't have a GPU
to actually test. I've added Timothy on Cc who added the support for
powerpc to the driver originally, hopefully he has a test system.
Anyway if that's true that it doesn't need altivec we should probably do
a lead-up patch that drops altivec from the AMD driver explicitly, eg.
as below.
cheers
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index 4ae4720535a5..0de16796466b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -92,11 +92,7 @@ void dc_fpu_begin(const char *function_name, const int line)
#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
kernel_fpu_begin();
#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- enable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- enable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
+ if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
enable_kernel_fp();
#elif defined(CONFIG_ARM64)
kernel_neon_begin();
@@ -125,11 +121,7 @@ void dc_fpu_end(const char *function_name, const int line)
#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
kernel_fpu_end();
#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- disable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- disable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
+ if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
disable_kernel_fp();
#elif defined(CONFIG_ARM64)
kernel_neon_end();
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 6042a5a6a44f..554c39024a40 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -31,7 +31,7 @@ dml_ccflags := $(dml_ccflags-y) -msse
endif
ifdef CONFIG_PPC64
-dml_ccflags := -mhard-float -maltivec
+dml_ccflags := -mhard-float
endif
ifdef CONFIG_ARM64
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index acff3449b8d7..7b51364084b5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -30,7 +30,7 @@ dml2_ccflags := $(dml2_ccflags-y) -msse
endif
ifdef CONFIG_PPC64
-dml2_ccflags := -mhard-float -maltivec
+dml2_ccflags := -mhard-float
endif
ifdef CONFIG_ARM64
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-11 12:23 ` Michael Ellerman
@ 2023-12-14 1:03 ` Samuel Holland
2023-12-14 3:13 ` Timothy Pearson
2023-12-14 4:45 ` Michael Ellerman
0 siblings, 2 replies; 33+ messages in thread
From: Samuel Holland @ 2023-12-14 1:03 UTC (permalink / raw)
To: Michael Ellerman
Cc: linux-kernel, amd-gfx, linux-arch, linux-arm-kernel, loongarch,
linuxppc-dev, x86, linux-riscv, Christoph Hellwig,
Timothy Pearson
On 2023-12-11 6:23 AM, Michael Ellerman wrote:
> Hi Samuel,
>
> Thanks for trying to clean all this up.
>
> One problem below.
>
> Samuel Holland <samuel.holland@sifive.com> writes:
>> Now that all previously-supported architectures select
>> ARCH_HAS_KERNEL_FPU_SUPPORT, this code can depend on that symbol instead
>> of the existing list of architectures. It can also take advantage of the
>> common kernel-mode FPU API and method of adjusting CFLAGS.
>>
>> Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
> ...
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>> index 4ae4720535a5..b64f917174ca 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>> @@ -87,20 +78,9 @@ void dc_fpu_begin(const char *function_name, const int line)
>> WARN_ON_ONCE(!in_task());
>> preempt_disable();
>> depth = __this_cpu_inc_return(fpu_recursion_depth);
>> -
>> if (depth == 1) {
>> -#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
>> + BUG_ON(!kernel_fpu_available());
>> kernel_fpu_begin();
>> -#elif defined(CONFIG_PPC64)
>> - if (cpu_has_feature(CPU_FTR_VSX_COMP))
>> - enable_kernel_vsx();
>> - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
>> - enable_kernel_altivec();
>
> Note altivec.
>
>> - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
>> - enable_kernel_fp();
>> -#elif defined(CONFIG_ARM64)
>> - kernel_neon_begin();
>> -#endif
>> }
>>
>> TRACE_DCN_FPU(true, function_name, line, depth);
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>> index ea7d60f9a9b4..5aad0f572ba3 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>> @@ -25,40 +25,8 @@
>> # It provides the general basic services required by other DAL
>> # subcomponents.
>>
>> -ifdef CONFIG_X86
>> -dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
>> -dml_ccflags := $(dml_ccflags-y) -msse
>> -endif
>> -
>> -ifdef CONFIG_PPC64
>> -dml_ccflags := -mhard-float -maltivec
>> -endif
>
> And altivec is enabled in the flags there.
>
> That doesn't match your implementation for powerpc in patch 7, which
> only deals with float.
>
> I suspect the AMD driver actually doesn't need altivec enabled, but I
> don't know that for sure. It compiles without it, but I don't have a GPU
> to actually test. I've added Timothy on Cc who added the support for
> powerpc to the driver originally, hopefully he has a test system.
I tested this series on a POWER9 system with an AMD Radeon RX 6400 GPU (which
requires this FPU code to initialize), and got functioning graphics output.
> Anyway if that's true that it doesn't need altivec we should probably do
> a lead-up patch that drops altivec from the AMD driver explicitly, eg.
> as below.
That makes sense to me. Do you want to provide your Signed-off-by so I can send
this patch with your authorship?
Regards,
Samuel
> cheers
>
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> index 4ae4720535a5..0de16796466b 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> @@ -92,11 +92,7 @@ void dc_fpu_begin(const char *function_name, const int line)
> #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
> kernel_fpu_begin();
> #elif defined(CONFIG_PPC64)
> - if (cpu_has_feature(CPU_FTR_VSX_COMP))
> - enable_kernel_vsx();
> - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
> - enable_kernel_altivec();
> - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
> + if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
> enable_kernel_fp();
> #elif defined(CONFIG_ARM64)
> kernel_neon_begin();
> @@ -125,11 +121,7 @@ void dc_fpu_end(const char *function_name, const int line)
> #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
> kernel_fpu_end();
> #elif defined(CONFIG_PPC64)
> - if (cpu_has_feature(CPU_FTR_VSX_COMP))
> - disable_kernel_vsx();
> - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
> - disable_kernel_altivec();
> - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
> + if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
> disable_kernel_fp();
> #elif defined(CONFIG_ARM64)
> kernel_neon_end();
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> index 6042a5a6a44f..554c39024a40 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> @@ -31,7 +31,7 @@ dml_ccflags := $(dml_ccflags-y) -msse
> endif
>
> ifdef CONFIG_PPC64
> -dml_ccflags := -mhard-float -maltivec
> +dml_ccflags := -mhard-float
> endif
>
> ifdef CONFIG_ARM64
> diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> index acff3449b8d7..7b51364084b5 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
> @@ -30,7 +30,7 @@ dml2_ccflags := $(dml2_ccflags-y) -msse
> endif
>
> ifdef CONFIG_PPC64
> -dml2_ccflags := -mhard-float -maltivec
> +dml2_ccflags := -mhard-float
> endif
>
> ifdef CONFIG_ARM64
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-14 1:03 ` Samuel Holland
@ 2023-12-14 3:13 ` Timothy Pearson
2023-12-14 4:45 ` Michael Ellerman
1 sibling, 0 replies; 33+ messages in thread
From: Timothy Pearson @ 2023-12-14 3:13 UTC (permalink / raw)
To: Samuel Holland
Cc: Michael Ellerman, linux-kernel, amd-gfx, linux-arch,
linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig, Timothy Pearson
----- Original Message -----
> From: "Samuel Holland" <samuel.holland@sifive.com>
> To: "Michael Ellerman" <mpe@ellerman.id.au>
> Cc: "linux-kernel" <linux-kernel@vger.kernel.org>, "amd-gfx" <amd-gfx@lists.freedesktop.org>, "linux-arch"
> <linux-arch@vger.kernel.org>, "linux-arm-kernel" <linux-arm-kernel@lists.infradead.org>, loongarch@lists.linux.dev,
> "linuxppc-dev" <linuxppc-dev@lists.ozlabs.org>, "x86" <x86@kernel.org>, linux-riscv@lists.infradead.org, "Christoph
> Hellwig" <hch@infradead.org>, "Timothy Pearson" <tpearson@raptorengineering.com>
> Sent: Wednesday, December 13, 2023 7:03:20 PM
> Subject: Re: [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT
> On 2023-12-11 6:23 AM, Michael Ellerman wrote:
>> Hi Samuel,
>>
>> Thanks for trying to clean all this up.
>>
>> One problem below.
>>
>> Samuel Holland <samuel.holland@sifive.com> writes:
>>> Now that all previously-supported architectures select
>>> ARCH_HAS_KERNEL_FPU_SUPPORT, this code can depend on that symbol instead
>>> of the existing list of architectures. It can also take advantage of the
>>> common kernel-mode FPU API and method of adjusting CFLAGS.
>>>
>>> Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
>> ...
>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> index 4ae4720535a5..b64f917174ca 100644
>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> @@ -87,20 +78,9 @@ void dc_fpu_begin(const char *function_name, const int line)
>>> WARN_ON_ONCE(!in_task());
>>> preempt_disable();
>>> depth = __this_cpu_inc_return(fpu_recursion_depth);
>>> -
>>> if (depth == 1) {
>>> -#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
>>> + BUG_ON(!kernel_fpu_available());
>>> kernel_fpu_begin();
>>> -#elif defined(CONFIG_PPC64)
>>> - if (cpu_has_feature(CPU_FTR_VSX_COMP))
>>> - enable_kernel_vsx();
>>> - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
>>> - enable_kernel_altivec();
>>
>> Note altivec.
>>
>>> - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
>>> - enable_kernel_fp();
>>> -#elif defined(CONFIG_ARM64)
>>> - kernel_neon_begin();
>>> -#endif
>>> }
>>>
>>> TRACE_DCN_FPU(true, function_name, line, depth);
>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> index ea7d60f9a9b4..5aad0f572ba3 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> @@ -25,40 +25,8 @@
>>> # It provides the general basic services required by other DAL
>>> # subcomponents.
>>>
>>> -ifdef CONFIG_X86
>>> -dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
>>> -dml_ccflags := $(dml_ccflags-y) -msse
>>> -endif
>>> -
>>> -ifdef CONFIG_PPC64
>>> -dml_ccflags := -mhard-float -maltivec
>>> -endif
>>
>> And altivec is enabled in the flags there.
>>
>> That doesn't match your implementation for powerpc in patch 7, which
>> only deals with float.
>>
>> I suspect the AMD driver actually doesn't need altivec enabled, but I
>> don't know that for sure. It compiles without it, but I don't have a GPU
>> to actually test. I've added Timothy on Cc who added the support for
>> powerpc to the driver originally, hopefully he has a test system.
If you would like me to test I'm happy to do so, but I am travelling until Friday so would need to wait until then.
Thanks!
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT
2023-12-14 1:03 ` Samuel Holland
2023-12-14 3:13 ` Timothy Pearson
@ 2023-12-14 4:45 ` Michael Ellerman
1 sibling, 0 replies; 33+ messages in thread
From: Michael Ellerman @ 2023-12-14 4:45 UTC (permalink / raw)
To: Samuel Holland
Cc: linux-kernel, amd-gfx, linux-arch, linux-arm-kernel, loongarch,
linuxppc-dev, x86, linux-riscv, Christoph Hellwig,
Timothy Pearson
Samuel Holland <samuel.holland@sifive.com> writes:
> On 2023-12-11 6:23 AM, Michael Ellerman wrote:
>> Hi Samuel,
>>
>> Thanks for trying to clean all this up.
>>
>> One problem below.
>>
>> Samuel Holland <samuel.holland@sifive.com> writes:
>>> Now that all previously-supported architectures select
>>> ARCH_HAS_KERNEL_FPU_SUPPORT, this code can depend on that symbol instead
>>> of the existing list of architectures. It can also take advantage of the
>>> common kernel-mode FPU API and method of adjusting CFLAGS.
>>>
>>> Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
>> ...
>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> index 4ae4720535a5..b64f917174ca 100644
>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>>> @@ -87,20 +78,9 @@ void dc_fpu_begin(const char *function_name, const int line)
>>> WARN_ON_ONCE(!in_task());
>>> preempt_disable();
>>> depth = __this_cpu_inc_return(fpu_recursion_depth);
>>> -
>>> if (depth == 1) {
>>> -#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
>>> + BUG_ON(!kernel_fpu_available());
>>> kernel_fpu_begin();
>>> -#elif defined(CONFIG_PPC64)
>>> - if (cpu_has_feature(CPU_FTR_VSX_COMP))
>>> - enable_kernel_vsx();
>>> - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
>>> - enable_kernel_altivec();
>>
>> Note altivec.
>>
>>> - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
>>> - enable_kernel_fp();
>>> -#elif defined(CONFIG_ARM64)
>>> - kernel_neon_begin();
>>> -#endif
>>> }
>>>
>>> TRACE_DCN_FPU(true, function_name, line, depth);
>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> index ea7d60f9a9b4..5aad0f572ba3 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>>> @@ -25,40 +25,8 @@
>>> # It provides the general basic services required by other DAL
>>> # subcomponents.
>>>
>>> -ifdef CONFIG_X86
>>> -dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
>>> -dml_ccflags := $(dml_ccflags-y) -msse
>>> -endif
>>> -
>>> -ifdef CONFIG_PPC64
>>> -dml_ccflags := -mhard-float -maltivec
>>> -endif
>>
>> And altivec is enabled in the flags there.
>>
>> That doesn't match your implementation for powerpc in patch 7, which
>> only deals with float.
>>
>> I suspect the AMD driver actually doesn't need altivec enabled, but I
>> don't know that for sure. It compiles without it, but I don't have a GPU
>> to actually test. I've added Timothy on Cc who added the support for
>> powerpc to the driver originally, hopefully he has a test system.
>
> I tested this series on a POWER9 system with an AMD Radeon RX 6400 GPU (which
> requires this FPU code to initialize), and got functioning graphics output.
Awesome.
>> Anyway if that's true that it doesn't need altivec we should probably do
>> a lead-up patch that drops altivec from the AMD driver explicitly, eg.
>> as below.
>
> That makes sense to me. Do you want to provide your Signed-off-by so I can send
> this patch with your authorship?
Yeah that'd be great. Patch below. Feel free to adjust the commit
message as you see fit.
cheers
From c8a2862d2ebe76a023eceb3267fd85262925c0ba Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 14 Dec 2023 15:39:05 +1100
Subject: [PATCH] drm/amd/display: Only use hard-float, not altivec on powerpc
The compiler flags enable altivec, but that is not required, hard-float
is sufficient for the code to build and function.
Drop altivec from the compiler flags and adjust the enable/disable code
to only enable FPU use.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 12 ++----------
drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 +-
drivers/gpu/drm/amd/display/dc/dml2/Makefile | 2 +-
3 files changed, 4 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index 4ae4720535a5..0de16796466b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -92,11 +92,7 @@ void dc_fpu_begin(const char *function_name, const int line)
#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
kernel_fpu_begin();
#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- enable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- enable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
+ if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
enable_kernel_fp();
#elif defined(CONFIG_ARM64)
kernel_neon_begin();
@@ -125,11 +121,7 @@ void dc_fpu_end(const char *function_name, const int line)
#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
kernel_fpu_end();
#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- disable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- disable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
+ if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
disable_kernel_fp();
#elif defined(CONFIG_ARM64)
kernel_neon_end();
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 6042a5a6a44f..554c39024a40 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -31,7 +31,7 @@ dml_ccflags := $(dml_ccflags-y) -msse
endif
ifdef CONFIG_PPC64
-dml_ccflags := -mhard-float -maltivec
+dml_ccflags := -mhard-float
endif
ifdef CONFIG_ARM64
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index acff3449b8d7..7b51364084b5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -30,7 +30,7 @@ dml2_ccflags := $(dml2_ccflags-y) -msse
endif
ifdef CONFIG_PPC64
-dml2_ccflags := -mhard-float -maltivec
+dml2_ccflags := -mhard-float
endif
ifdef CONFIG_ARM64
--
2.43.0
^ permalink raw reply related [flat|nested] 33+ messages in thread
* [RFC PATCH 11/12] selftests/fpu: Move FP code to a separate translation unit
2023-12-08 5:54 [RFC PATCH 00/12] Unified cross-architecture kernel-mode FPU API Samuel Holland
` (9 preceding siblings ...)
2023-12-08 5:54 ` [RFC PATCH 10/12] drm/amd/display: Use ARCH_HAS_KERNEL_FPU_SUPPORT Samuel Holland
@ 2023-12-08 5:54 ` Samuel Holland
2023-12-11 16:18 ` Christoph Hellwig
2023-12-08 5:54 ` [RFC PATCH 12/12] selftests/fpu: Allow building on other architectures Samuel Holland
11 siblings, 1 reply; 33+ messages in thread
From: Samuel Holland @ 2023-12-08 5:54 UTC (permalink / raw)
To: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig
Cc: linux-kernel, amd-gfx, linux-arch, Samuel Holland
This ensures no compiler-generated floating-point code can appear
outside kernel_fpu_{begin,end}() sections, and some architectures
enforce this separation.
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---
lib/Makefile | 3 ++-
lib/{test_fpu.c => test_fpu_glue.c} | 32 +-------------------------
lib/test_fpu_impl.c | 35 +++++++++++++++++++++++++++++
3 files changed, 38 insertions(+), 32 deletions(-)
rename lib/{test_fpu.c => test_fpu_glue.c} (71%)
create mode 100644 lib/test_fpu_impl.c
diff --git a/lib/Makefile b/lib/Makefile
index 6b09731d8e61..e7cbd54944a2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -132,7 +132,8 @@ FPU_CFLAGS += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-st
endif
obj-$(CONFIG_TEST_FPU) += test_fpu.o
-CFLAGS_test_fpu.o += $(FPU_CFLAGS)
+test_fpu-y := test_fpu_glue.o test_fpu_impl.o
+CFLAGS_test_fpu_impl.o += $(FPU_CFLAGS)
obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/
diff --git a/lib/test_fpu.c b/lib/test_fpu_glue.c
similarity index 71%
rename from lib/test_fpu.c
rename to lib/test_fpu_glue.c
index e82db19fed84..2761b51117b0 100644
--- a/lib/test_fpu.c
+++ b/lib/test_fpu_glue.c
@@ -19,37 +19,7 @@
#include <linux/debugfs.h>
#include <asm/fpu/api.h>
-static int test_fpu(void)
-{
- /*
- * This sequence of operations tests that rounding mode is
- * to nearest and that denormal numbers are supported.
- * Volatile variables are used to avoid compiler optimizing
- * the calculations away.
- */
- volatile double a, b, c, d, e, f, g;
-
- a = 4.0;
- b = 1e-15;
- c = 1e-310;
-
- /* Sets precision flag */
- d = a + b;
-
- /* Result depends on rounding mode */
- e = a + b / 2;
-
- /* Denormal and very large values */
- f = b / c;
-
- /* Depends on denormal support */
- g = a + c * f;
-
- if (d > a && e > a && g > a)
- return 0;
- else
- return -EINVAL;
-}
+int test_fpu(void);
static int test_fpu_get(void *data, u64 *val)
{
diff --git a/lib/test_fpu_impl.c b/lib/test_fpu_impl.c
new file mode 100644
index 000000000000..2ff01980bc22
--- /dev/null
+++ b/lib/test_fpu_impl.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/errno.h>
+
+int test_fpu(void)
+{
+ /*
+ * This sequence of operations tests that rounding mode is
+ * to nearest and that denormal numbers are supported.
+ * Volatile variables are used to avoid compiler optimizing
+ * the calculations away.
+ */
+ volatile double a, b, c, d, e, f, g;
+
+ a = 4.0;
+ b = 1e-15;
+ c = 1e-310;
+
+ /* Sets precision flag */
+ d = a + b;
+
+ /* Result depends on rounding mode */
+ e = a + b / 2;
+
+ /* Denormal and very large values */
+ f = b / c;
+
+ /* Depends on denormal support */
+ g = a + c * f;
+
+ if (d > a && e > a && g > a)
+ return 0;
+ else
+ return -EINVAL;
+}
--
2.42.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [RFC PATCH 11/12] selftests/fpu: Move FP code to a separate translation unit
2023-12-08 5:54 ` [RFC PATCH 11/12] selftests/fpu: Move FP code to a separate translation unit Samuel Holland
@ 2023-12-11 16:18 ` Christoph Hellwig
0 siblings, 0 replies; 33+ messages in thread
From: Christoph Hellwig @ 2023-12-11 16:18 UTC (permalink / raw)
To: Samuel Holland
Cc: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig, linux-kernel, amd-gfx, linux-arch,
Masahiro Yamada, Nathan Chancellor, linux-kbuild
> obj-$(CONFIG_TEST_FPU) += test_fpu.o
> -CFLAGS_test_fpu.o += $(FPU_CFLAGS)
> +test_fpu-y := test_fpu_glue.o test_fpu_impl.o
> +CFLAGS_test_fpu_impl.o += $(FPU_CFLAGS)
Btw, I really wonder if having a
modname-fpu += foo.o
syntax in kbuild wouldn't be preferable to this. Of coure that requires
someone who understands kbuild inside out.
> +int test_fpu(void);
This needs to go into a header.
And I think I underatand your way to enforce the use of a separate
compilation unit in the riscv patch now.
Can we just make that generic, e.g. have a <linux/fpu.h> that wraps
<asm/fpu.h> that does the guard based on a
-D_LINUX_FPU_COMPILATION_UNIT=1 on the command line so that all the
code becomes fully portable? Any legacy arch specific fpu users not
using <linux/fpu.h> would not be affected by it, although it would be
great to eventually migrate them to the common scheme.
^ permalink raw reply [flat|nested] 33+ messages in thread
* [RFC PATCH 12/12] selftests/fpu: Allow building on other architectures
2023-12-08 5:54 [RFC PATCH 00/12] Unified cross-architecture kernel-mode FPU API Samuel Holland
` (10 preceding siblings ...)
2023-12-08 5:54 ` [RFC PATCH 11/12] selftests/fpu: Move FP code to a separate translation unit Samuel Holland
@ 2023-12-08 5:54 ` Samuel Holland
2023-12-11 16:19 ` Christoph Hellwig
11 siblings, 1 reply; 33+ messages in thread
From: Samuel Holland @ 2023-12-08 5:54 UTC (permalink / raw)
To: linux-arm-kernel, loongarch, linuxppc-dev, x86, linux-riscv,
Christoph Hellwig
Cc: linux-kernel, amd-gfx, linux-arch, Samuel Holland
Now that ARCH_HAS_KERNEL_FPU_SUPPORT provides a common way to compile
and run floating-point code, this test is no longer x86-specific.
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
---
lib/Kconfig.debug | 2 +-
lib/Makefile | 25 ++-----------------------
lib/test_fpu_glue.c | 5 ++++-
3 files changed, 7 insertions(+), 25 deletions(-)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cc7d53d9dc01..bbab0b054e09 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2933,7 +2933,7 @@ config TEST_FREE_PAGES
config TEST_FPU
tristate "Test floating point operations in kernel space"
- depends on X86 && !KCOV_INSTRUMENT_ALL
+ depends on ARCH_HAS_KERNEL_FPU_SUPPORT && !KCOV_INSTRUMENT_ALL
help
Enable this option to add /sys/kernel/debug/selftest_helpers/test_fpu
which will trigger a sequence of floating point operations. This is used
diff --git a/lib/Makefile b/lib/Makefile
index e7cbd54944a2..b9f28558c9bd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -109,31 +109,10 @@ CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE)
obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o
obj-$(CONFIG_TEST_OBJPOOL) += test_objpool.o
-#
-# CFLAGS for compiling floating point code inside the kernel. x86/Makefile turns
-# off the generation of FPU/SSE* instructions for kernel proper but FPU_FLAGS
-# get appended last to CFLAGS and thus override those previous compiler options.
-#
-FPU_CFLAGS := -msse -msse2
-ifdef CONFIG_CC_IS_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
-#
-# The "-msse" in the first argument is there so that the
-# -mpreferred-stack-boundary=3 build error:
-#
-# -mpreferred-stack-boundary=3 is not between 4 and 12
-#
-# can be triggered. Otherwise gcc doesn't complain.
-FPU_CFLAGS += -mhard-float
-FPU_CFLAGS += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4)
-endif
-
obj-$(CONFIG_TEST_FPU) += test_fpu.o
test_fpu-y := test_fpu_glue.o test_fpu_impl.o
-CFLAGS_test_fpu_impl.o += $(FPU_CFLAGS)
+CFLAGS_test_fpu_impl.o += $(CC_FLAGS_FPU)
+CFLAGS_REMOVE_test_fpu_impl.o += $(CC_FLAGS_NO_FPU)
obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/
diff --git a/lib/test_fpu_glue.c b/lib/test_fpu_glue.c
index 2761b51117b0..2e0b4027a5e3 100644
--- a/lib/test_fpu_glue.c
+++ b/lib/test_fpu_glue.c
@@ -17,7 +17,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/debugfs.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
int test_fpu(void);
@@ -38,6 +38,9 @@ static struct dentry *selftest_dir;
static int __init test_fpu_init(void)
{
+ if (!kernel_fpu_available())
+ return -EINVAL;
+
selftest_dir = debugfs_create_dir("selftest_helpers", NULL);
if (!selftest_dir)
return -ENOMEM;
--
2.42.0
^ permalink raw reply related [flat|nested] 33+ messages in thread