* [RFC PATCH v2] target/loongarch: Fix incorrect rounding in fnm{add, sub} under certain modes
@ 2025-05-07 9:14 WANG Rui
2025-05-07 15:04 ` [RFC PATCH v2] target/loongarch: Fix incorrect rounding in fnm{add,sub} " Richard Henderson
0 siblings, 1 reply; 4+ messages in thread
From: WANG Rui @ 2025-05-07 9:14 UTC (permalink / raw)
To: Song Gao, Richard Henderson
Cc: Philippe Mathieu-Daudé, qemu-devel, qemu, WANG Rui,
mengqinggang
This patch fixes incorrect results for `[xv]fnm{add,sub}.{s,d}`
instructions when rounding toward {zero, positive, negative}.
According to the LoongArch ISA specification, the result of an
instruction like `FNMSUB.D` is computed as:
FR[fd] = -FP64_fusedMultiplyAdd(FR[fj], FR[fk], -FR[fa])
Here, `FP64_fusedMultiplyAdd()` performs a fused multiply-add operation
compliant with IEEE 754-2008. The negation is applied to the fully
rounded result of the fused operation - not to any intermediate value.
This behavior is specific to LoongArch and differs from other arches,
which is why the existing `float_muladd_negate_result` flag does not
model it correctly.
To address this, I introduce a new flag `float_muladd_negate_rounded_result`,
which applies the negation after rounding. This ensures that rounding
decisions based on the sign of the result are handled correctly.
Reported-by: mengqinggang <mengqinggang@loongson.cn>
Signed-off-by: WANG Rui <wangrui@loongson.cn>
---
v1 -> v2:
- Introduce `float_muladd_negate_rounded_result`
---
fpu/softfloat.c | 42 ++++++++++++++++---
include/fpu/softfloat.h | 3 +-
.../tcg/insn_trans/trans_farith.c.inc | 10 +++--
target/loongarch/tcg/vec_helper.c | 8 ++--
tests/tcg/loongarch64/Makefile.target | 2 +
tests/tcg/loongarch64/test_fnmsub.c | 25 +++++++++++
tests/tcg/loongarch64/test_vfnmsub.c | 27 ++++++++++++
7 files changed, 102 insertions(+), 15 deletions(-)
create mode 100644 tests/tcg/loongarch64/test_fnmsub.c
create mode 100644 tests/tcg/loongarch64/test_vfnmsub.c
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 34c962d6bd..2691e89a03 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -2234,13 +2234,18 @@ float16_muladd_scalbn(float16 a, float16 b, float16 c,
int scale, int flags, float_status *status)
{
FloatParts64 pa, pb, pc, *pr;
+ float16 r;
float16_unpack_canonical(&pa, a, status);
float16_unpack_canonical(&pb, b, status);
float16_unpack_canonical(&pc, c, status);
pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
- return float16_round_pack_canonical(pr, status);
+ r = float16_round_pack_canonical(pr, status);
+ if (flags & float_muladd_negate_rounded_result) {
+ r = float16_chs(r);
+ }
+ return r;
}
float16 float16_muladd(float16 a, float16 b, float16 c,
@@ -2254,13 +2259,18 @@ float32_muladd_scalbn(float32 a, float32 b, float32 c,
int scale, int flags, float_status *status)
{
FloatParts64 pa, pb, pc, *pr;
+ float32 r;
float32_unpack_canonical(&pa, a, status);
float32_unpack_canonical(&pb, b, status);
float32_unpack_canonical(&pc, c, status);
pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
- return float32_round_pack_canonical(pr, status);
+ r = float32_round_pack_canonical(pr, status);
+ if (flags & float_muladd_negate_rounded_result) {
+ r = float32_chs(r);
+ }
+ return r;
}
float64 QEMU_SOFTFLOAT_ATTR
@@ -2268,13 +2278,18 @@ float64_muladd_scalbn(float64 a, float64 b, float64 c,
int scale, int flags, float_status *status)
{
FloatParts64 pa, pb, pc, *pr;
+ float64 r;
float64_unpack_canonical(&pa, a, status);
float64_unpack_canonical(&pb, b, status);
float64_unpack_canonical(&pc, c, status);
pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
- return float64_round_pack_canonical(pr, status);
+ r = float64_round_pack_canonical(pr, status);
+ if (flags & float_muladd_negate_rounded_result) {
+ r = float64_chs(r);
+ }
+ return r;
}
static bool force_soft_fma;
@@ -2422,39 +2437,54 @@ float64 float64r32_muladd(float64 a, float64 b, float64 c,
int flags, float_status *status)
{
FloatParts64 pa, pb, pc, *pr;
+ float64 r;
float64_unpack_canonical(&pa, a, status);
float64_unpack_canonical(&pb, b, status);
float64_unpack_canonical(&pc, c, status);
pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
- return float64r32_round_pack_canonical(pr, status);
+ r = float64r32_round_pack_canonical(pr, status);
+ if (flags & float_muladd_negate_rounded_result) {
+ r = float64_chs(r);
+ }
+ return r;
}
bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
int flags, float_status *status)
{
FloatParts64 pa, pb, pc, *pr;
+ bfloat16 r;
bfloat16_unpack_canonical(&pa, a, status);
bfloat16_unpack_canonical(&pb, b, status);
bfloat16_unpack_canonical(&pc, c, status);
pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
- return bfloat16_round_pack_canonical(pr, status);
+ r = bfloat16_round_pack_canonical(pr, status);
+ if (flags & float_muladd_negate_rounded_result) {
+ r = bfloat16_chs(r);
+ }
+ return r;
}
float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
int flags, float_status *status)
{
FloatParts128 pa, pb, pc, *pr;
+ float128 r;
float128_unpack_canonical(&pa, a, status);
float128_unpack_canonical(&pb, b, status);
float128_unpack_canonical(&pc, c, status);
pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
- return float128_round_pack_canonical(pr, status);
+ r = float128_round_pack_canonical(pr, status);
+ if (flags & float_muladd_negate_rounded_result) {
+ r = float128_chs(r);
+ }
+ return r;
}
/*
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index c18ab2cb60..db7ea2c916 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -129,7 +129,8 @@ enum {
float_muladd_negate_c = 1,
float_muladd_negate_product = 2,
float_muladd_negate_result = 4,
- float_muladd_suppress_add_product_zero = 8,
+ float_muladd_negate_rounded_result = 8,
+ float_muladd_suppress_add_product_zero = 16,
};
/*----------------------------------------------------------------------------
diff --git a/target/loongarch/tcg/insn_trans/trans_farith.c.inc b/target/loongarch/tcg/insn_trans/trans_farith.c.inc
index f4a0dea727..68d149647e 100644
--- a/target/loongarch/tcg/insn_trans/trans_farith.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_farith.c.inc
@@ -199,9 +199,11 @@ TRANS(fmadd_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, 0)
TRANS(fmadd_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, 0)
TRANS(fmsub_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, float_muladd_negate_c)
TRANS(fmsub_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, float_muladd_negate_c)
-TRANS(fnmadd_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, float_muladd_negate_result)
-TRANS(fnmadd_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, float_muladd_negate_result)
+TRANS(fnmadd_s, FP_SP, gen_muladd, gen_helper_fmuladd_s,
+ float_muladd_negate_rounded_result)
+TRANS(fnmadd_d, FP_DP, gen_muladd, gen_helper_fmuladd_d,
+ float_muladd_negate_rounded_result)
TRANS(fnmsub_s, FP_SP, gen_muladd, gen_helper_fmuladd_s,
- float_muladd_negate_c | float_muladd_negate_result)
+ float_muladd_negate_c | float_muladd_negate_rounded_result)
TRANS(fnmsub_d, FP_DP, gen_muladd, gen_helper_fmuladd_d,
- float_muladd_negate_c | float_muladd_negate_result)
+ float_muladd_negate_c | float_muladd_negate_rounded_result)
diff --git a/target/loongarch/tcg/vec_helper.c b/target/loongarch/tcg/vec_helper.c
index 3faf52cbc4..d20f887afa 100644
--- a/target/loongarch/tcg/vec_helper.c
+++ b/target/loongarch/tcg/vec_helper.c
@@ -2458,12 +2458,12 @@ DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0)
DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0)
DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c)
DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c)
-DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result)
-DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result)
+DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_rounded_result)
+DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_rounded_result)
DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd,
- float_muladd_negate_c | float_muladd_negate_result)
+ float_muladd_negate_c | float_muladd_negate_rounded_result)
DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd,
- float_muladd_negate_c | float_muladd_negate_result)
+ float_muladd_negate_c | float_muladd_negate_rounded_result)
#define DO_2OP_F(NAME, BIT, E, FN) \
void HELPER(NAME)(void *vd, void *vj, \
diff --git a/tests/tcg/loongarch64/Makefile.target b/tests/tcg/loongarch64/Makefile.target
index 00030a1026..e3554a500e 100644
--- a/tests/tcg/loongarch64/Makefile.target
+++ b/tests/tcg/loongarch64/Makefile.target
@@ -16,5 +16,7 @@ LOONGARCH64_TESTS += test_fclass
LOONGARCH64_TESTS += test_fpcom
LOONGARCH64_TESTS += test_pcadd
LOONGARCH64_TESTS += test_fcsr
+LOONGARCH64_TESTS += test_fnmsub
+LOONGARCH64_TESTS += test_vfnmsub
TESTS += $(LOONGARCH64_TESTS)
diff --git a/tests/tcg/loongarch64/test_fnmsub.c b/tests/tcg/loongarch64/test_fnmsub.c
new file mode 100644
index 0000000000..47fef92cb7
--- /dev/null
+++ b/tests/tcg/loongarch64/test_fnmsub.c
@@ -0,0 +1,25 @@
+#include <assert.h>
+#include <stdint.h>
+#include <fenv.h>
+
+int main()
+{
+ double x, y, z;
+ union {
+ uint64_t i;
+ double d;
+ } u;
+
+ x = 0x1.0p256;
+ y = 0x1.0p256;
+ z = 0x1.0p-256;
+
+ fesetround(FE_DOWNWARD);
+ asm("fnmsub.d %[x], %[x], %[y], %[z]\n\t"
+ :[x]"+f"(x)
+ :[y]"f"(y), [z]"f"(z));
+
+ u.d = x;
+ assert(u.i == 0xdfefffffffffffffUL);
+ return 0;
+}
diff --git a/tests/tcg/loongarch64/test_vfnmsub.c b/tests/tcg/loongarch64/test_vfnmsub.c
new file mode 100644
index 0000000000..8c332674ae
--- /dev/null
+++ b/tests/tcg/loongarch64/test_vfnmsub.c
@@ -0,0 +1,27 @@
+#include <assert.h>
+#include <stdint.h>
+#include <fenv.h>
+
+int main()
+{
+ uint64_t x, y, z;
+
+ x = 0x4ff0000000000000UL;
+ y = 0x4ff0000000000000UL;
+ z = 0x2ff0000000000000UL;
+
+ fesetround(FE_DOWNWARD);
+ asm("vreplgr2vr.d $vr0, %[x]\n\t"
+ "vreplgr2vr.d $vr1, %[y]\n\t"
+ "vreplgr2vr.d $vr2, %[z]\n\t"
+ "vfnmsub.d $vr0, $vr0, $vr1, $vr2\n\t"
+ "vpickve2gr.d %[x], $vr0, 0\n\t"
+ "vpickve2gr.d %[y], $vr0, 1\n\t"
+ :[x]"+&r"(x), [y]"+&r"(y)
+ :[z]"r"(z)
+ :"$f0", "$f1", "$f2");
+
+ assert(x == 0xdfefffffffffffffUL);
+ assert(y == 0xdfefffffffffffffUL);
+ return 0;
+}
--
2.49.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [RFC PATCH v2] target/loongarch: Fix incorrect rounding in fnm{add,sub} under certain modes
2025-05-07 9:14 [RFC PATCH v2] target/loongarch: Fix incorrect rounding in fnm{add, sub} under certain modes WANG Rui
@ 2025-05-07 15:04 ` Richard Henderson
2025-05-07 15:25 ` WANG Rui
0 siblings, 1 reply; 4+ messages in thread
From: Richard Henderson @ 2025-05-07 15:04 UTC (permalink / raw)
To: WANG Rui, Song Gao
Cc: Philippe Mathieu-Daudé, qemu-devel, qemu, mengqinggang
On 5/7/25 02:14, WANG Rui wrote:
> This patch fixes incorrect results for `[xv]fnm{add,sub}.{s,d}`
> instructions when rounding toward {zero, positive, negative}.
>
> According to the LoongArch ISA specification, the result of an
> instruction like `FNMSUB.D` is computed as:
>
> FR[fd] = -FP64_fusedMultiplyAdd(FR[fj], FR[fk], -FR[fa])
>
> Here, `FP64_fusedMultiplyAdd()` performs a fused multiply-add operation
> compliant with IEEE 754-2008. The negation is applied to the fully
> rounded result of the fused operation - not to any intermediate value.
> This behavior is specific to LoongArch and differs from other arches,
> which is why the existing `float_muladd_negate_result` flag does not
> model it correctly.
Loongarch does not differ from other arches; we got it wrong for everyone.
There's no need for a new flag.
r~
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC PATCH v2] target/loongarch: Fix incorrect rounding in fnm{add,sub} under certain modes
2025-05-07 15:04 ` [RFC PATCH v2] target/loongarch: Fix incorrect rounding in fnm{add,sub} " Richard Henderson
@ 2025-05-07 15:25 ` WANG Rui
2025-05-07 16:16 ` Richard Henderson
0 siblings, 1 reply; 4+ messages in thread
From: WANG Rui @ 2025-05-07 15:25 UTC (permalink / raw)
To: Richard Henderson
Cc: Song Gao, Philippe Mathieu-Daudé, qemu-devel, qemu,
mengqinggang
Hi Richard,
On Wed, May 7, 2025 at 11:04 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 5/7/25 02:14, WANG Rui wrote:
> > This patch fixes incorrect results for `[xv]fnm{add,sub}.{s,d}`
> > instructions when rounding toward {zero, positive, negative}.
> >
> > According to the LoongArch ISA specification, the result of an
> > instruction like `FNMSUB.D` is computed as:
> >
> > FR[fd] = -FP64_fusedMultiplyAdd(FR[fj], FR[fk], -FR[fa])
> >
> > Here, `FP64_fusedMultiplyAdd()` performs a fused multiply-add operation
> > compliant with IEEE 754-2008. The negation is applied to the fully
> > rounded result of the fused operation - not to any intermediate value.
> > This behavior is specifiec to LoongArch and differs from other arches,
> > which is why the existing `float_muladd_negate_result` flag does not
> > model it correctly.
>
> Loongarch does not differ from other arches; we got it wrong for everyone.
> There's no need for a new flag.
I'm not familiar with PowerPC and s390x. The official PowerPC docs[^1]
doesn't clearly specify the order of negation and rounding operations.
I also don't have access to the hardware to run experiments. However,
I did find the Linux kernel's emulation code[^2][^3] for the PowerPC
`fnmsub` instruction, which seems to suggest that negation occurs
before rounding - though it's possible that interpretation is
incorrect.
[^1]: https://www.ibm.com/docs/en/aix/7.2?topic=set-fnmsub-fnms-floating-negative-multiply-subtract-instruction
[^2]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/powerpc/math-emu/fnmsub.c?h=v6.15-rc5#n49
[^3]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/powerpc/math-emu/fnmsub.c?h=v6.15-rc5#n55
Regards,
-hev
>
>
> r~
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC PATCH v2] target/loongarch: Fix incorrect rounding in fnm{add,sub} under certain modes
2025-05-07 15:25 ` WANG Rui
@ 2025-05-07 16:16 ` Richard Henderson
0 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2025-05-07 16:16 UTC (permalink / raw)
To: WANG Rui
Cc: Song Gao, Philippe Mathieu-Daudé, qemu-devel, qemu,
mengqinggang
On 5/7/25 08:25, WANG Rui wrote:
> Hi Richard,
>
> On Wed, May 7, 2025 at 11:04 PM Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> On 5/7/25 02:14, WANG Rui wrote:
>>> This patch fixes incorrect results for `[xv]fnm{add,sub}.{s,d}`
>>> instructions when rounding toward {zero, positive, negative}.
>>>
>>> According to the LoongArch ISA specification, the result of an
>>> instruction like `FNMSUB.D` is computed as:
>>>
>>> FR[fd] = -FP64_fusedMultiplyAdd(FR[fj], FR[fk], -FR[fa])
>>>
>>> Here, `FP64_fusedMultiplyAdd()` performs a fused multiply-add operation
>>> compliant with IEEE 754-2008. The negation is applied to the fully
>>> rounded result of the fused operation - not to any intermediate value.
>>> This behavior is specifiec to LoongArch and differs from other arches,
>>> which is why the existing `float_muladd_negate_result` flag does not
>>> model it correctly.
>>
>> Loongarch does not differ from other arches; we got it wrong for everyone.
>> There's no need for a new flag.
>
> I'm not familiar with PowerPC and s390x. The official PowerPC docs[^1]
> doesn't clearly specify the order of negation and rounding operations.
Certainly it does.
This instruction produces the same result as would be
obtained by using the Floating Multiply-Subtract
instruction and then negating the result, with the
following exceptions.
Round is done the same as FMSUB, *and then negating*.
So the negation must happen second.
> I also don't have access to the hardware to run experiments. However,
> I did find the Linux kernel's emulation code[^2][^3] for the PowerPC
> `fnmsub` instruction, which seems to suggest that negation occurs
> before rounding - though it's possible that interpretation is
> incorrect.
That code gets this case wrong, just like we did.
>
> [^1]: https://www.ibm.com/docs/en/aix/7.2?topic=set-fnmsub-fnms-floating-negative-multiply-subtract-instruction
> [^2]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/powerpc/math-emu/fnmsub.c?h=v6.15-rc5#n49
> [^3}: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/powerpc/math-emu/fnmsub.c?h=v6.15-rc5#n55
>
> Regards,
> -hev
>
>>
>>
>> r~
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-05-07 16:16 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-07 9:14 [RFC PATCH v2] target/loongarch: Fix incorrect rounding in fnm{add, sub} under certain modes WANG Rui
2025-05-07 15:04 ` [RFC PATCH v2] target/loongarch: Fix incorrect rounding in fnm{add,sub} " Richard Henderson
2025-05-07 15:25 ` WANG Rui
2025-05-07 16:16 ` Richard Henderson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).