* [Qemu-devel] [PATCH 02/14] target-mips: use the softfloat floatXX_muladd functions
2012-10-09 20:27 [Qemu-devel] [PATCH 00/14] target-mips: misc fixes and optimizations Aurelien Jarno
@ 2012-10-09 20:27 ` Aurelien Jarno
2012-10-10 19:58 ` Richard Henderson
0 siblings, 1 reply; 3+ messages in thread
From: Aurelien Jarno @ 2012-10-09 20:27 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
Use the new softfloat floatXX_muladd() functions to implement the madd,
msub, nmadd and nmsub instructions. At the same time replace the name of
the helpers by the name of the instruction, as the only reason for the
previous names was to keep the macros simple.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/helper.h | 8 +--
target-mips/op_helper.c | 137 +++++++++++++++++------------------------------
target-mips/translate.c | 24 ++++-----
3 files changed, 64 insertions(+), 105 deletions(-)
diff --git a/target-mips/helper.h b/target-mips/helper.h
index f35ed78..740178f 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -254,10 +254,10 @@ FOP_PROTO(rsqrt2)
DEF_HELPER_4(float_ ## op ## _s, i32, env, i32, i32, i32) \
DEF_HELPER_4(float_ ## op ## _d, i64, env, i64, i64, i64) \
DEF_HELPER_4(float_ ## op ## _ps, i64, env, i64, i64, i64)
-FOP_PROTO(muladd)
-FOP_PROTO(mulsub)
-FOP_PROTO(nmuladd)
-FOP_PROTO(nmulsub)
+FOP_PROTO(madd)
+FOP_PROTO(msub)
+FOP_PROTO(nmadd)
+FOP_PROTO(nmsub)
#undef FOP_PROTO
#define FOP_PROTO(op) \
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index ce5ddaf..9d6d54a 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -3016,95 +3016,54 @@ FLOAT_BINOP(mul)
FLOAT_BINOP(div)
#undef FLOAT_BINOP
-/* ternary operations */
-#define FLOAT_TERNOP(name1, name2) \
-uint64_t helper_float_ ## name1 ## name2 ## _d(CPUMIPSState *env, \
- uint64_t fdt0, \
- uint64_t fdt1, \
- uint64_t fdt2) \
-{ \
- fdt0 = float64_ ## name1 (fdt0, fdt1, &env->active_fpu.fp_status); \
- return float64_ ## name2 (fdt0, fdt2, &env->active_fpu.fp_status); \
-} \
- \
-uint32_t helper_float_ ## name1 ## name2 ## _s(CPUMIPSState *env, \
- uint32_t fst0, \
- uint32_t fst1, \
- uint32_t fst2) \
-{ \
- fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status); \
- return float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status); \
-} \
- \
-uint64_t helper_float_ ## name1 ## name2 ## _ps(CPUMIPSState *env, \
- uint64_t fdt0, \
- uint64_t fdt1, \
- uint64_t fdt2) \
-{ \
- uint32_t fst0 = fdt0 & 0XFFFFFFFF; \
- uint32_t fsth0 = fdt0 >> 32; \
- uint32_t fst1 = fdt1 & 0XFFFFFFFF; \
- uint32_t fsth1 = fdt1 >> 32; \
- uint32_t fst2 = fdt2 & 0XFFFFFFFF; \
- uint32_t fsth2 = fdt2 >> 32; \
- \
- fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status); \
- fsth0 = float32_ ## name1 (fsth0, fsth1, &env->active_fpu.fp_status); \
- fst2 = float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status); \
- fsth2 = float32_ ## name2 (fsth0, fsth2, &env->active_fpu.fp_status); \
- return ((uint64_t)fsth2 << 32) | fst2; \
-}
-
-FLOAT_TERNOP(mul, add)
-FLOAT_TERNOP(mul, sub)
-#undef FLOAT_TERNOP
-
-/* negated ternary operations */
-#define FLOAT_NTERNOP(name1, name2) \
-uint64_t helper_float_n ## name1 ## name2 ## _d(CPUMIPSState *env, \
- uint64_t fdt0, \
- uint64_t fdt1, \
- uint64_t fdt2) \
-{ \
- fdt0 = float64_ ## name1 (fdt0, fdt1, &env->active_fpu.fp_status); \
- fdt2 = float64_ ## name2 (fdt0, fdt2, &env->active_fpu.fp_status); \
- return float64_chs(fdt2); \
-} \
- \
-uint32_t helper_float_n ## name1 ## name2 ## _s(CPUMIPSState *env, \
- uint32_t fst0, \
- uint32_t fst1, \
- uint32_t fst2) \
-{ \
- fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status); \
- fst2 = float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status); \
- return float32_chs(fst2); \
-} \
- \
-uint64_t helper_float_n ## name1 ## name2 ## _ps(CPUMIPSState *env, \
- uint64_t fdt0, \
- uint64_t fdt1, \
- uint64_t fdt2) \
-{ \
- uint32_t fst0 = fdt0 & 0XFFFFFFFF; \
- uint32_t fsth0 = fdt0 >> 32; \
- uint32_t fst1 = fdt1 & 0XFFFFFFFF; \
- uint32_t fsth1 = fdt1 >> 32; \
- uint32_t fst2 = fdt2 & 0XFFFFFFFF; \
- uint32_t fsth2 = fdt2 >> 32; \
- \
- fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status); \
- fsth0 = float32_ ## name1 (fsth0, fsth1, &env->active_fpu.fp_status); \
- fst2 = float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status); \
- fsth2 = float32_ ## name2 (fsth0, fsth2, &env->active_fpu.fp_status); \
- fst2 = float32_chs(fst2); \
- fsth2 = float32_chs(fsth2); \
- return ((uint64_t)fsth2 << 32) | fst2; \
-}
-
-FLOAT_NTERNOP(mul, add)
-FLOAT_NTERNOP(mul, sub)
-#undef FLOAT_NTERNOP
+/* FMA based operations */
+#define FLOAT_FMA(name, type) \
+uint64_t helper_float_ ## name ## _d(CPUMIPSState *env, \
+ uint64_t fdt0, uint64_t fdt1, \
+ uint64_t fdt2) \
+{ \
+ set_float_exception_flags(0, &env->active_fpu.fp_status); \
+ fdt0 = float64_muladd(fdt0, fdt1, fdt2, type, \
+ &env->active_fpu.fp_status); \
+ update_fcr31(env); \
+ return fdt0; \
+} \
+ \
+uint32_t helper_float_ ## name ## _s(CPUMIPSState *env, \
+ uint32_t fst0, uint32_t fst1, \
+ uint32_t fst2) \
+{ \
+ set_float_exception_flags(0, &env->active_fpu.fp_status); \
+ fst0 = float32_muladd(fst0, fst1, fst2, type, \
+ &env->active_fpu.fp_status); \
+ update_fcr31(env); \
+ return fst0; \
+} \
+ \
+uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env, \
+ uint64_t fdt0, uint64_t fdt1, \
+ uint64_t fdt2) \
+{ \
+ uint32_t fst0 = fdt0 & 0XFFFFFFFF; \
+ uint32_t fsth0 = fdt0 >> 32; \
+ uint32_t fst1 = fdt1 & 0XFFFFFFFF; \
+ uint32_t fsth1 = fdt1 >> 32; \
+ uint32_t fst2 = fdt2 & 0XFFFFFFFF; \
+ uint32_t fsth2 = fdt2 >> 32; \
+ \
+ set_float_exception_flags(0, &env->active_fpu.fp_status); \
+ fst0 = float32_muladd(fst0, fst1, fst2, type, \
+ &env->active_fpu.fp_status); \
+ fsth0 = float32_muladd(fsth0, fsth1, fsth2, type, \
+ &env->active_fpu.fp_status); \
+ update_fcr31(env); \
+ return ((uint64_t)fsth0 << 32) | fst0; \
+}
+FLOAT_FMA(madd, 0)
+FLOAT_FMA(msub, float_muladd_negate_c)
+FLOAT_FMA(nmadd, float_muladd_negate_result)
+FLOAT_FMA(nmsub, float_muladd_negate_result | float_muladd_negate_c)
+#undef FLOAT_FMA
/* MIPS specific binary operations */
uint64_t helper_float_recip2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
diff --git a/target-mips/translate.c b/target-mips/translate.c
index ed55e26..8183854 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -8288,7 +8288,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
gen_load_fpr32(fp0, fs);
gen_load_fpr32(fp1, ft);
gen_load_fpr32(fp2, fr);
- gen_helper_float_muladd_s(fp2, cpu_env, fp0, fp1, fp2);
+ gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
tcg_temp_free_i32(fp0);
tcg_temp_free_i32(fp1);
gen_store_fpr32(fp2, fd);
@@ -8307,7 +8307,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_muladd_d(fp2, cpu_env, fp0, fp1, fp2);
+ gen_helper_float_madd_d(fp2, cpu_env, fp0, fp1, fp2);
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -8325,7 +8325,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_muladd_ps(fp2, cpu_env, fp0, fp1, fp2);
+ gen_helper_float_madd_ps(fp2, cpu_env, fp0, fp1, fp2);
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -8343,7 +8343,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
gen_load_fpr32(fp0, fs);
gen_load_fpr32(fp1, ft);
gen_load_fpr32(fp2, fr);
- gen_helper_float_mulsub_s(fp2, cpu_env, fp0, fp1, fp2);
+ gen_helper_float_msub_s(fp2, cpu_env, fp0, fp1, fp2);
tcg_temp_free_i32(fp0);
tcg_temp_free_i32(fp1);
gen_store_fpr32(fp2, fd);
@@ -8362,7 +8362,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_mulsub_d(fp2, cpu_env, fp0, fp1, fp2);
+ gen_helper_float_msub_d(fp2, cpu_env, fp0, fp1, fp2);
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -8380,7 +8380,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_mulsub_ps(fp2, cpu_env, fp0, fp1, fp2);
+ gen_helper_float_msub_ps(fp2, cpu_env, fp0, fp1, fp2);
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -8398,7 +8398,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
gen_load_fpr32(fp0, fs);
gen_load_fpr32(fp1, ft);
gen_load_fpr32(fp2, fr);
- gen_helper_float_nmuladd_s(fp2, cpu_env, fp0, fp1, fp2);
+ gen_helper_float_nmadd_s(fp2, cpu_env, fp0, fp1, fp2);
tcg_temp_free_i32(fp0);
tcg_temp_free_i32(fp1);
gen_store_fpr32(fp2, fd);
@@ -8417,7 +8417,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_nmuladd_d(fp2, cpu_env, fp0, fp1, fp2);
+ gen_helper_float_nmadd_d(fp2, cpu_env, fp0, fp1, fp2);
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -8435,7 +8435,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_nmuladd_ps(fp2, cpu_env, fp0, fp1, fp2);
+ gen_helper_float_nmadd_ps(fp2, cpu_env, fp0, fp1, fp2);
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -8453,7 +8453,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
gen_load_fpr32(fp0, fs);
gen_load_fpr32(fp1, ft);
gen_load_fpr32(fp2, fr);
- gen_helper_float_nmulsub_s(fp2, cpu_env, fp0, fp1, fp2);
+ gen_helper_float_nmsub_s(fp2, cpu_env, fp0, fp1, fp2);
tcg_temp_free_i32(fp0);
tcg_temp_free_i32(fp1);
gen_store_fpr32(fp2, fd);
@@ -8472,7 +8472,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_nmulsub_d(fp2, cpu_env, fp0, fp1, fp2);
+ gen_helper_float_nmsub_d(fp2, cpu_env, fp0, fp1, fp2);
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
@@ -8490,7 +8490,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
gen_load_fpr64(ctx, fp0, fs);
gen_load_fpr64(ctx, fp1, ft);
gen_load_fpr64(ctx, fp2, fr);
- gen_helper_float_nmulsub_ps(fp2, cpu_env, fp0, fp1, fp2);
+ gen_helper_float_nmsub_ps(fp2, cpu_env, fp0, fp1, fp2);
tcg_temp_free_i64(fp0);
tcg_temp_free_i64(fp1);
gen_store_fpr64(ctx, fp2, fd);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [Qemu-devel] [PATCH 02/14] target-mips: use the softfloat floatXX_muladd functions
2012-10-09 20:27 ` [Qemu-devel] [PATCH 02/14] target-mips: use the softfloat floatXX_muladd functions Aurelien Jarno
@ 2012-10-10 19:58 ` Richard Henderson
0 siblings, 0 replies; 3+ messages in thread
From: Richard Henderson @ 2012-10-10 19:58 UTC (permalink / raw)
To: Aurelien Jarno; +Cc: qemu-devel
On 10/09/2012 01:27 PM, Aurelien Jarno wrote:
> Use the new softfloat floatXX_muladd() functions to implement the madd,
> msub, nmadd and nmsub instructions. At the same time replace the name of
> the helpers by the name of the instruction, as the only reason for the
> previous names was to keep the macros simple.
>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 3+ messages in thread
* [Qemu-devel] [PATCH 02/14] target-mips: use the softfloat floatXX_muladd functions
@ 2013-01-11 13:23 Tom de Vries
0 siblings, 0 replies; 3+ messages in thread
From: Tom de Vries @ 2013-01-11 13:23 UTC (permalink / raw)
To: aurelien; +Cc: qemu-devel, Richard Henderson
Aurelien,
> @@ -8307,7 +8307,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t
> opc,
> gen_load_fpr64(ctx, fp0, fs);
> gen_load_fpr64(ctx, fp1, ft);
> gen_load_fpr64(ctx, fp2, fr);
> - gen_helper_float_muladd_d(fp2, cpu_env, fp0, fp1, fp2);
> + gen_helper_float_madd_d(fp2, cpu_env, fp0, fp1, fp2);
> tcg_temp_free_i64(fp0);
> tcg_temp_free_i64(fp1);
> gen_store_fpr64(ctx, fp2, fd);
AFAIU:
- you're replacing here a non-fused mAC with a fused MAC and
- for all mips cores (except the r8000) the madd.d is non-fused.
So shouldn't we use a non-fused MAC here?
Thanks,
- Tom
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2013-01-11 13:24 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-01-11 13:23 [Qemu-devel] [PATCH 02/14] target-mips: use the softfloat floatXX_muladd functions Tom de Vries
-- strict thread matches above, loose matches on Subject: below --
2012-10-09 20:27 [Qemu-devel] [PATCH 00/14] target-mips: misc fixes and optimizations Aurelien Jarno
2012-10-09 20:27 ` [Qemu-devel] [PATCH 02/14] target-mips: use the softfloat floatXX_muladd functions Aurelien Jarno
2012-10-10 19:58 ` Richard Henderson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).