* [PATCH v2 1/9] qemu/int128: add int128_urshift
2022-04-05 19:55 [PATCH v2 0/9] VDIV/VMOD Implementation Lucas Mateus Castro(alqotel)
@ 2022-04-05 19:55 ` Lucas Mateus Castro(alqotel)
2022-04-06 13:29 ` Richard Henderson
2022-04-05 19:55 ` [PATCH v2 2/9] target/ppc: Implemented vector divide instructions Lucas Mateus Castro(alqotel)
` (7 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Lucas Mateus Castro(alqotel) @ 2022-04-05 19:55 UTC (permalink / raw)
To: qemu-devel, qemu-ppc
Cc: Peter Maydell, danielhb413, richard.henderson,
Philippe Mathieu-Daudé, Lucas Mateus Castro, clg,
Matheus Ferst
From: Matheus Ferst <matheus.ferst@eldorado.org.br>
Implement an unsigned right shift for Int128 values and add unit tests
mirroring the int128_rshift test cases.
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
include/qemu/int128.h | 19 +++++++++++++++++++
tests/unit/test-int128.c | 32 ++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+)
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index 2c4064256c..3af01f38cd 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -83,6 +83,11 @@ static inline Int128 int128_rshift(Int128 a, int n)
return a >> n;
}
+static inline Int128 int128_urshift(Int128 a, int n)
+{
+ return (__uint128_t)a >> n;
+}
+
static inline Int128 int128_lshift(Int128 a, int n)
{
return a << n;
@@ -299,6 +304,20 @@ static inline Int128 int128_rshift(Int128 a, int n)
}
}
+static inline Int128 int128_urshift(Int128 a, int n)
+{
+ uint64_t h = a.hi;
+ if (!n) {
+ return a;
+ }
+ h = h >> (n & 63);
+ if (n >= 64) {
+ return int128_make64(h);
+ } else {
+ return int128_make128((a.lo >> n) | ((uint64_t)a.hi << (64 - n)), h);
+ }
+}
+
static inline Int128 int128_lshift(Int128 a, int n)
{
uint64_t l = a.lo << (n & 63);
diff --git a/tests/unit/test-int128.c b/tests/unit/test-int128.c
index b86a3c76e6..ae0f552193 100644
--- a/tests/unit/test-int128.c
+++ b/tests/unit/test-int128.c
@@ -206,6 +206,37 @@ static void test_rshift(void)
test_rshift_one(0xFFFE8000U, 0, 0xFFFFFFFFFFFFFFFEULL, 0x8000000000000000ULL);
}
+static void __attribute__((__noinline__)) ATTRIBUTE_NOCLONE
+test_urshift_one(uint32_t x, int n, uint64_t h, uint64_t l)
+{
+ Int128 a = expand(x);
+ Int128 r = int128_urshift(a, n);
+ g_assert_cmpuint(int128_getlo(r), ==, l);
+ g_assert_cmpuint(int128_gethi(r), ==, h);
+}
+
+static void test_urshift(void)
+{
+ test_urshift_one(0x00010000U, 64, 0x0000000000000000ULL, 0x0000000000000001ULL);
+ test_urshift_one(0x80010000U, 64, 0x0000000000000000ULL, 0x8000000000000001ULL);
+ test_urshift_one(0x7FFE0000U, 64, 0x0000000000000000ULL, 0x7FFFFFFFFFFFFFFEULL);
+ test_urshift_one(0xFFFE0000U, 64, 0x0000000000000000ULL, 0xFFFFFFFFFFFFFFFEULL);
+ test_urshift_one(0x00010000U, 60, 0x0000000000000000ULL, 0x0000000000000010ULL);
+ test_urshift_one(0x80010000U, 60, 0x0000000000000008ULL, 0x0000000000000010ULL);
+ test_urshift_one(0x00018000U, 60, 0x0000000000000000ULL, 0x0000000000000018ULL);
+ test_urshift_one(0x80018000U, 60, 0x0000000000000008ULL, 0x0000000000000018ULL);
+ test_urshift_one(0x7FFE0000U, 60, 0x0000000000000007ULL, 0xFFFFFFFFFFFFFFE0ULL);
+ test_urshift_one(0xFFFE0000U, 60, 0x000000000000000FULL, 0xFFFFFFFFFFFFFFE0ULL);
+ test_urshift_one(0x7FFE8000U, 60, 0x0000000000000007ULL, 0xFFFFFFFFFFFFFFE8ULL);
+ test_urshift_one(0xFFFE8000U, 60, 0x000000000000000FULL, 0xFFFFFFFFFFFFFFE8ULL);
+ test_urshift_one(0x00018000U, 0, 0x0000000000000001ULL, 0x8000000000000000ULL);
+ test_urshift_one(0x80018000U, 0, 0x8000000000000001ULL, 0x8000000000000000ULL);
+ test_urshift_one(0x7FFE0000U, 0, 0x7FFFFFFFFFFFFFFEULL, 0x0000000000000000ULL);
+ test_urshift_one(0xFFFE0000U, 0, 0xFFFFFFFFFFFFFFFEULL, 0x0000000000000000ULL);
+ test_urshift_one(0x7FFE8000U, 0, 0x7FFFFFFFFFFFFFFEULL, 0x8000000000000000ULL);
+ test_urshift_one(0xFFFE8000U, 0, 0xFFFFFFFFFFFFFFFEULL, 0x8000000000000000ULL);
+}
+
int main(int argc, char **argv)
{
g_test_init(&argc, &argv, NULL);
@@ -219,5 +250,6 @@ int main(int argc, char **argv)
g_test_add_func("/int128/int128_ge", test_ge);
g_test_add_func("/int128/int128_gt", test_gt);
g_test_add_func("/int128/int128_rshift", test_rshift);
+ g_test_add_func("/int128/int128_urshift", test_urshift);
return g_test_run();
}
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 1/9] qemu/int128: add int128_urshift
2022-04-05 19:55 ` [PATCH v2 1/9] qemu/int128: add int128_urshift Lucas Mateus Castro(alqotel)
@ 2022-04-06 13:29 ` Richard Henderson
0 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-04-06 13:29 UTC (permalink / raw)
To: Lucas Mateus Castro(alqotel), qemu-devel, qemu-ppc
Cc: Peter Maydell, danielhb413, Matheus Ferst, clg,
Philippe Mathieu-Daudé
On 4/5/22 14:55, Lucas Mateus Castro(alqotel) wrote:
> From: Matheus Ferst<matheus.ferst@eldorado.org.br>
>
> Implement an unsigned right shift for Int128 values and add the same
> tests cases of int128_rshift in the unit test.
>
> Signed-off-by: Matheus Ferst<matheus.ferst@eldorado.org.br>
> Signed-off-by: Lucas Mateus Castro (alqotel)<lucas.araujo@eldorado.org.br>
> ---
> include/qemu/int128.h | 19 +++++++++++++++++++
> tests/unit/test-int128.c | 32 ++++++++++++++++++++++++++++++++
> 2 files changed, 51 insertions(+)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 2/9] target/ppc: Implemented vector divide instructions
2022-04-05 19:55 [PATCH v2 0/9] VDIV/VMOD Implementation Lucas Mateus Castro(alqotel)
2022-04-05 19:55 ` [PATCH v2 1/9] qemu/int128: add int128_urshift Lucas Mateus Castro(alqotel)
@ 2022-04-05 19:55 ` Lucas Mateus Castro(alqotel)
2022-04-12 1:51 ` Richard Henderson
2022-04-05 19:55 ` [PATCH v2 3/9] target/ppc: Implemented vector divide quadword Lucas Mateus Castro(alqotel)
` (6 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Lucas Mateus Castro(alqotel) @ 2022-04-05 19:55 UTC (permalink / raw)
To: qemu-devel, qemu-ppc
Cc: danielhb413, richard.henderson, Greg Kurz,
Lucas Mateus Castro (alqotel), clg, David Gibson
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Implement the following PowerISA v3.1 instructions:
vdivsw: Vector Divide Signed Word
vdivuw: Vector Divide Unsigned Word
vdivsd: Vector Divide Signed Doubleword
vdivud: Vector Divide Unsigned Doubleword
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
target/ppc/insn32.decode | 7 ++++
target/ppc/translate/vmx-impl.c.inc | 59 +++++++++++++++++++++++++++++
2 files changed, 66 insertions(+)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index ac2d3da9a7..597768558b 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -703,3 +703,10 @@ XVTLSBB 111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb
&XL_s s:uint8_t
@XL_s ......-------------- s:1 .......... - &XL_s
RFEBB 010011-------------- . 0010010010 - @XL_s
+
+## Vector Division Instructions
+
+VDIVSW 000100 ..... ..... ..... 00110001011 @VX
+VDIVUW 000100 ..... ..... ..... 00010001011 @VX
+VDIVSD 000100 ..... ..... ..... 00111001011 @VX
+VDIVUD 000100 ..... ..... ..... 00011001011 @VX
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 6101bca3fd..be35d6fdf3 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -3236,6 +3236,65 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64)
TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)
+#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, FNI8_FUNC) \
+static bool trans_##NAME(DisasContext *ctx, arg_VX *a) \
+{ \
+ static const GVecGen3 op = { \
+ .fni4 = FNI4_FUNC, \
+ .fni8 = FNI8_FUNC, \
+ .vece = VECE \
+ }; \
+ \
+ REQUIRE_VECTOR(ctx); \
+ REQUIRE_INSNS_FLAGS2(ctx, FLAGS); \
+ \
+ tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), \
+ avr_full_offset(a->vrb), 16, 16, &op); \
+ \
+ return true; \
+}
+
+#define DO_VDIV_VMOD(NAME, SZ, DIV, SIGNED) \
+static void NAME(TCGv_i##SZ t, TCGv_i##SZ a, TCGv_i##SZ b) \
+{ \
+ /* \
+ * If N/0 the instruction used by the backend might deliver \
+ * an invalid division signal to the process, so if b = 0 return \
+ * N/1 and if signed instruction, the same for a = int_min, b = -1 \
+ */ \
+ if (SIGNED) { \
+ TCGv_i##SZ t0 = tcg_temp_new_i##SZ(); \
+ TCGv_i##SZ t1 = tcg_temp_new_i##SZ(); \
+ tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t0, a, INT##SZ##_MIN); \
+ tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, -1); \
+ tcg_gen_and_i##SZ(t0, t0, t1); \
+ tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, 0); \
+ tcg_gen_or_i##SZ(t0, t0, t1); \
+ tcg_gen_movi_i##SZ(t1, 0); \
+ tcg_gen_movcond_i##SZ(TCG_COND_NE, b, t0, t1, t0, b); \
+ DIV(t, a, b); \
+ tcg_temp_free_i##SZ(t0); \
+ tcg_temp_free_i##SZ(t1); \
+ } else { \
+ TCGv_i##SZ zero = tcg_constant_i##SZ(0); \
+ TCGv_i##SZ one = tcg_constant_i##SZ(1); \
+ tcg_gen_movcond_i##SZ(TCG_COND_EQ, b, b, zero, one, b); \
+ DIV(t, a, b); \
+ } \
+}
+
+DO_VDIV_VMOD(do_divsw, 32, tcg_gen_div_i32, true)
+DO_VDIV_VMOD(do_divuw, 32, tcg_gen_divu_i32, false)
+DO_VDIV_VMOD(do_divsd, 64, tcg_gen_div_i64, true)
+DO_VDIV_VMOD(do_divud, 64, tcg_gen_divu_i64, false)
+
+TRANS_VDIV_VMOD(ISA310, VDIVSW, MO_32, do_divsw, NULL)
+TRANS_VDIV_VMOD(ISA310, VDIVUW, MO_32, do_divuw, NULL)
+TRANS_VDIV_VMOD(ISA310, VDIVSD, MO_64, NULL, do_divsd)
+TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_divud)
+
+#undef DO_VDIV_VMOD
+
#undef GEN_VR_LDX
#undef GEN_VR_STX
#undef GEN_VR_LVE
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 2/9] target/ppc: Implemented vector divide instructions
2022-04-05 19:55 ` [PATCH v2 2/9] target/ppc: Implemented vector divide instructions Lucas Mateus Castro(alqotel)
@ 2022-04-12 1:51 ` Richard Henderson
2022-04-20 13:43 ` Lucas Mateus Martins Araujo e Castro
0 siblings, 1 reply; 19+ messages in thread
From: Richard Henderson @ 2022-04-12 1:51 UTC (permalink / raw)
To: Lucas Mateus Castro(alqotel), qemu-devel, qemu-ppc
Cc: Greg Kurz, danielhb413, clg, David Gibson
On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote:
> From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
>
> Implement the following PowerISA v3.1 instructions:
> vdivsw: Vector Divide Signed Word
> vdivuw: Vector Divide Unsigned Word
> vdivsd: Vector Divide Signed Doubleword
> vdivud: Vector Divide Unsigned Doubleword
>
> Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
> ---
> target/ppc/insn32.decode | 7 ++++
> target/ppc/translate/vmx-impl.c.inc | 59 +++++++++++++++++++++++++++++
> 2 files changed, 66 insertions(+)
>
> diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
> index ac2d3da9a7..597768558b 100644
> --- a/target/ppc/insn32.decode
> +++ b/target/ppc/insn32.decode
> @@ -703,3 +703,10 @@ XVTLSBB 111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb
> &XL_s s:uint8_t
> @XL_s ......-------------- s:1 .......... - &XL_s
> RFEBB 010011-------------- . 0010010010 - @XL_s
> +
> +## Vector Division Instructions
> +
> +VDIVSW 000100 ..... ..... ..... 00110001011 @VX
> +VDIVUW 000100 ..... ..... ..... 00010001011 @VX
> +VDIVSD 000100 ..... ..... ..... 00111001011 @VX
> +VDIVUD 000100 ..... ..... ..... 00011001011 @VX
> diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
> index 6101bca3fd..be35d6fdf3 100644
> --- a/target/ppc/translate/vmx-impl.c.inc
> +++ b/target/ppc/translate/vmx-impl.c.inc
> @@ -3236,6 +3236,65 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64)
> TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
> TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)
>
> +#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, FNI8_FUNC) \
> +static bool trans_##NAME(DisasContext *ctx, arg_VX *a) \
> +{ \
> + static const GVecGen3 op = { \
> + .fni4 = FNI4_FUNC, \
> + .fni8 = FNI8_FUNC, \
> + .vece = VECE \
> + }; \
> + \
> + REQUIRE_VECTOR(ctx); \
> + REQUIRE_INSNS_FLAGS2(ctx, FLAGS); \
> + \
> + tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), \
> + avr_full_offset(a->vrb), 16, 16, &op); \
> + \
> + return true; \
> +}
Better to use a standalone helper and TRANS() -- the op structure doesn't *need* to be
static const.
> +
> +#define DO_VDIV_VMOD(NAME, SZ, DIV, SIGNED) \
> +static void NAME(TCGv_i##SZ t, TCGv_i##SZ a, TCGv_i##SZ b) \
> +{ \
> + /* \
> + * If N/0 the instruction used by the backend might deliver \
> + * an invalid division signal to the process, so if b = 0 return \
> + * N/1 and if signed instruction, the same for a = int_min, b = -1 \
> + */ \
> + if (SIGNED) { \
> + TCGv_i##SZ t0 = tcg_temp_new_i##SZ(); \
> + TCGv_i##SZ t1 = tcg_temp_new_i##SZ(); \
> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t0, a, INT##SZ##_MIN); \
> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, -1); \
> + tcg_gen_and_i##SZ(t0, t0, t1); \
> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, 0); \
> + tcg_gen_or_i##SZ(t0, t0, t1); \
> + tcg_gen_movi_i##SZ(t1, 0); \
> + tcg_gen_movcond_i##SZ(TCG_COND_NE, b, t0, t1, t0, b); \
> + DIV(t, a, b); \
> + tcg_temp_free_i##SZ(t0); \
> + tcg_temp_free_i##SZ(t1); \
> + } else { \
> + TCGv_i##SZ zero = tcg_constant_i##SZ(0); \
> + TCGv_i##SZ one = tcg_constant_i##SZ(1); \
> + tcg_gen_movcond_i##SZ(TCG_COND_EQ, b, b, zero, one, b); \
> + DIV(t, a, b); \
> + } \
> +}
This is overkill. Even if you keep some macros, passing in SIGNED and using it in the
outermost if is a sign you should split the macro in two.
However, only tcg_gen_div_i64 really requires the full signed treatment; tcg_gen_div_i32
can be better handled by extending to i64, because (int64_t)INT32_MIN / -1LL does not trap.
I think this would be much easier to read as 4 separate functions.
r~
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2 2/9] target/ppc: Implemented vector divide instructions
2022-04-12 1:51 ` Richard Henderson
@ 2022-04-20 13:43 ` Lucas Mateus Martins Araujo e Castro
0 siblings, 0 replies; 19+ messages in thread
From: Lucas Mateus Martins Araujo e Castro @ 2022-04-20 13:43 UTC (permalink / raw)
To: Richard Henderson, qemu-devel, qemu-ppc
Cc: Greg Kurz, danielhb413, clg, David Gibson
[-- Attachment #1: Type: text/plain, Size: 3460 bytes --]
On 11/04/2022 22:51, Richard Henderson wrote:
>
> On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote:
>
>> +
>> +#define DO_VDIV_VMOD(NAME, SZ, DIV, SIGNED) \
>> +static void NAME(TCGv_i##SZ t, TCGv_i##SZ a, TCGv_i##SZ b) \
>> +{ \
>> + /* \
>> + * If N/0 the instruction used by the backend might deliver \
>> + * an invalid division signal to the process, so if b = 0 return \
>> + * N/1 and if signed instruction, the same for a = int_min, b = -1 \
>> + */ \
>> + if (SIGNED) { \
>> + TCGv_i##SZ t0 = tcg_temp_new_i##SZ(); \
>> + TCGv_i##SZ t1 = tcg_temp_new_i##SZ(); \
>> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t0, a, INT##SZ##_MIN); \
>> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, -1); \
>> + tcg_gen_and_i##SZ(t0, t0, t1); \
>> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, 0); \
>> + tcg_gen_or_i##SZ(t0, t0, t1); \
>> + tcg_gen_movi_i##SZ(t1, 0); \
>> + tcg_gen_movcond_i##SZ(TCG_COND_NE, b, t0, t1, t0, b); \
>> + DIV(t, a, b); \
>> + tcg_temp_free_i##SZ(t0); \
>> + tcg_temp_free_i##SZ(t1); \
>> + } else { \
>> + TCGv_i##SZ zero = tcg_constant_i##SZ(0); \
>> + TCGv_i##SZ one = tcg_constant_i##SZ(1); \
>> + tcg_gen_movcond_i##SZ(TCG_COND_EQ, b, b, zero, one, b); \
>> + DIV(t, a, b); \
>> + } \
>> +}
>
> This is overkill. Even if you keep some macros, passing in SIGNED and
> using it in the
> outermost if is a sign you should split the macro in two.
>
> However, only tcg_gen_div_i64 really requires the full signed
> treatment; tcg_gen_div_i32
> can be better handled by extending to i64, because INT32_MIN / -1ULL
> does not trap.
>
> I think this would be much easier to read as 4 separate functions.
>
>
OK, in v3 I'll change it to 4 different macros, move clz128 to int128.h, and
turn TRANS_VDIV_VMOD into a do_vdiv_vmod function that is called with
TRANS().
> r~
--
Lucas Mateus M. Araujo e Castro
Instituto de Pesquisas ELDORADO
<https://www.eldorado.org.br/?utm_campaign=assinatura_de_e-mail&utm_medium=email&utm_source=RD+Station>
Departamento Computação Embarcada
Analista de Software Trainee
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
[-- Attachment #2: Type: text/html, Size: 5658 bytes --]
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 3/9] target/ppc: Implemented vector divide quadword
2022-04-05 19:55 [PATCH v2 0/9] VDIV/VMOD Implementation Lucas Mateus Castro(alqotel)
2022-04-05 19:55 ` [PATCH v2 1/9] qemu/int128: add int128_urshift Lucas Mateus Castro(alqotel)
2022-04-05 19:55 ` [PATCH v2 2/9] target/ppc: Implemented vector divide instructions Lucas Mateus Castro(alqotel)
@ 2022-04-05 19:55 ` Lucas Mateus Castro(alqotel)
2022-04-12 1:52 ` Richard Henderson
2022-04-05 19:55 ` [PATCH v2 4/9] target/ppc: Implemented vector divide extended word Lucas Mateus Castro(alqotel)
` (5 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Lucas Mateus Castro(alqotel) @ 2022-04-05 19:55 UTC (permalink / raw)
To: qemu-devel, qemu-ppc
Cc: danielhb413, richard.henderson, Greg Kurz,
Lucas Mateus Castro (alqotel), clg, David Gibson
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Implement the following PowerISA v3.1 instructions:
vdivsq: Vector Divide Signed Quadword
vdivuq: Vector Divide Unsigned Quadword
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
target/ppc/helper.h | 2 ++
target/ppc/insn32.decode | 2 ++
target/ppc/int_helper.c | 21 +++++++++++++++++++++
target/ppc/translate/vmx-impl.c.inc | 2 ++
4 files changed, 27 insertions(+)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 57da11c77e..4cfdf7b3ec 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -171,6 +171,8 @@ DEF_HELPER_FLAGS_3(VMULOSW, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(VMULOUB, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(VMULOUH, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(VMULOUW, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VDIVSQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VDIVUQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_3(vslo, void, avr, avr, avr)
DEF_HELPER_3(vsro, void, avr, avr, avr)
DEF_HELPER_3(vsrv, void, avr, avr, avr)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 597768558b..3a88a0b5bc 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -710,3 +710,5 @@ VDIVSW 000100 ..... ..... ..... 00110001011 @VX
VDIVUW 000100 ..... ..... ..... 00010001011 @VX
VDIVSD 000100 ..... ..... ..... 00111001011 @VX
VDIVUD 000100 ..... ..... ..... 00011001011 @VX
+VDIVSQ 000100 ..... ..... ..... 00100001011 @VX
+VDIVUQ 000100 ..... ..... ..... 00000001011 @VX
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 492f34c499..ba5d4193ff 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1036,6 +1036,27 @@ void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
*t = tmp;
}
+void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ Int128 neg1 = int128_makes64(-1);
+ Int128 int128_min = int128_make128(0, INT64_MIN);
+ if (likely(int128_nz(b->s128) &&
+ (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
+ t->s128 = int128_divs(a->s128, b->s128);
+ } else {
+ t->s128 = a->s128; /* Undefined behavior */
+ }
+}
+
+void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ if (int128_nz(b->s128)) {
+ t->s128 = int128_divu(a->s128, b->s128);
+ } else {
+ t->s128 = a->s128; /* Undefined behavior */
+ }
+}
+
void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
ppc_avr_t result;
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index be35d6fdf3..bac0db7128 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -3292,6 +3292,8 @@ TRANS_VDIV_VMOD(ISA310, VDIVSW, MO_32, do_divsw, NULL)
TRANS_VDIV_VMOD(ISA310, VDIVUW, MO_32, do_divuw, NULL)
TRANS_VDIV_VMOD(ISA310, VDIVSD, MO_64, NULL, do_divsd)
TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_divud)
+TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ)
+TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ)
#undef DO_VDIV_VMOD
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 3/9] target/ppc: Implemented vector divide quadword
2022-04-05 19:55 ` [PATCH v2 3/9] target/ppc: Implemented vector divide quadword Lucas Mateus Castro(alqotel)
@ 2022-04-12 1:52 ` Richard Henderson
0 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-04-12 1:52 UTC (permalink / raw)
To: Lucas Mateus Castro(alqotel), qemu-devel, qemu-ppc
Cc: Greg Kurz, danielhb413, clg, David Gibson
On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote:
> From: "Lucas Mateus Castro (alqotel)"<lucas.araujo@eldorado.org.br>
>
> Implement the following PowerISA v3.1 instructions:
> vdivsq: Vector Divide Signed Quadword
> vdivuq: Vector Divide Unsigned Quadword
>
> Signed-off-by: Lucas Mateus Castro (alqotel)<lucas.araujo@eldorado.org.br>
> ---
> target/ppc/helper.h | 2 ++
> target/ppc/insn32.decode | 2 ++
> target/ppc/int_helper.c | 21 +++++++++++++++++++++
> target/ppc/translate/vmx-impl.c.inc | 2 ++
> 4 files changed, 27 insertions(+)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 4/9] target/ppc: Implemented vector divide extended word
2022-04-05 19:55 [PATCH v2 0/9] VDIV/VMOD Implementation Lucas Mateus Castro(alqotel)
` (2 preceding siblings ...)
2022-04-05 19:55 ` [PATCH v2 3/9] target/ppc: Implemented vector divide quadword Lucas Mateus Castro(alqotel)
@ 2022-04-05 19:55 ` Lucas Mateus Castro(alqotel)
2022-04-12 1:56 ` Richard Henderson
2022-04-05 19:55 ` [PATCH v2 5/9] host-utils: Implemented unsigned 256-by-128 division Lucas Mateus Castro(alqotel)
` (4 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Lucas Mateus Castro(alqotel) @ 2022-04-05 19:55 UTC (permalink / raw)
To: qemu-devel, qemu-ppc
Cc: danielhb413, richard.henderson, Greg Kurz,
Lucas Mateus Castro (alqotel), clg, David Gibson
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Implement the following PowerISA v3.1 instructions:
vdivesw: Vector Divide Extended Signed Word
vdiveuw: Vector Divide Extended Unsigned Word
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
target/ppc/insn32.decode | 3 ++
target/ppc/translate/vmx-impl.c.inc | 48 +++++++++++++++++++++++++++++
2 files changed, 51 insertions(+)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 3a88a0b5bc..8c115c9c60 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -712,3 +712,6 @@ VDIVSD 000100 ..... ..... ..... 00111001011 @VX
VDIVUD 000100 ..... ..... ..... 00011001011 @VX
VDIVSQ 000100 ..... ..... ..... 00100001011 @VX
VDIVUQ 000100 ..... ..... ..... 00000001011 @VX
+
+VDIVESW 000100 ..... ..... ..... 01110001011 @VX
+VDIVEUW 000100 ..... ..... ..... 01010001011 @VX
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index bac0db7128..8799e945bd 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -3295,6 +3295,54 @@ TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_divud)
TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ)
TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ)
+static void do_dives_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i64 val1, val2;
+
+ val1 = tcg_temp_new_i64();
+ val2 = tcg_temp_new_i64();
+
+ tcg_gen_ext_i32_i64(val1, a);
+ tcg_gen_ext_i32_i64(val2, b);
+
+ /* (a << 32)/b */
+ tcg_gen_shli_i64(val1, val1, 32);
+ tcg_gen_div_i64(val1, val1, val2);
+
+ /* if quotient doesn't fit in 32 bits the result is undefined */
+ tcg_gen_extrl_i64_i32(t, val1);
+
+ tcg_temp_free_i64(val1);
+ tcg_temp_free_i64(val2);
+}
+
+static void do_diveu_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i64 val1, val2;
+
+ val1 = tcg_temp_new_i64();
+ val2 = tcg_temp_new_i64();
+
+ tcg_gen_extu_i32_i64(val1, a);
+ tcg_gen_extu_i32_i64(val2, b);
+
+ /* (a << 32)/b */
+ tcg_gen_shli_i64(val1, val1, 32);
+ tcg_gen_divu_i64(val1, val1, val2);
+
+ /* if quotient doesn't fit in 32 bits the result is undefined */
+ tcg_gen_extrl_i64_i32(t, val1);
+
+ tcg_temp_free_i64(val1);
+ tcg_temp_free_i64(val2);
+}
+
+DO_VDIV_VMOD(do_divesw, 32, do_dives_i32, true)
+DO_VDIV_VMOD(do_diveuw, 32, do_diveu_i32, false)
+
+TRANS_VDIV_VMOD(ISA310, VDIVESW, MO_32, do_divesw, NULL)
+TRANS_VDIV_VMOD(ISA310, VDIVEUW, MO_32, do_diveuw, NULL)
+
#undef DO_VDIV_VMOD
#undef GEN_VR_LDX
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 4/9] target/ppc: Implemented vector divide extended word
2022-04-05 19:55 ` [PATCH v2 4/9] target/ppc: Implemented vector divide extended word Lucas Mateus Castro(alqotel)
@ 2022-04-12 1:56 ` Richard Henderson
0 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-04-12 1:56 UTC (permalink / raw)
To: Lucas Mateus Castro(alqotel), qemu-devel, qemu-ppc
Cc: Greg Kurz, danielhb413, clg, David Gibson
On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote:
> From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
>
> Implement the following PowerISA v3.1 instructions:
> vdivesw: Vector Divide Extended Signed Word
> vdiveuw: Vector Divide Extended Unsigned Word
>
> Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
> ---
> target/ppc/insn32.decode | 3 ++
> target/ppc/translate/vmx-impl.c.inc | 48 +++++++++++++++++++++++++++++
> 2 files changed, 51 insertions(+)
>
> diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
> index 3a88a0b5bc..8c115c9c60 100644
> --- a/target/ppc/insn32.decode
> +++ b/target/ppc/insn32.decode
> @@ -712,3 +712,6 @@ VDIVSD 000100 ..... ..... ..... 00111001011 @VX
> VDIVUD 000100 ..... ..... ..... 00011001011 @VX
> VDIVSQ 000100 ..... ..... ..... 00100001011 @VX
> VDIVUQ 000100 ..... ..... ..... 00000001011 @VX
> +
> +VDIVESW 000100 ..... ..... ..... 01110001011 @VX
> +VDIVEUW 000100 ..... ..... ..... 01010001011 @VX
> diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
> index bac0db7128..8799e945bd 100644
> --- a/target/ppc/translate/vmx-impl.c.inc
> +++ b/target/ppc/translate/vmx-impl.c.inc
> @@ -3295,6 +3295,54 @@ TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_divud)
> TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ)
> TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ)
>
> +static void do_dives_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
> +{
> + TCGv_i64 val1, val2;
> +
> + val1 = tcg_temp_new_i64();
> + val2 = tcg_temp_new_i64();
> +
> + tcg_gen_ext_i32_i64(val1, a);
> + tcg_gen_ext_i32_i64(val2, b);
> +
> + /* (a << 32)/b */
> + tcg_gen_shli_i64(val1, val1, 32);
> + tcg_gen_div_i64(val1, val1, val2);
> +
> + /* if quotient doesn't fit in 32 bits the result is undefined */
> + tcg_gen_extrl_i64_i32(t, val1);
> +
> + tcg_temp_free_i64(val1);
> + tcg_temp_free_i64(val2);
> +}
> +
> +static void do_diveu_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
> +{
> + TCGv_i64 val1, val2;
> +
> + val1 = tcg_temp_new_i64();
> + val2 = tcg_temp_new_i64();
> +
> + tcg_gen_extu_i32_i64(val1, a);
> + tcg_gen_extu_i32_i64(val2, b);
> +
> + /* (a << 32)/b */
> + tcg_gen_shli_i64(val1, val1, 32);
> + tcg_gen_divu_i64(val1, val1, val2);
> +
> + /* if quotient doesn't fit in 32 bits the result is undefined */
> + tcg_gen_extrl_i64_i32(t, val1);
> +
> + tcg_temp_free_i64(val1);
> + tcg_temp_free_i64(val2);
> +}
> +
> +DO_VDIV_VMOD(do_divesw, 32, do_dives_i32, true)
> +DO_VDIV_VMOD(do_diveuw, 32, do_diveu_i32, false)
Oh, I see, you do have one more use for the full min/-1 treatment.
It would still be nice if the macro were minimal and you use a callback.
r~
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 5/9] host-utils: Implemented unsigned 256-by-128 division
2022-04-05 19:55 [PATCH v2 0/9] VDIV/VMOD Implementation Lucas Mateus Castro(alqotel)
` (3 preceding siblings ...)
2022-04-05 19:55 ` [PATCH v2 4/9] target/ppc: Implemented vector divide extended word Lucas Mateus Castro(alqotel)
@ 2022-04-05 19:55 ` Lucas Mateus Castro(alqotel)
2022-04-12 2:20 ` Richard Henderson
2022-04-05 19:55 ` [PATCH v2 6/9] host-utils: Implemented signed " Lucas Mateus Castro(alqotel)
` (3 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Lucas Mateus Castro(alqotel) @ 2022-04-05 19:55 UTC (permalink / raw)
To: qemu-devel, qemu-ppc
Cc: Peter Maydell, Matheus Ferst, danielhb413, richard.henderson,
Luis Pires, Philippe Mathieu-Daudé,
Lucas Mateus Castro (alqotel), clg, Alex Bennée,
David Gibson
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Based on the already existing QEMU implementation, create an unsigned
256-bit by 128-bit division, needed to implement the vector divide extended
unsigned instruction from PowerISA v3.1.
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
include/qemu/host-utils.h | 15 +++++
include/qemu/int128.h | 20 ++++++
util/host-utils.c | 128 ++++++++++++++++++++++++++++++++++++++
3 files changed, 163 insertions(+)
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index ca979dc6cc..6da6a93f69 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -32,6 +32,7 @@
#include "qemu/compiler.h"
#include "qemu/bswap.h"
+#include "qemu/int128.h"
#ifdef CONFIG_INT128
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
@@ -153,6 +154,19 @@ static inline int clo64(uint64_t val)
return clz64(~val);
}
+/**
+ * clz128 - count leading zeros in a 128-bit value.
+ * @a: The value to search
+ */
+static inline int clz128(Int128 a)
+{
+ if (int128_gethi(a)) {
+ return clz64(int128_gethi(a));
+ } else {
+ return clz64(int128_getlo(a)) + 64;
+ }
+}
+
/**
* ctz32 - count trailing zeros in a 32-bit value.
* @val: The value to search
@@ -849,4 +863,5 @@ static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
#endif
}
+Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor);
#endif
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index 3af01f38cd..2a9ee956aa 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -128,11 +128,21 @@ static inline bool int128_ge(Int128 a, Int128 b)
return a >= b;
}
+static inline bool int128_uge(Int128 a, Int128 b)
+{
+ return ((__uint128_t)a) >= ((__uint128_t)b);
+}
+
static inline bool int128_lt(Int128 a, Int128 b)
{
return a < b;
}
+static inline bool int128_ult(Int128 a, Int128 b)
+{
+ return (__uint128_t)a < (__uint128_t)b;
+}
+
static inline bool int128_le(Int128 a, Int128 b)
{
return a <= b;
@@ -373,11 +383,21 @@ static inline bool int128_ge(Int128 a, Int128 b)
return a.hi > b.hi || (a.hi == b.hi && a.lo >= b.lo);
}
+static inline bool int128_uge(Int128 a, Int128 b)
+{
+ return (uint64_t)a.hi > (uint64_t)b.hi || (a.hi == b.hi && a.lo >= b.lo);
+}
+
static inline bool int128_lt(Int128 a, Int128 b)
{
return !int128_ge(a, b);
}
+static inline bool int128_ult(Int128 a, Int128 b)
+{
+ return !int128_uge(a, b);
+}
+
static inline bool int128_le(Int128 a, Int128 b)
{
return int128_ge(b, a);
diff --git a/util/host-utils.c b/util/host-utils.c
index bcc772b8ec..c6a01638c7 100644
--- a/util/host-utils.c
+++ b/util/host-utils.c
@@ -266,3 +266,131 @@ void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow)
*plow = *plow << shift;
}
}
+/*
+ * Unsigned 256-by-128 division of n1:n0 by d.
+ * Returns the remainder via r.
+ * Returns the lower 128 bits of the quotient.
+ * Needs a normalized divisor (most significant bit set to 1).
+ *
+ * Adapted from include/qemu/host-utils.h udiv_qrnnd,
+ * from the GNU Multi Precision Library - longlong.h __udiv_qrnnd
+ * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
+ *
+ * Licensed under the GPLv2/LGPLv3
+ */
+static Int128 udiv256_qrnnd(Int128 *r, Int128 n1, Int128 n0, Int128 d)
+{
+ Int128 d0, d1, q0, q1, r1, r0, m;
+ uint64_t mp0, mp1;
+
+ d0 = int128_make64(int128_getlo(d)); /* low 64 bits of d */
+ d1 = int128_make64(int128_gethi(d)); /* high 64 bits of d */
+
+ r1 = int128_remu(n1, d1);
+ q1 = int128_divu(n1, d1);
+ mp0 = int128_getlo(q1);
+ mp1 = int128_gethi(q1);
+ mulu128(&mp0, &mp1, int128_getlo(d0));
+ m = int128_make128(mp0, mp1);
+ r1 = int128_make128(int128_gethi(n0), int128_getlo(r1));
+ if (int128_ult(r1, m)) { /* q1 was too large: step it down */
+ q1 = int128_sub(q1, int128_one());
+ r1 = int128_add(r1, d);
+ if (int128_uge(r1, d)) {
+ if (int128_ult(r1, m)) {
+ q1 = int128_sub(q1, int128_one());
+ r1 = int128_add(r1, d);
+ }
+ }
+ }
+ r1 = int128_sub(r1, m);
+
+ r0 = int128_remu(r1, d1);
+ q0 = int128_divu(r1, d1);
+ mp0 = int128_getlo(q0);
+ mp1 = int128_gethi(q0);
+ mulu128(&mp0, &mp1, int128_getlo(d0));
+ m = int128_make128(mp0, mp1);
+ r0 = int128_make128(int128_getlo(n0), int128_getlo(r0));
+ if (int128_ult(r0, m)) { /* q0 was too large: step it down */
+ q0 = int128_sub(q0, int128_one());
+ r0 = int128_add(r0, d);
+ if (int128_uge(r0, d)) {
+ if (int128_ult(r0, m)) {
+ q0 = int128_sub(q0, int128_one());
+ r0 = int128_add(r0, d);
+ }
+ }
+ }
+ r0 = int128_sub(r0, m);
+
+ *r = r0;
+ return int128_or(int128_lshift(q1, 64), q0); /* q1 shifted above q0 */
+}
+
+/*
+ * Unsigned 256-by-128 division.
+ * The dividend is passed in as *phigh:*plow.
+ * Returns the quotient via plow and phigh.
+ * Returns the remainder via the function return value.
+ */
+Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor)
+{
+ Int128 dhi = *phigh;
+ Int128 dlo = *plow;
+ Int128 rem, dhighest;
+ int sh;
+
+ if (!int128_nz(divisor) || !int128_nz(dhi)) {
+ *plow = int128_divu(dlo, divisor); /* plain 128-bit division */
+ *phigh = int128_zero();
+ return int128_remu(dlo, divisor);
+ } else {
+ sh = clz128(divisor);
+
+ if (int128_ult(dhi, divisor)) {
+ if (sh != 0) {
+ /* normalize the divisor, shifting the dividend accordingly */
+ divisor = int128_lshift(divisor, sh);
+ dhi = int128_or(int128_lshift(dhi, sh),
+ int128_urshift(dlo, (128 - sh)));
+ dlo = int128_lshift(dlo, sh);
+ }
+
+ *phigh = int128_zero();
+ *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor);
+ } else {
+ if (sh != 0) {
+ /* normalize; dhighest = bits shifted out of dhi (unsigned) */
+ divisor = int128_lshift(divisor, sh);
+ dhighest = int128_urshift(dhi, (128 - sh));
+ dhi = int128_or(int128_lshift(dhi, sh),
+ int128_urshift(dlo, (128 - sh)));
+ dlo = int128_lshift(dlo, sh);
+
+ *phigh = udiv256_qrnnd(&dhi, dhighest, dhi, divisor);
+ } else {
+ /*
+ * dhi >= divisor
+ * Since the MSB of divisor is set (sh == 0),
+ * (dhi - divisor) < divisor
+ *
+ * Thus, the high part of the quotient is 1, and we can
+ * calculate the low part with a single call to udiv256_qrnnd
+ * after subtracting divisor from dhi
+ */
+ dhi = int128_sub(dhi, divisor);
+ *phigh = int128_one();
+ }
+
+ *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor);
+ }
+
+ /*
+ * since the dividend/divisor might have been normalized,
+ * the remainder might also have to be shifted back
+ */
+ rem = int128_urshift(rem, sh);
+ return rem;
+ }
+}
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 5/9] host-utils: Implemented unsigned 256-by-128 division
2022-04-05 19:55 ` [PATCH v2 5/9] host-utils: Implemented unsigned 256-by-128 division Lucas Mateus Castro(alqotel)
@ 2022-04-12 2:20 ` Richard Henderson
0 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-04-12 2:20 UTC (permalink / raw)
To: Lucas Mateus Castro(alqotel), qemu-devel, qemu-ppc
Cc: Peter Maydell, Matheus Ferst, danielhb413,
Philippe Mathieu-Daudé, Luis Pires, clg, Alex Bennée,
David Gibson
On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote:
> From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
>
> Based on already existing QEMU implementation, created an unsigned 256
> bit by 128 bit division needed to implement the vector divide extended
> unsigned instruction from PowerISA3.1
>
> Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
> ---
> include/qemu/host-utils.h | 15 +++++
> include/qemu/int128.h | 20 ++++++
> util/host-utils.c | 128 ++++++++++++++++++++++++++++++++++++++
> 3 files changed, 163 insertions(+)
>
> diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
> index ca979dc6cc..6da6a93f69 100644
> --- a/include/qemu/host-utils.h
> +++ b/include/qemu/host-utils.h
> @@ -32,6 +32,7 @@
>
> #include "qemu/compiler.h"
> #include "qemu/bswap.h"
> +#include "qemu/int128.h"
>
> #ifdef CONFIG_INT128
> static inline void mulu64(uint64_t *plow, uint64_t *phigh,
> @@ -153,6 +154,19 @@ static inline int clo64(uint64_t val)
> return clz64(~val);
> }
>
> +/*
> + * clz128 - count leading zeros in a 128-bit value.
> + * @val: The value to search
> + */
> +static inline int clz128(Int128 a)
> +{
> + if (int128_gethi(a)) {
> + return clz64(int128_gethi(a));
> + } else {
> + return clz64(int128_getlo(a)) + 64;
> + }
> +}
Should be in int128.h, like bswap128.
> diff --git a/util/host-utils.c b/util/host-utils.c
> index bcc772b8ec..c6a01638c7 100644
> --- a/util/host-utils.c
> +++ b/util/host-utils.c
> @@ -266,3 +266,131 @@ void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow)
> *plow = *plow << shift;
> }
> }
> +/*
Watch your spacing.
> + * Unsigned 256-by-128 division.
> + * Returns the remainder via r.
> + * Returns lower 128 bit of quotient.
> + * Needs a normalized divisor (most significant bit set to 1).
> + *
> + * Adapted from include/qemu/host-utils.h udiv_qrnnd,
> + * from the GNU Multi Precision Library - longlong.h __udiv_qrnnd
> + * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
> + *
> + * Licensed under the GPLv2/LGPLv3
> + */
> +static Int128 udiv256_qrnnd(Int128 *r, Int128 n1, Int128 n0, Int128 d)
> +{
> + Int128 d0, d1, q0, q1, r1, r0, m;
> + uint64_t mp0, mp1;
> +
> + d0 = int128_make64(int128_getlo(d));
> + d1 = int128_make64(int128_gethi(d));
> +
> + r1 = int128_remu(n1, d1);
> + q1 = int128_divu(n1, d1);
> + mp0 = int128_getlo(q1);
> + mp1 = int128_gethi(q1);
> + mulu128(&mp0, &mp1, int128_getlo(d0));
> + m = int128_make128(mp0, mp1);
> + r1 = int128_make128(int128_gethi(n0), int128_getlo(r1));
> + if (int128_ult(r1, m)) {
> + q1 = int128_sub(q1, int128_one());
> + r1 = int128_add(r1, d);
> + if (int128_uge(r1, d)) {
> + if (int128_ult(r1, m)) {
> + q1 = int128_sub(q1, int128_one());
> + r1 = int128_add(r1, d);
> + }
> + }
> + }
> + r1 = int128_sub(r1, m);
> +
> + r0 = int128_remu(r1, d1);
> + q0 = int128_divu(r1, d1);
> + mp0 = int128_getlo(q0);
> + mp1 = int128_gethi(q0);
> + mulu128(&mp0, &mp1, int128_getlo(d0));
> + m = int128_make128(mp0, mp1);
> + r0 = int128_make128(int128_getlo(n0), int128_getlo(r0));
> + if (int128_ult(r0, m)) {
> + q0 = int128_sub(q0, int128_one());
> + r0 = int128_add(r0, d);
> + if (int128_uge(r0, d)) {
> + if (int128_ult(r0, m)) {
> + q0 = int128_sub(q0, int128_one());
> + r0 = int128_add(r0, d);
> + }
> + }
> + }
> + r0 = int128_sub(r0, m);
> +
> + *r = r0;
> + return int128_or(int128_lshift(q1, 64), q0);
> +}
> +
> +/*
> + * Unsigned 256-by-128 division.
> + * Returns the remainder.
> + * Returns quotient via plow and phigh.
> + * Also returns the remainder via the function return value.
> + */
> +Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor)
> +{
> + Int128 dhi = *phigh;
> + Int128 dlo = *plow;
> + Int128 rem, dhighest;
> + int sh;
> +
> + if (!int128_nz(divisor) || !int128_nz(dhi)) {
> + *plow = int128_divu(dlo, divisor);
> + *phigh = int128_zero();
> + return int128_remu(dlo, divisor);
> + } else {
> + sh = clz128(divisor);
> +
> + if (int128_ult(dhi, divisor)) {
> + if (sh != 0) {
> + /* normalize the divisor, shifting the dividend accordingly */
> + divisor = int128_lshift(divisor, sh);
> + dhi = int128_or(int128_lshift(dhi, sh),
> + int128_urshift(dlo, (128 - sh)));
> + dlo = int128_lshift(dlo, sh);
> + }
> +
> + *phigh = int128_zero();
> + *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor);
> + } else {
> + if (sh != 0) {
> + /* normalize the divisor, shifting the dividend accordingly */
> + divisor = int128_lshift(divisor, sh);
> + dhighest = int128_rshift(dhi, (128 - sh));
> + dhi = int128_or(int128_lshift(dhi, sh),
> + int128_urshift(dlo, (128 - sh)));
> + dlo = int128_lshift(dlo, sh);
> +
> + *phigh = udiv256_qrnnd(&dhi, dhighest, dhi, divisor);
> + } else {
> + /*
> + * dhi >= divisor
> + * Since the MSB of divisor is set (sh == 0),
> + * (dhi - divisor) < divisor
> + *
> + * Thus, the high part of the quotient is 1, and we can
> + * calculate the low part with a single call to udiv_qrnnd
> + * after subtracting divisor from dhi
> + */
> + dhi = int128_sub(dhi, divisor);
> + *phigh = int128_one();
> + }
> +
> + *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor);
> + }
> +
> + /*
> + * since the dividend/divisor might have been normalized,
> + * the remainder might also have to be shifted back
> + */
> + rem = int128_urshift(rem, sh);
> + return rem;
> + }
> +}
I guess this works. I'm starting to wonder if we shouldn't use libgmp, instead of rolling
our own. In this case, mpn_tdiv_qr.
Anyway, modulo placement of clz128,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 6/9] host-utils: Implemented signed 256-by-128 division
2022-04-05 19:55 [PATCH v2 0/9] VDIV/VMOD Implementation Lucas Mateus Castro(alqotel)
` (4 preceding siblings ...)
2022-04-05 19:55 ` [PATCH v2 5/9] host-utils: Implemented unsigned 256-by-128 division Lucas Mateus Castro(alqotel)
@ 2022-04-05 19:55 ` Lucas Mateus Castro(alqotel)
2022-04-05 19:55 ` [PATCH v2 7/9] target/ppc: Implemented remaining vector divide extended Lucas Mateus Castro(alqotel)
` (2 subsequent siblings)
8 siblings, 0 replies; 19+ messages in thread
From: Lucas Mateus Castro(alqotel) @ 2022-04-05 19:55 UTC (permalink / raw)
To: qemu-devel, qemu-ppc
Cc: danielhb413, richard.henderson, Luis Pires,
Lucas Mateus Castro (alqotel), clg, Alex Bennée,
David Gibson
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Based on the already existing QEMU implementation, created a signed
256-bit by 128-bit division, needed to implement the vector divide
extended signed quadword instruction from PowerISA 3.1
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
include/qemu/host-utils.h | 1 +
util/host-utils.c | 51 +++++++++++++++++++++++++++++++++++++++
2 files changed, 52 insertions(+)
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 6da6a93f69..d0b444a40f 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -864,4 +864,5 @@ static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
}
Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor);
+Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor);
#endif
diff --git a/util/host-utils.c b/util/host-utils.c
index c6a01638c7..d221657e43 100644
--- a/util/host-utils.c
+++ b/util/host-utils.c
@@ -394,3 +394,54 @@ Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor)
return rem;
}
}
+
+/*
+ * Signed 256-by-128 division of *phigh:*plow by divisor.
+ * Returns the quotient via plow and phigh.
+ * Returns the remainder (negated for a negative dividend) as return value.
+ */
+Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor)
+{
+ bool neg_quotient = false, neg_remainder = false;
+ Int128 unsig_hi = *phigh, unsig_lo = *plow;
+ Int128 rem;
+
+ if (!int128_nonneg(*phigh)) { /* negative dividend: negate hi:lo */
+ neg_quotient = !neg_quotient;
+ neg_remainder = !neg_remainder;
+
+ if (!int128_nz(unsig_lo)) { /* lo == 0: only hi changes */
+ unsig_hi = int128_neg(unsig_hi);
+ } else {
+ unsig_hi = int128_not(unsig_hi);
+ unsig_lo = int128_neg(unsig_lo);
+ }
+ }
+
+ if (!int128_nonneg(divisor)) { /* negative divisor flips quotient sign */
+ neg_quotient = !neg_quotient;
+
+ divisor = int128_neg(divisor);
+ }
+
+ rem = divu256(&unsig_lo, &unsig_hi, divisor);
+
+ if (neg_quotient) { /* negate the 256-bit quotient */
+ if (!int128_nz(unsig_lo)) {
+ *phigh = int128_neg(unsig_hi);
+ *plow = int128_zero();
+ } else {
+ *phigh = int128_not(unsig_hi);
+ *plow = int128_neg(unsig_lo);
+ }
+ } else {
+ *phigh = unsig_hi;
+ *plow = unsig_lo;
+ }
+
+ if (neg_remainder) {
+ return int128_neg(rem);
+ } else {
+ return rem;
+ }
+}
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH v2 7/9] target/ppc: Implemented remaining vector divide extended
2022-04-05 19:55 [PATCH v2 0/9] VDIV/VMOD Implementation Lucas Mateus Castro(alqotel)
` (5 preceding siblings ...)
2022-04-05 19:55 ` [PATCH v2 6/9] host-utils: Implemented signed " Lucas Mateus Castro(alqotel)
@ 2022-04-05 19:55 ` Lucas Mateus Castro(alqotel)
2022-04-12 3:43 ` Richard Henderson
2022-04-05 19:55 ` [PATCH v2 8/9] target/ppc: Implemented vector module word/doubleword Lucas Mateus Castro(alqotel)
2022-04-05 19:55 ` [PATCH v2 9/9] target/ppc: Implemented vector module quadword Lucas Mateus Castro(alqotel)
8 siblings, 1 reply; 19+ messages in thread
From: Lucas Mateus Castro(alqotel) @ 2022-04-05 19:55 UTC (permalink / raw)
To: qemu-devel, qemu-ppc
Cc: danielhb413, richard.henderson, Greg Kurz,
Lucas Mateus Castro (alqotel), clg, David Gibson
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Implement the following PowerISA v3.1 instructions:
vdivesd: Vector Divide Extended Signed Doubleword
vdiveud: Vector Divide Extended Unsigned Doubleword
vdivesq: Vector Divide Extended Signed Quadword
vdiveuq: Vector Divide Extended Unsigned Quadword
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
target/ppc/helper.h | 4 ++
target/ppc/insn32.decode | 4 ++
target/ppc/int_helper.c | 64 +++++++++++++++++++++++++++++
target/ppc/translate/vmx-impl.c.inc | 4 ++
4 files changed, 76 insertions(+)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 4cfdf7b3ec..67ecff2c9a 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -173,6 +173,10 @@ DEF_HELPER_FLAGS_3(VMULOUH, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(VMULOUW, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(VDIVSQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(VDIVUQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VDIVESD, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VDIVEUD, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VDIVESQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VDIVEUQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_3(vslo, void, avr, avr, avr)
DEF_HELPER_3(vsro, void, avr, avr, avr)
DEF_HELPER_3(vsrv, void, avr, avr, avr)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 8c115c9c60..3eb920ac76 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -715,3 +715,7 @@ VDIVUQ 000100 ..... ..... ..... 00000001011 @VX
VDIVESW 000100 ..... ..... ..... 01110001011 @VX
VDIVEUW 000100 ..... ..... ..... 01010001011 @VX
+VDIVESD 000100 ..... ..... ..... 01111001011 @VX
+VDIVEUD 000100 ..... ..... ..... 01011001011 @VX
+VDIVESQ 000100 ..... ..... ..... 01100001011 @VX
+VDIVEUQ 000100 ..... ..... ..... 01000001011 @VX
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index ba5d4193ff..17a10c4412 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1057,6 +1057,70 @@ void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
}
}
+void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ int i;
+ int64_t high;
+ uint64_t low;
+ for (i = 0; i < 2; i++) { /* elements s64[0] and s64[1] */
+ high = a->s64[i];
+ low = 0; /* divs128 is given (high, low) = (a, 0) */
+ if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
+ t->s64[i] = a->s64[i]; /* Undefined behavior */
+ } else {
+ divs128(&low, &high, b->s64[i]);
+ t->s64[i] = low; /* NOTE(review): assumes divs128 updates low */
+ }
+ }
+}
+
+void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ int i;
+ uint64_t high, low;
+ for (i = 0; i < 2; i++) { /* elements u64[0] and u64[1] */
+ high = a->u64[i];
+ low = 0; /* divu128 is given (high, low) = (a, 0) */
+ if (unlikely(!b->u64[i])) {
+ t->u64[i] = a->u64[i]; /* Undefined behavior */
+ } else {
+ divu128(&low, &high, b->u64[i]);
+ t->u64[i] = low; /* NOTE(review): assumes divu128 updates low */
+ }
+ }
+}
+
+void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ Int128 high, low;
+ Int128 int128_min = int128_make128(0, INT64_MIN); /* lo = 0, hi = MIN */
+ Int128 neg1 = int128_makes64(-1);
+
+ high = a->s128;
+ low = int128_zero(); /* 256-bit dividend is high:low = a:0 */
+ if (unlikely(!int128_nz(b->s128) ||
+ (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
+ t->s128 = a->s128; /* Undefined behavior */
+ } else {
+ divs256(&low, &high, b->s128);
+ t->s128 = low; /* low 128 bits of the quotient */
+ }
+}
+
+void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ Int128 high, low;
+
+ high = a->s128;
+ low = int128_zero(); /* 256-bit dividend is high:low = a:0 */
+ if (unlikely(!int128_nz(b->s128))) {
+ t->s128 = a->s128; /* Undefined behavior */
+ } else {
+ divu256(&low, &high, b->s128);
+ t->s128 = low; /* low 128 bits of the quotient */
+ }
+}
+
void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
ppc_avr_t result;
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 8799e945bd..23f215dbea 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -3342,6 +3342,10 @@ DO_VDIV_VMOD(do_diveuw, 32, do_diveu_i32, false)
TRANS_VDIV_VMOD(ISA310, VDIVESW, MO_32, do_divesw, NULL)
TRANS_VDIV_VMOD(ISA310, VDIVEUW, MO_32, do_diveuw, NULL)
+TRANS_FLAGS2(ISA310, VDIVESD, do_vx_helper, gen_helper_VDIVESD)
+TRANS_FLAGS2(ISA310, VDIVEUD, do_vx_helper, gen_helper_VDIVEUD)
+TRANS_FLAGS2(ISA310, VDIVESQ, do_vx_helper, gen_helper_VDIVESQ)
+TRANS_FLAGS2(ISA310, VDIVEUQ, do_vx_helper, gen_helper_VDIVEUQ)
#undef DO_VDIV_VMOD
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 7/9] target/ppc: Implemented remaining vector divide extended
2022-04-05 19:55 ` [PATCH v2 7/9] target/ppc: Implemented remaining vector divide extended Lucas Mateus Castro(alqotel)
@ 2022-04-12 3:43 ` Richard Henderson
0 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-04-12 3:43 UTC (permalink / raw)
To: Lucas Mateus Castro(alqotel), qemu-devel, qemu-ppc
Cc: Greg Kurz, danielhb413, clg, David Gibson
On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote:
> From: "Lucas Mateus Castro (alqotel)"<lucas.araujo@eldorado.org.br>
>
> Implement the following PowerISA v3.1 instructions:
> vdivesd: Vector Divide Extended Signed Doubleword
> vdiveud: Vector Divide Extended Unsigned Doubleword
> vdivesq: Vector Divide Extended Signed Quadword
> vdiveuq: Vector Divide Extended Unsigned Quadword
>
> Signed-off-by: Lucas Mateus Castro (alqotel)<lucas.araujo@eldorado.org.br>
> ---
> target/ppc/helper.h | 4 ++
> target/ppc/insn32.decode | 4 ++
> target/ppc/int_helper.c | 64 +++++++++++++++++++++++++++++
> target/ppc/translate/vmx-impl.c.inc | 4 ++
> 4 files changed, 76 insertions(+)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 8/9] target/ppc: Implemented vector module word/doubleword
2022-04-05 19:55 [PATCH v2 0/9] VDIV/VMOD Implementation Lucas Mateus Castro(alqotel)
` (6 preceding siblings ...)
2022-04-05 19:55 ` [PATCH v2 7/9] target/ppc: Implemented remaining vector divide extended Lucas Mateus Castro(alqotel)
@ 2022-04-05 19:55 ` Lucas Mateus Castro(alqotel)
2022-04-12 3:43 ` Richard Henderson
2022-04-05 19:55 ` [PATCH v2 9/9] target/ppc: Implemented vector module quadword Lucas Mateus Castro(alqotel)
8 siblings, 1 reply; 19+ messages in thread
From: Lucas Mateus Castro(alqotel) @ 2022-04-05 19:55 UTC (permalink / raw)
To: qemu-devel, qemu-ppc
Cc: danielhb413, richard.henderson, Greg Kurz,
Lucas Mateus Castro (alqotel), clg, David Gibson
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Implement the following PowerISA v3.1 instructions:
vmodsw: Vector Modulo Signed Word
vmoduw: Vector Modulo Unsigned Word
vmodsd: Vector Modulo Signed Doubleword
vmodud: Vector Modulo Unsigned Doubleword
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
target/ppc/insn32.decode | 5 +++++
target/ppc/translate/vmx-impl.c.inc | 10 ++++++++++
2 files changed, 15 insertions(+)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 3eb920ac76..36b42e41d2 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -719,3 +719,8 @@ VDIVESD 000100 ..... ..... ..... 01111001011 @VX
VDIVEUD 000100 ..... ..... ..... 01011001011 @VX
VDIVESQ 000100 ..... ..... ..... 01100001011 @VX
VDIVEUQ 000100 ..... ..... ..... 01000001011 @VX
+
+VMODSW 000100 ..... ..... ..... 11110001011 @VX
+VMODUW 000100 ..... ..... ..... 11010001011 @VX
+VMODSD 000100 ..... ..... ..... 11111001011 @VX
+VMODUD 000100 ..... ..... ..... 11011001011 @VX
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 23f215dbea..c5178a0f1e 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -3340,6 +3340,11 @@ static void do_diveu_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
DO_VDIV_VMOD(do_divesw, 32, do_dives_i32, true)
DO_VDIV_VMOD(do_diveuw, 32, do_diveu_i32, false)
+DO_VDIV_VMOD(do_modsw, 32, tcg_gen_rem_i32, true)
+DO_VDIV_VMOD(do_moduw, 32, tcg_gen_remu_i32, false)
+DO_VDIV_VMOD(do_modsd, 64, tcg_gen_rem_i64, true)
+DO_VDIV_VMOD(do_modud, 64, tcg_gen_remu_i64, false)
+
TRANS_VDIV_VMOD(ISA310, VDIVESW, MO_32, do_divesw, NULL)
TRANS_VDIV_VMOD(ISA310, VDIVEUW, MO_32, do_diveuw, NULL)
TRANS_FLAGS2(ISA310, VDIVESD, do_vx_helper, gen_helper_VDIVESD)
@@ -3347,6 +3352,11 @@ TRANS_FLAGS2(ISA310, VDIVEUD, do_vx_helper, gen_helper_VDIVEUD)
TRANS_FLAGS2(ISA310, VDIVESQ, do_vx_helper, gen_helper_VDIVESQ)
TRANS_FLAGS2(ISA310, VDIVEUQ, do_vx_helper, gen_helper_VDIVEUQ)
+TRANS_VDIV_VMOD(ISA310, VMODSW, MO_32, do_modsw , NULL)
+TRANS_VDIV_VMOD(ISA310, VMODUW, MO_32, do_moduw, NULL)
+TRANS_VDIV_VMOD(ISA310, VMODSD, MO_64, NULL, do_modsd)
+TRANS_VDIV_VMOD(ISA310, VMODUD, MO_64, NULL, do_modud)
+
#undef DO_VDIV_VMOD
#undef GEN_VR_LDX
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 8/9] target/ppc: Implemented vector module word/doubleword
2022-04-05 19:55 ` [PATCH v2 8/9] target/ppc: Implemented vector module word/doubleword Lucas Mateus Castro(alqotel)
@ 2022-04-12 3:43 ` Richard Henderson
0 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-04-12 3:43 UTC (permalink / raw)
To: Lucas Mateus Castro(alqotel), qemu-devel, qemu-ppc
Cc: Greg Kurz, danielhb413, clg, David Gibson
On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote:
> From: "Lucas Mateus Castro (alqotel)"<lucas.araujo@eldorado.org.br>
>
> Implement the following PowerISA v3.1 instructions:
> vmodsw: Vector Modulo Signed Word
> vmoduw: Vector Modulo Unsigned Word
> vmodsd: Vector Modulo Signed Doubleword
> vmodud: Vector Modulo Unsigned Doubleword
>
> Signed-off-by: Lucas Mateus Castro (alqotel)<lucas.araujo@eldorado.org.br>
> ---
> target/ppc/insn32.decode | 5 +++++
> target/ppc/translate/vmx-impl.c.inc | 10 ++++++++++
> 2 files changed, 15 insertions(+)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 9/9] target/ppc: Implemented vector module quadword
2022-04-05 19:55 [PATCH v2 0/9] VDIV/VMOD Implementation Lucas Mateus Castro(alqotel)
` (7 preceding siblings ...)
2022-04-05 19:55 ` [PATCH v2 8/9] target/ppc: Implemented vector module word/doubleword Lucas Mateus Castro(alqotel)
@ 2022-04-05 19:55 ` Lucas Mateus Castro(alqotel)
2022-04-12 3:44 ` Richard Henderson
8 siblings, 1 reply; 19+ messages in thread
From: Lucas Mateus Castro(alqotel) @ 2022-04-05 19:55 UTC (permalink / raw)
To: qemu-devel, qemu-ppc
Cc: danielhb413, richard.henderson, Greg Kurz,
Lucas Mateus Castro (alqotel), clg, David Gibson
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Implement the following PowerISA v3.1 instructions:
vmodsq: Vector Modulo Signed Quadword
vmoduq: Vector Modulo Unsigned Quadword
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
target/ppc/helper.h | 2 ++
target/ppc/insn32.decode | 2 ++
target/ppc/int_helper.c | 21 +++++++++++++++++++++
target/ppc/translate/vmx-impl.c.inc | 2 ++
4 files changed, 27 insertions(+)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 67ecff2c9a..881e03959a 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -177,6 +177,8 @@ DEF_HELPER_FLAGS_3(VDIVESD, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(VDIVEUD, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(VDIVESQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(VDIVEUQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VMODSQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VMODUQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_3(vslo, void, avr, avr, avr)
DEF_HELPER_3(vsro, void, avr, avr, avr)
DEF_HELPER_3(vsrv, void, avr, avr, avr)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 36b42e41d2..b53efe1915 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -724,3 +724,5 @@ VMODSW 000100 ..... ..... ..... 11110001011 @VX
VMODUW 000100 ..... ..... ..... 11010001011 @VX
VMODSD 000100 ..... ..... ..... 11111001011 @VX
VMODUD 000100 ..... ..... ..... 11011001011 @VX
+VMODSQ 000100 ..... ..... ..... 11100001011 @VX
+VMODUQ 000100 ..... ..... ..... 11000001011 @VX
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 17a10c4412..72b2b06078 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1121,6 +1121,27 @@ void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
}
}
+void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ Int128 neg1 = int128_makes64(-1);
+ Int128 int128_min = int128_make128(0, INT64_MIN); /* lo = 0, hi = MIN */
+ if (likely(int128_nz(b->s128) &&
+ (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
+ t->s128 = int128_rems(a->s128, b->s128);
+ } else { /* b == 0, or a == int128_min with b == -1 */
+ t->s128 = int128_zero(); /* Undefined behavior */
+ }
+}
+
+void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ if (likely(int128_nz(b->s128))) {
+ t->s128 = int128_remu(a->s128, b->s128);
+ } else { /* b == 0 */
+ t->s128 = int128_zero(); /* Undefined behavior */
+ }
+}
+
void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
ppc_avr_t result;
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index c5178a0f1e..7ced7ad655 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -3356,6 +3356,8 @@ TRANS_VDIV_VMOD(ISA310, VMODSW, MO_32, do_modsw , NULL)
TRANS_VDIV_VMOD(ISA310, VMODUW, MO_32, do_moduw, NULL)
TRANS_VDIV_VMOD(ISA310, VMODSD, MO_64, NULL, do_modsd)
TRANS_VDIV_VMOD(ISA310, VMODUD, MO_64, NULL, do_modud)
+TRANS_FLAGS2(ISA310, VMODSQ, do_vx_helper, gen_helper_VMODSQ)
+TRANS_FLAGS2(ISA310, VMODUQ, do_vx_helper, gen_helper_VMODUQ)
#undef DO_VDIV_VMOD
--
2.31.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 9/9] target/ppc: Implemented vector module quadword
2022-04-05 19:55 ` [PATCH v2 9/9] target/ppc: Implemented vector module quadword Lucas Mateus Castro(alqotel)
@ 2022-04-12 3:44 ` Richard Henderson
0 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2022-04-12 3:44 UTC (permalink / raw)
To: Lucas Mateus Castro(alqotel), qemu-devel, qemu-ppc
Cc: Greg Kurz, danielhb413, clg, David Gibson
On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote:
> From: "Lucas Mateus Castro (alqotel)"<lucas.araujo@eldorado.org.br>
>
> Implement the following PowerISA v3.1 instructions:
> vmodsq: Vector Modulo Signed Quadword
> vmoduq: Vector Modulo Unsigned Quadword
>
> Signed-off-by: Lucas Mateus Castro (alqotel)<lucas.araujo@eldorado.org.br>
> ---
> target/ppc/helper.h | 2 ++
> target/ppc/insn32.decode | 2 ++
> target/ppc/int_helper.c | 21 +++++++++++++++++++++
> target/ppc/translate/vmx-impl.c.inc | 2 ++
> 4 files changed, 27 insertions(+)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 19+ messages in thread