[PATCH v2] target/ppc: add vmsumudm vmsumcud instructions

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Lijun Pan <ljp@linux.ibm.com>
To: qemu-ppc@nongnu.org, qemu-devel@nongnu.org
Cc: Lijun Pan <ljp@linux.ibm.com>
Subject: [PATCH v2] target/ppc: add vmsumudm vmsumcud instructions
Date: Fri, 12 Jun 2020 22:55:46 -0500	[thread overview]
Message-ID: <20200613035546.22041-1-ljp@linux.ibm.com> (raw)

vmsumudm (Power ISA 3.0) - Vector Multiply-Sum Unsigned Doubleword Modulo
VA-form.
vmsumcud (Power ISA 3.1) - Vector Multiply-Sum & write Carry-out Unsigned
Doubleword VA-form.

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
---
v2: move vmsumcudm() to qemu/int128.h as Richard Henderson suggested,
    also rename addu128() to uint128_add() and include it in qemu/int128.h

 disas/ppc.c                         |  2 +
 include/qemu/int128.h               | 97 +++++++++++++++++++++++++++++
 target/ppc/helper.h                 |  4 +-
 target/ppc/int_helper.c             | 19 +++++-
 target/ppc/translate.c              |  1 -
 target/ppc/translate/vmx-impl.inc.c | 39 ++++++------
 target/ppc/translate/vmx-ops.inc.c  |  2 +
 7 files changed, 143 insertions(+), 21 deletions(-)

diff --git a/disas/ppc.c b/disas/ppc.c
index 63e97cfe1d..3ed4d23ed3 100644
--- a/disas/ppc.c
+++ b/disas/ppc.c
@@ -2261,7 +2261,9 @@ const struct powerpc_opcode powerpc_opcodes[] = {
 { "vmsumshs",  VXA(4,  41), VXA_MASK,	PPCVEC,		{ VD, VA, VB, VC } },
 { "vmsumubm",  VXA(4,  36), VXA_MASK,   PPCVEC,		{ VD, VA, VB, VC } },
 { "vmsumuhm",  VXA(4,  38), VXA_MASK,   PPCVEC,		{ VD, VA, VB, VC } },
+{ "vmsumudm",  VXA(4,  35), VXA_MASK,   PPCVEC,		{ VD, VA, VB, VC } },
 { "vmsumuhs",  VXA(4,  39), VXA_MASK,   PPCVEC,		{ VD, VA, VB, VC } },
+{ "vmsumcud",  VXA(4,  23), VXA_MASK,   PPCVEC,		{ VD, VA, VB, VC } },
 { "vmulesb",   VX(4,  776), VX_MASK,	PPCVEC,		{ VD, VA, VB } },
 { "vmulesh",   VX(4,  840), VX_MASK,	PPCVEC,		{ VD, VA, VB } },
 { "vmuleub",   VX(4,  520), VX_MASK,	PPCVEC,		{ VD, VA, VB } },
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index 5c9890db8b..3362973cc5 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -3,6 +3,7 @@
 
 #ifdef CONFIG_INT128
 #include "qemu/bswap.h"
+#include "qemu/host-utils.h"
 
 typedef __int128_t Int128;
 
@@ -143,6 +144,55 @@ static inline Int128 bswap128(Int128 a)
     return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a)));
 }
 
+/**
+ * uint128_add - unsigned 128-bit add with carry-out: (ca, rh, rl) = a + b
+ * @ah: high 64 bits of a
+ * @al: low 64 bits of a
+ * @bh: high 64 bits of b
+ * @bl: low 64 bits of b
+ * @rh: out: high 64 bits of the sum, which wraps modulo 2^128
+ * @rl: out: low 64 bits of the sum
+ * @ca: out: carry out of the 128-bit add, 1 if a + b wrapped, otherwise 0
+ */
+static inline void uint128_add(uint64_t ah, uint64_t al, uint64_t bh,
+		uint64_t bl, uint64_t *rh, uint64_t *rl, uint64_t *ca)
+{
+	__uint128_t a = (__uint128_t)ah << 64 | (__uint128_t)al;
+	__uint128_t b = (__uint128_t)bh << 64 | (__uint128_t)bl;
+	__uint128_t r = a + b; /* unsigned, so overflow wraps modulo 2^128 */
+
+	*rh = (uint64_t)(r >> 64);
+	*rl = (uint64_t)r;
+	*ca = (~a < b); /* true iff b > 2^128 - 1 - a, i.e. the add wrapped */
+}
+
+/**
+ * mulsum - (ca, rh, rl) = ah * bh + al * bl + c, where c = (ch, cl)
+ * @ah: high 64 bits of a, multiplied by @bh
+ * @al: low 64 bits of a, multiplied by @bl
+ * @bh: high 64 bits of b
+ * @bl: low 64 bits of b
+ * @ch: high 64 bits of the 128-bit addend c
+ * @cl: low 64 bits of the 128-bit addend c
+ * @rh: out: high 64 bits of the sum, which wraps modulo 2^128
+ * @rl: out: low 64 bits of the sum
+ * @ca: out: number of carries out of the 128-bit sum (0, 1 or 2)
+ */
+static inline void mulsum(uint64_t ah, uint64_t al, uint64_t bh,
+		uint64_t bl, uint64_t ch, uint64_t cl, uint64_t *rh,
+		uint64_t *rl, uint64_t *ca)
+{
+	__uint128_t prod1, prod2, r;
+	__uint128_t c = (__uint128_t)ch << 64 | (__uint128_t)cl;
+
+	prod1 = (__uint128_t)ah * (__uint128_t)bh; /* cannot overflow */
+	prod2 = (__uint128_t)al * (__uint128_t)bl; /* cannot overflow */
+	r = prod1 + prod2 + c; /* each of the two adds may wrap modulo 2^128 */
+	*rh = (uint64_t)(r >> 64);
+	*rl = (uint64_t)r;
+	*ca = (~prod1 < prod2) + (~c < (prod1 + prod2)); /* one per wrap */
+}
+
 #else /* !CONFIG_INT128 */
 
 typedef struct Int128 Int128;
@@ -301,5 +351,52 @@ static inline void int128_subfrom(Int128 *a, Int128 b)
     *a = int128_sub(*a, b);
 }
 
+/**
+ * uint128_add - unsigned 128-bit add with carry-out: (ca, rh, rl) = a + b
+ * @ah: high 64 bits of a
+ * @al: low 64 bits of a
+ * @bh: high 64 bits of b
+ * @bl: low 64 bits of b
+ * @rh: out: high 64 bits of the sum, which wraps modulo 2^128
+ * @rl: out: low 64 bits of the sum
+ * @ca: out: carry out of the 128-bit add, 1 if a + b wrapped, otherwise 0
+ */
+static inline void uint128_add(uint64_t ah, uint64_t al, uint64_t bh,
+		uint64_t bl, uint64_t *rh, uint64_t *rl, uint64_t *ca)
+{
+	uint64_t lo = al + bl;
+	uint64_t hi = ah + bh + (~al < bl); /* (~al < bl): carry from lo */
+	uint64_t hi_t = ah + bh; /* high sum before the low carry is added */
+	uint64_t carry = (~ah < bh) + (~hi_t < (~al < bl)); /* 0 or 1 */
+
+	*rl = lo;
+	*rh = hi;
+	*ca = carry;
+}
+
+/**
+ * mulsum - (ca, rh, rl) = ah * bh + al * bl + c, where c = (ch, cl)
+ * @ah: high 64 bits of a, multiplied by @bh
+ * @al: low 64 bits of a, multiplied by @bl
+ * @bh: high 64 bits of b
+ * @bl: low 64 bits of b
+ * @ch: high 64 bits of the 128-bit addend c
+ * @cl: low 64 bits of the 128-bit addend c
+ * @rh: out: high 64 bits of the sum, which wraps modulo 2^128
+ * @rl: out: low 64 bits of the sum
+ * @ca: out: number of carries out of the 128-bit sum (0, 1 or 2)
+ */
+static inline void mulsum(uint64_t ah, uint64_t al, uint64_t bh,
+		uint64_t bl, uint64_t ch, uint64_t cl, uint64_t *rh,
+		uint64_t *rl, uint64_t *ca)
+{
+	uint64_t p1h, p1l, p2h, p2l, sh, sl, ca1, ca2;
+	mulu64(&p1l, &p1h ,ah, bh); /* p1 = ah * bh (assumed 64x64->128) */
+	mulu64(&p2l, &p2h ,al, bl); /* p2 = al * bl (same assumption) */
+	uint128_add(p1h, p1l, p2h, p2l, &sh, &sl, &ca1); /* s = p1 + p2 */
+	uint128_add(sh, sl, ch, cl, rh, rl, &ca2); /* r = s + c */
+	*ca = ca1 + ca2; /* one per 128-bit add that wrapped */
+}
+
 #endif /* CONFIG_INT128 */
 #endif /* INT128_H */
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 2dfa1c6942..d540e8f30b 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -263,10 +263,12 @@ DEF_HELPER_3(vpkpx, void, avr, avr, avr)
 DEF_HELPER_5(vmhaddshs, void, env, avr, avr, avr, avr)
 DEF_HELPER_5(vmhraddshs, void, env, avr, avr, avr, avr)
 DEF_HELPER_5(vmsumuhm, void, env, avr, avr, avr, avr)
+DEF_HELPER_5(vmsumudm, void, env, avr, avr, avr, avr)
 DEF_HELPER_5(vmsumuhs, void, env, avr, avr, avr, avr)
 DEF_HELPER_5(vmsumshm, void, env, avr, avr, avr, avr)
 DEF_HELPER_5(vmsumshs, void, env, avr, avr, avr, avr)
-DEF_HELPER_4(vmladduhm, void, avr, avr, avr, avr)
+DEF_HELPER_5(vmsumcud, void, env, avr, avr, avr, avr)
+DEF_HELPER_5(vmladduhm, void, env, avr, avr, avr, avr)
 DEF_HELPER_FLAGS_2(mtvscr, TCG_CALL_NO_RWG, void, env, i32)
 DEF_HELPER_FLAGS_1(mfvscr, TCG_CALL_NO_RWG, i32, env)
 DEF_HELPER_3(lvebx, void, env, avr, tl)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index be53cd6f68..5f257b7b86 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -926,7 +926,8 @@ void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
     }
 }
 
-void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+void helper_vmladduhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
+			ppc_avr_t *b, ppc_avr_t *c)
 {
     int i;
 
@@ -1064,6 +1065,22 @@ void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
     }
 }
 
+void helper_vmsumudm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
+			ppc_avr_t *b, ppc_avr_t *c)
+{ /* r = a.hi * b.hi + a.lo * b.lo + c, truncated to 128 bits */
+	uint64_t ca; /* carry-out from mulsum(); written but never read */
+	mulsum(a->VsrD(0), a->VsrD(1), b->VsrD(0), b->VsrD(1), c->VsrD(0), c->VsrD(1),
+		&r->VsrD(0), &r->VsrD(1), &ca); /* VsrD(0) is the high half */
+}
+void helper_vmsumcud(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
+			ppc_avr_t *b, ppc_avr_t *c)
+{ /* r = carry-out of a.hi * b.hi + a.lo * b.lo + c; the sum is dropped */
+	uint64_t rh, rl; /* sum from mulsum(); written but never read */
+	mulsum(a->VsrD(0), a->VsrD(1), b->VsrD(0), b->VsrD(1), c->VsrD(0), c->VsrD(1),
+		&rh, &rl, &r->VsrD(1)); /* carry-out -> low doubleword of r */
+	r->VsrD(0) = 0; /* high doubleword of r is cleared */
+}
+
 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
     void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
     {                                                                   \
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 4ce3d664b5..35ff1aa77e 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -7281,7 +7281,6 @@ GEN_HANDLER(lvsl, 0x1f, 0x06, 0x00, 0x00000001, PPC_ALTIVEC),
 GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x00000001, PPC_ALTIVEC),
 GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC),
 GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff0000, PPC_ALTIVEC),
-GEN_HANDLER(vmladduhm, 0x04, 0x11, 0xFF, 0x00000000, PPC_ALTIVEC),
 #if defined(TARGET_PPC64)
 GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x00000000, PPC_NONE,
               PPC2_ISA300),
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index 403ed3a01c..5c0e44d7fb 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -1248,6 +1248,25 @@ static void gen_vsldoi(DisasContext *ctx)
     tcg_temp_free_i32(sh);
 }
 
+#define GEN_VAFORM(name, opc2) /* emits gen_<name>(); opc2 is unused */ \
+static void glue(gen_, name)(DisasContext *ctx)                         \
+{                                                                       \
+    TCGv_ptr ra, rb, rc, rd;                                            \
+    if (unlikely(!ctx->altivec_enabled)) {                              \
+        gen_exception(ctx, POWERPC_EXCP_VPU); /* AltiVec is disabled */ \
+        return;                                                         \
+    }                                                                   \
+    ra = gen_avr_ptr(rA(ctx->opcode));                                  \
+    rb = gen_avr_ptr(rB(ctx->opcode));                                  \
+    rc = gen_avr_ptr(rC(ctx->opcode));                                  \
+    rd = gen_avr_ptr(rD(ctx->opcode));                                  \
+    gen_helper_##name(cpu_env, rd, ra, rb, rc); /* (env, r, a, b, c) */ \
+    tcg_temp_free_ptr(ra); /* release the four temporaries */           \
+    tcg_temp_free_ptr(rb);                                              \
+    tcg_temp_free_ptr(rc);                                              \
+    tcg_temp_free_ptr(rd);                                              \
+}
+
 #define GEN_VAFORM_PAIRED(name0, name1, opc2)                           \
 static void glue(gen_, name0##_##name1)(DisasContext *ctx)              \
     {                                                                   \
@@ -1272,24 +1291,8 @@ static void glue(gen_, name0##_##name1)(DisasContext *ctx)              \
     }
 
 GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16)
-
-static void gen_vmladduhm(DisasContext *ctx)
-{
-    TCGv_ptr ra, rb, rc, rd;
-    if (unlikely(!ctx->altivec_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VPU);
-        return;
-    }
-    ra = gen_avr_ptr(rA(ctx->opcode));
-    rb = gen_avr_ptr(rB(ctx->opcode));
-    rc = gen_avr_ptr(rC(ctx->opcode));
-    rd = gen_avr_ptr(rD(ctx->opcode));
-    gen_helper_vmladduhm(rd, ra, rb, rc);
-    tcg_temp_free_ptr(ra);
-    tcg_temp_free_ptr(rb);
-    tcg_temp_free_ptr(rc);
-    tcg_temp_free_ptr(rd);
-}
+GEN_VAFORM(vmsumcud, 11)
+GEN_VAFORM_PAIRED(vmladduhm, vmsumudm, 17)
 
 static void gen_vpermr(DisasContext *ctx)
 {
diff --git a/target/ppc/translate/vmx-ops.inc.c b/target/ppc/translate/vmx-ops.inc.c
index 84e05fb827..aee23e31c6 100644
--- a/target/ppc/translate/vmx-ops.inc.c
+++ b/target/ppc/translate/vmx-ops.inc.c
@@ -276,6 +276,8 @@ GEN_VAFORM_PAIRED(vmsumuhm, vmsumuhs, 19),
 GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20),
 GEN_VAFORM_PAIRED(vsel, vperm, 21),
 GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23),
+GEN_HANDLER(vmsumcud, 0x4, 11, 0xFF, 0x00000000, PPC_ALTIVEC),
+GEN_VAFORM_PAIRED(vmladduhm, vmsumudm, 17),
 
 GEN_VXFORM_DUAL(vclzb, vpopcntb, 1, 28, PPC_NONE, PPC2_ALTIVEC_207),
 GEN_VXFORM_DUAL(vclzh, vpopcnth, 1, 29, PPC_NONE, PPC2_ALTIVEC_207),
-- 
2.23.0

next             reply	other threads:[~2020-06-13  3:57 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-13  3:55 Lijun Pan [this message]
2020-06-15 16:12 ` [PATCH v2] target/ppc: add vmsumudm vmsumcud instructions Richard Henderson
2020-06-15 20:53   ` Lijun Pan
2020-06-18 23:09     ` Richard Henderson
2020-06-19  5:10       ` Lijun Pan
2020-06-19 20:45         ` Richard Henderson

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:63e97cfe1 dfblob:3ed4d23ed dfblob:5c9890db8 dfblob:3362973cc
dfblob:2dfa1c694 dfblob:d540e8f30 dfblob:be53cd6f6 dfblob:5f257b7b8
dfblob:4ce3d664b dfblob:35ff1aa77 dfblob:403ed3a01 dfblob:5c0e44d7f
dfblob:84e05fb82 dfblob:aee23e31c )
 OR (
bs:"[PATCH v2] target/ppc: add vmsumudm vmsumcud instructions" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200613035546.22041-1-ljp@linux.ibm.com \
    --to=ljp@linux.ibm.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.