[PULL 22/44] target/mips/mxu: Add Q8ADDE Q8ACCE D8SUM D8SUMC instructions

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: "Philippe Mathieu-Daudé" <philmd@linaro.org>
To: qemu-devel@nongnu.org
Cc: "Siarhei Volkau" <lis8215@gmail.com>,
	"Huacai Chen" <chenhuacai@kernel.org>,
	"Philippe Mathieu-Daudé" <philmd@linaro.org>,
	"Jiaxun Yang" <jiaxun.yang@flygoat.com>
Subject: [PULL 22/44] target/mips/mxu: Add Q8ADDE Q8ACCE D8SUM D8SUMC instructions
Date: Tue, 11 Jul 2023 00:25:49 +0200	[thread overview]
Message-ID: <20230710222611.50978-23-philmd@linaro.org> (raw)
In-Reply-To: <20230710222611.50978-1-philmd@linaro.org>

From: Siarhei Volkau <lis8215@gmail.com>

These instructions are all dual 8-bit addition/subtraction in
various combinations. Most instructions are grouped in pool14,
see the opcode organization in the file.

Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
Message-Id: <20230608104222.1520143-20-lis8215@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
 target/mips/tcg/mxu_translate.c | 200 ++++++++++++++++++++++++++++++++
 1 file changed, 200 insertions(+)

diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
index 18188208b7..5556fd152d 100644
--- a/target/mips/tcg/mxu_translate.c
+++ b/target/mips/tcg/mxu_translate.c
@@ -379,6 +379,8 @@ enum {
     OPC_MXU_D32ADD   = 0x18,
     OPC_MXU__POOL12  = 0x19,
     OPC_MXU__POOL13  = 0x1B,
+    OPC_MXU__POOL14  = 0x1C,
+    OPC_MXU_Q8ACCE   = 0x1D,
     OPC_MXU_S8LDD    = 0x22,
     OPC_MXU__POOL16  = 0x27,
     OPC_MXU__POOL17  = 0x28,
@@ -459,6 +461,15 @@ enum {
     OPC_MXU_D16ASUM   = 0x02,
 };
 
+/*
+ * MXU pool 14 minor opcodes (matched against opcode bits 23..22)
+ */
+enum {
+    OPC_MXU_Q8ADDE    = 0x00,
+    OPC_MXU_D8SUM     = 0x01,
+    OPC_MXU_D8SUMC    = 0x02,
+};
+
 /*
  * MXU pool 16
  */
@@ -2183,6 +2194,168 @@ static void gen_mxu_Q8ADD(DisasContext *ctx)
     }
 }
 
+/*
+ *  Q8ADDE XRa, XRb, XRc, XRd, aptn2
+ *    Add/subtract the four unsigned 8-bit lanes of XRc to/from those
+ *    of XRb, zero-extending each lane to 16 bits. Lanes 3:2 are packed
+ *    into XRa and lanes 1:0 into XRd as 16-bit values.
+ *    aptn2 bit 1 selects subtract for the XRa pair, bit 0 for the
+ *    XRd pair; a clear bit selects add.
+ *
+ *  Q8ACCE XRa, XRb, XRc, XRd, aptn2
+ *    Same lane operation, but each 16-bit result is added to the
+ *    16-bit value already held in that position of XRa / XRd.
+ */
+static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate)
+{
+    uint32_t aptn2, XRd, XRc, XRb, XRa;
+
+    aptn2 = extract32(ctx->opcode, 24, 2);
+    XRd   = extract32(ctx->opcode, 18, 4);
+    XRc   = extract32(ctx->opcode, 14, 4);
+    XRb   = extract32(ctx->opcode, 10, 4);
+    XRa   = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(!accumulate && (XRb == 0) && (XRc == 0))) {
+        /* Q8ADDE of two zero registers -> just set destinations to zero */
+        if (XRa != 0) {
+            tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+        }
+        if (XRd != 0) {
+            tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0);
+        }
+    } else {
+        /* general case; assumes gen_load_mxu_gpr() yields 0 for register 0 */
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv t2 = tcg_temp_new();
+        TCGv t3 = tcg_temp_new();
+        TCGv t4 = tcg_temp_new();
+        TCGv t5 = tcg_temp_new();
+        TCGv tb = tcg_temp_new();
+        TCGv tc = tcg_temp_new();
+
+        /* XRb/XRc may be register 0, which has no mxu_gpr[] slot to index */
+        gen_load_mxu_gpr(tb, XRb);
+        gen_load_mxu_gpr(tc, XRc);
+        if (XRa != 0) {
+            tcg_gen_extract_tl(t0, tb, 16, 8);
+            tcg_gen_extract_tl(t1, tc, 16, 8);
+            tcg_gen_extract_tl(t2, tb, 24, 8);
+            tcg_gen_extract_tl(t3, tc, 24, 8);
+            if (aptn2 & 2) {
+                tcg_gen_sub_tl(t0, t0, t1);
+                tcg_gen_sub_tl(t2, t2, t3);
+            } else {
+                tcg_gen_add_tl(t0, t0, t1);
+                tcg_gen_add_tl(t2, t2, t3);
+            }
+            if (accumulate) {
+                tcg_gen_extract_tl(t1, mxu_gpr[XRa - 1],  0, 16);
+                tcg_gen_extract_tl(t3, mxu_gpr[XRa - 1], 16, 16);
+                tcg_gen_add_tl(t0, t0, t1);
+                tcg_gen_add_tl(t2, t2, t3);
+            }
+            tcg_gen_shli_tl(t2, t2, 16);
+            tcg_gen_extract_tl(t0, t0, 0, 16);
+            tcg_gen_or_tl(t4, t2, t0);
+        }
+        if (XRd != 0) {
+            tcg_gen_extract_tl(t0, tb, 0, 8);
+            tcg_gen_extract_tl(t1, tc, 0, 8);
+            tcg_gen_extract_tl(t2, tb, 8, 8);
+            tcg_gen_extract_tl(t3, tc, 8, 8);
+            if (aptn2 & 1) {
+                tcg_gen_sub_tl(t0, t0, t1);
+                tcg_gen_sub_tl(t2, t2, t3);
+            } else {
+                tcg_gen_add_tl(t0, t0, t1);
+                tcg_gen_add_tl(t2, t2, t3);
+            }
+            if (accumulate) {
+                tcg_gen_extract_tl(t1, mxu_gpr[XRd - 1],  0, 16);
+                tcg_gen_extract_tl(t3, mxu_gpr[XRd - 1], 16, 16);
+                tcg_gen_add_tl(t0, t0, t1);
+                tcg_gen_add_tl(t2, t2, t3);
+            }
+            tcg_gen_shli_tl(t2, t2, 16);
+            tcg_gen_extract_tl(t0, t0, 0, 16);
+            tcg_gen_or_tl(t5, t2, t0);
+        }
+        /* store last: the XRd half above must see the pre-update registers */
+        gen_store_mxu_gpr(t4, XRa);
+        gen_store_mxu_gpr(t5, XRd);
+    }
+}
+
+/*
+ *  D8SUM XRa, XRb, XRc
+ *    Sum the four unsigned bytes of XRb and, separately, the four
+ *    unsigned bytes of XRc. The XRb sum is placed in XRa[31:16] and
+ *    the XRc sum in XRa[15:0].
+ *  D8SUMC XRa, XRb, XRc
+ *    Same as D8SUM, with 2 added to each of the two sums
+ *    (selected by sumc).
+ */
+static void gen_mxu_d8sum(DisasContext *ctx, bool sumc)
+{
+    uint32_t pad, pad2, XRc, XRb, XRa;
+
+    pad  = extract32(ctx->opcode, 24, 2);
+    pad2 = extract32(ctx->opcode, 18, 4);
+    XRc  = extract32(ctx->opcode, 14, 4);
+    XRb  = extract32(ctx->opcode, 10, 4);
+    XRa  = extract32(ctx->opcode,  6, 4);
+
+    if (unlikely(pad != 0 || pad2 != 0)) {
+        /* opcode padding incorrect -> do nothing */
+    } else if (unlikely(XRa == 0)) {
+        /* destination is zero register -> do nothing */
+    } else if (unlikely((XRb == 0) && (XRc == 0))) {
+        /* both sums are zero; D8SUMC still adds 2 to each 16-bit half */
+        tcg_gen_movi_tl(mxu_gpr[XRa - 1], sumc ? 0x00020002 : 0);
+    } else {
+        /* t4 = byte sum of XRb, t5 = byte sum of XRc; register 0 sums to 0 */
+        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
+        TCGv t2 = tcg_temp_new();
+        TCGv t3 = tcg_temp_new();
+        TCGv t4 = tcg_temp_new();
+        TCGv t5 = tcg_temp_new();
+
+        if (XRb != 0) {
+            tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1],  0, 8);
+            tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1],  8, 8);
+            tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8);
+            tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8);
+            tcg_gen_add_tl(t4, t0, t1);
+            tcg_gen_add_tl(t4, t4, t2);
+            tcg_gen_add_tl(t4, t4, t3);
+        } else {
+            tcg_gen_movi_tl(t4, 0);
+        }
+        if (XRc != 0) {
+            tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1],  0, 8);
+            tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1],  8, 8);
+            tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8);
+            tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
+            tcg_gen_add_tl(t5, t0, t1);
+            tcg_gen_add_tl(t5, t5, t2);
+            tcg_gen_add_tl(t5, t5, t3);
+        } else {
+            tcg_gen_movi_tl(t5, 0);
+        }
+
+        if (sumc) {
+            tcg_gen_addi_tl(t4, t4, 2);
+            tcg_gen_addi_tl(t5, t5, 2);
+        }
+        /* pack: XRb sum in bits 31..16, XRc sum in bits 15..0 */
+        tcg_gen_shli_tl(t4, t4, 16);
+        tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
+    }
+}
+
 /*
  * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed
  * 16-bit pattern addition.
@@ -3335,6 +3508,27 @@ static void decode_opc_mxu__pool13(DisasContext *ctx)
     }
 }
 
+/* Dispatch a pool 14 instruction on its minor opcode (opcode bits 23..22). */
+static void decode_opc_mxu__pool14(DisasContext *ctx)
+{
+    const uint32_t minor = extract32(ctx->opcode, 22, 2);
+
+    switch (minor) {
+    case OPC_MXU_Q8ADDE:
+        gen_mxu_q8adde(ctx, false);
+        break;
+    case OPC_MXU_D8SUM:
+    case OPC_MXU_D8SUMC:
+        /* One generator serves both; the flag selects the D8SUMC variant. */
+        gen_mxu_d8sum(ctx, minor == OPC_MXU_D8SUMC);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
 static void decode_opc_mxu__pool16(DisasContext *ctx)
 {
     uint32_t opcode = extract32(ctx->opcode, 18, 3);
@@ -3506,6 +3700,12 @@ bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
         case OPC_MXU__POOL13:
             decode_opc_mxu__pool13(ctx);
             break;
+        case OPC_MXU__POOL14:
+            decode_opc_mxu__pool14(ctx);
+            break;
+        case OPC_MXU_Q8ACCE:
+            gen_mxu_q8adde(ctx, true);
+            break;
         case OPC_MXU_S8LDD:
             gen_mxu_s8ldd(ctx);
             break;
-- 
2.38.1

next prev parent reply	other threads:[~2023-07-10 22:31 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-10 22:25 [PULL 00/44] MIPS patches for 2023-07-10 Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 01/44] target/mips: Rework cp0_timer with clock API Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 02/44] target/mips: Implement Loongson CSR instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 03/44] hw/mips/loongson3_virt: Relax CPU restrictions for TCG Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 04/44] target/mips: Add emulation of MXU instructions for 32-bit load/store Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 05/44] target/mips: Add support of two XBurst CPUs Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 06/44] target/mips/mxu: Add LXW LXB LXH LXBU LXHU instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 07/44] target/mips/mxu: Add S32MADD/MADDU/MSUB/MSUBU instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 08/44] target/mips/mxu: Add Q8SLT Q8SLTU instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 09/44] target/mips/mxu: Fix D16MAX D16MIN Q8MAX Q8MIN instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 10/44] target/mips/mxu: Add S32SLT D16SLT D16AVG[R] Q8AVG[R] insns Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 11/44] target/mips/mxu: Add Q8ADD instruction Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 12/44] target/mips/mxu: Add S32CPS D16CPS Q8ABD Q16SAT insns Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 13/44] target/mips/mxu: Add D16MULF D16MULE instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 14/44] target/mips/mxu: Add D16MACF D16MACE instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 15/44] target/mips/mxu: Add D16MADL instruction Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 16/44] target/mips/mxu: Add S16MAD instruction Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 17/44] target/mips/mxu: Add Q16ADD instruction Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 18/44] target/mips/mxu: Add D32ADD instruction Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 19/44] target/mips/mxu: Add D32ACC D32ACCM D32ASUM instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 20/44] target/mips/mxu: Add D32ADDC instruction Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 21/44] target/mips/mxu: Add Q16ACC Q16ACCM D16ASUM instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` Philippe Mathieu-Daudé [this message]
2023-07-10 22:25 ` [PULL 23/44] target/mips/mxu: Add S8STD S8LDI S8SDI instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 24/44] target/mips/mxu: Add S16LDD S16STD S16LDI S16SDI instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 25/44] target/mips/mxu: Add S32MUL S32MULU S32EXTR S32EXTRV insns Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 26/44] target/mips/mxu: Add S32ALN S32LUI insns Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 27/44] target/mips/mxu: Add D32SARL D32SARW instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 28/44] target/mips/mxu: Add D32SLL D32SLR D32SAR instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 29/44] target/mips/mxu: Add Q16SLL Q16SLR Q16SAR instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 30/44] target/mips/mxu: Add D32/Q16- SLLV/SLRV/SARV instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 31/44] target/mips/mxu: Add S32/D16/Q8- MOVZ/MOVN instructions Philippe Mathieu-Daudé
2023-07-10 22:25 ` [PULL 32/44] target/mips/mxu: Add Q8MAC Q8MACSU instructions Philippe Mathieu-Daudé
2023-07-10 22:26 ` [PULL 33/44] target/mips/mxu: Add Q16SCOP instruction Philippe Mathieu-Daudé
2023-07-10 22:26 ` [PULL 34/44] target/mips/mxu: Add Q8MADL instruction Philippe Mathieu-Daudé
2023-07-10 22:26 ` [PULL 35/44] target/mips/mxu: Add S32SFL instruction Philippe Mathieu-Daudé
2023-07-10 22:26 ` [PULL 36/44] target/mips/mxu: Add Q8SAD instruction Philippe Mathieu-Daudé
2023-07-10 22:26 ` [PULL 37/44] target/mips: enable GINVx support for I6400 and I6500 Philippe Mathieu-Daudé
2023-07-10 22:26 ` [PULL 38/44] hw/ide/pci: Expose legacy interrupts as named GPIOs Philippe Mathieu-Daudé
2023-07-10 22:26 ` [PULL 39/44] hw/ide/via: Wire up IDE legacy interrupts in host device Philippe Mathieu-Daudé
2023-07-10 22:26 ` [PULL 40/44] hw/isa/vt82c686: Remove via_isa_set_irq() Philippe Mathieu-Daudé
2023-07-10 22:26 ` [PULL 41/44] hw/ide: Extract IDEBus assignment into bmdma_init() Philippe Mathieu-Daudé
2023-07-10 22:26 ` [PULL 42/44] hw/ide: Extract bmdma_status_writeb() Philippe Mathieu-Daudé
2023-07-10 22:26 ` [PULL 43/44] hw/ide/pci: Replace some magic numbers by constants Philippe Mathieu-Daudé
2023-07-10 22:26 ` [PULL 44/44] hw/ide/piix: Move registration of VMStateDescription to DeviceClass Philippe Mathieu-Daudé
2023-07-11  8:32 ` [PULL 00/44] MIPS patches for 2023-07-10 Richard Henderson

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:18188208b dfblob:5556fd152 )
 OR (
bs:"[PULL 22/44] target/mips/mxu: Add Q8ADDE Q8ACCE D8SUM D8SUMC instructions" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230710222611.50978-23-philmd@linaro.org \
    --to=philmd@linaro.org \
    --cc=chenhuacai@kernel.org \
    --cc=jiaxun.yang@flygoat.com \
    --cc=lis8215@gmail.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).