qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL v3 29/38] target/i386: Use i128 for 128 and 256-bit loads and stores
Date: Mon, 23 Oct 2023 11:13:20 -0700	[thread overview]
Message-ID: <20231023181329.171490-30-richard.henderson@linaro.org> (raw)
In-Reply-To: <20231023181329.171490-1-richard.henderson@linaro.org>

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/i386/tcg/translate.c | 63 +++++++++++++++++--------------------
 1 file changed, 29 insertions(+), 34 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 4f6f9fa7e5..18d06ab247 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2918,59 +2918,54 @@ static inline void gen_stq_env_A0(DisasContext *s, int offset)
 
 static inline void gen_ldo_env_A0(DisasContext *s, int offset, bool align)
 {
+    MemOp atom = (s->cpuid_ext_features & CPUID_EXT_AVX
+                  ? MO_ATOM_IFALIGN : MO_ATOM_IFALIGN_PAIR);
+    MemOp mop = MO_128 | MO_LE | atom | (align ? MO_ALIGN_16 : 0);
     int mem_index = s->mem_index;
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index,
-                        MO_LEUQ | (align ? MO_ALIGN_16 : 0));
-    tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(0)));
-    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
-    tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(1)));
+    TCGv_i128 t = tcg_temp_new_i128();
+
+    tcg_gen_qemu_ld_i128(t, s->A0, mem_index, mop);
+    tcg_gen_st_i128(t, tcg_env, offset);
 }
 
 static inline void gen_sto_env_A0(DisasContext *s, int offset, bool align)
 {
+    MemOp atom = (s->cpuid_ext_features & CPUID_EXT_AVX
+                  ? MO_ATOM_IFALIGN : MO_ATOM_IFALIGN_PAIR);
+    MemOp mop = MO_128 | MO_LE | atom | (align ? MO_ALIGN_16 : 0);
     int mem_index = s->mem_index;
-    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(0)));
-    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index,
-                        MO_LEUQ | (align ? MO_ALIGN_16 : 0));
-    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
-    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(1)));
-    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+    TCGv_i128 t = tcg_temp_new_i128();
+
+    tcg_gen_ld_i128(t, tcg_env, offset);
+    tcg_gen_qemu_st_i128(t, s->A0, mem_index, mop);
 }
 
 static void gen_ldy_env_A0(DisasContext *s, int offset, bool align)
 {
+    MemOp mop = MO_128 | MO_LE | MO_ATOM_IFALIGN_PAIR;
     int mem_index = s->mem_index;
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index,
-                        MO_LEUQ | (align ? MO_ALIGN_32 : 0));
-    tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(0)));
-    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
-    tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(1)));
+    TCGv_i128 t0 = tcg_temp_new_i128();
+    TCGv_i128 t1 = tcg_temp_new_i128();
 
+    tcg_gen_qemu_ld_i128(t0, s->A0, mem_index, mop | (align ? MO_ALIGN_32 : 0));
     tcg_gen_addi_tl(s->tmp0, s->A0, 16);
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
-    tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(2)));
-    tcg_gen_addi_tl(s->tmp0, s->A0, 24);
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
-    tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(3)));
+    tcg_gen_qemu_ld_i128(t1, s->tmp0, mem_index, mop);
+
+    tcg_gen_st_i128(t0, tcg_env, offset + offsetof(YMMReg, YMM_X(0)));
+    tcg_gen_st_i128(t1, tcg_env, offset + offsetof(YMMReg, YMM_X(1)));
 }
 
 static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
 {
+    MemOp mop = MO_128 | MO_LE | MO_ATOM_IFALIGN_PAIR;
     int mem_index = s->mem_index;
-    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(0)));
-    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index,
-                        MO_LEUQ | (align ? MO_ALIGN_32 : 0));
-    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
-    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(1)));
-    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+    TCGv_i128 t = tcg_temp_new_i128();
+
+    tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(0)));
+    tcg_gen_qemu_st_i128(t, s->A0, mem_index, mop | (align ? MO_ALIGN_32 : 0));
     tcg_gen_addi_tl(s->tmp0, s->A0, 16);
-    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(2)));
-    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
-    tcg_gen_addi_tl(s->tmp0, s->A0, 24);
-    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(3)));
-    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+    tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(1)));
+    tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
 }
 
 #include "decode-new.h"
-- 
2.34.1



  parent reply	other threads:[~2023-10-23 18:15 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-23 18:12 [PULL v3 00/38] tcg patch queue Richard Henderson
2023-10-23 18:12 ` [PULL v3 01/38] tcg/ppc: Untabify tcg-target.c.inc Richard Henderson
2023-10-23 18:12 ` [PULL v3 02/38] tcg/ppc: Enable direct branching tcg_out_goto_tb with TCG_REG_TB Richard Henderson
2023-10-23 18:12 ` [PULL v3 03/38] tcg/ppc: Reinterpret tb-relative to TB+4 Richard Henderson
2023-10-23 18:12 ` [PULL v3 04/38] tcg/ppc: Use ADDPCIS in tcg_out_tb_start Richard Henderson
2023-10-23 18:12 ` [PULL v3 05/38] tcg/ppc: Use ADDPCIS in tcg_out_movi_int Richard Henderson
2023-10-23 18:12 ` [PULL v3 06/38] tcg/ppc: Use ADDPCIS for the constant pool Richard Henderson
2023-10-23 18:12 ` [PULL v3 07/38] tcg/ppc: Use ADDPCIS in tcg_out_goto_tb Richard Henderson
2023-10-23 18:12 ` [PULL v3 08/38] tcg/ppc: Use PADDI in tcg_out_movi Richard Henderson
2023-10-23 18:13 ` [PULL v3 09/38] tcg/ppc: Use prefixed instructions in tcg_out_mem_long Richard Henderson
2023-10-23 18:13 ` [PULL v3 10/38] tcg/ppc: Use PLD in tcg_out_movi for constant pool Richard Henderson
2023-10-23 18:13 ` [PULL v3 11/38] tcg/ppc: Use prefixed instructions in tcg_out_dupi_vec Richard Henderson
2023-10-23 18:13 ` [PULL v3 12/38] tcg/ppc: Use PLD in tcg_out_goto_tb Richard Henderson
2023-10-23 18:13 ` [PULL v3 13/38] tcg/ppc: Disable TCG_REG_TB for Power9/Power10 Richard Henderson
2023-10-23 18:13 ` [PULL v3 14/38] tcg: Introduce tcg_use_softmmu Richard Henderson
2023-10-23 18:13 ` [PULL v3 15/38] tcg: Provide guest_base fallback for system mode Richard Henderson
2023-10-23 18:13 ` [PULL v3 16/38] tcg/arm: Use tcg_use_softmmu Richard Henderson
2023-10-23 18:13 ` [PULL v3 17/38] tcg/aarch64: " Richard Henderson
2023-10-23 18:13 ` [PULL v3 18/38] tcg/i386: " Richard Henderson
2023-10-23 18:13 ` [PULL v3 19/38] tcg/loongarch64: " Richard Henderson
2023-10-23 18:13 ` [PULL v3 20/38] tcg/mips: " Richard Henderson
2023-10-23 18:13 ` [PULL v3 21/38] tcg/ppc: " Richard Henderson
2023-10-23 18:13 ` [PULL v3 22/38] tcg/riscv: Do not reserve TCG_GUEST_BASE_REG for guest_base zero Richard Henderson
2023-10-23 18:13 ` [PULL v3 23/38] tcg/riscv: Use tcg_use_softmmu Richard Henderson
2023-10-23 18:13 ` [PULL v3 24/38] tcg/s390x: " Richard Henderson
2023-10-23 18:13 ` [PULL v3 25/38] tcg: drop unused tcg_temp_free define Richard Henderson
2023-10-23 18:13 ` [PULL v3 26/38] tcg: Use constant zero when expanding with divu2 Richard Henderson
2023-10-23 18:13 ` [PULL v3 27/38] tcg: Optimize past conditional branches Richard Henderson
2023-10-23 18:13 ` [PULL v3 28/38] tcg: Add tcg_gen_{ld,st}_i128 Richard Henderson
2023-10-23 18:13 ` Richard Henderson [this message]
2023-10-23 18:13 ` [PULL v3 30/38] tcg: add negsetcondi Richard Henderson
2023-10-23 18:13 ` [PULL v3 31/38] tcg: Export tcg_gen_ext_{i32,i64,tl} Richard Henderson
2023-10-23 18:13 ` [PULL v3 32/38] tcg: Define MO_TL Richard Henderson
2023-10-23 18:13 ` [PULL v3 33/38] target/arm: Use tcg_gen_ext_i64 Richard Henderson
2023-10-23 18:13 ` [PULL v3 34/38] target/i386: Use tcg_gen_ext_tl Richard Henderson
2023-10-23 18:13 ` [PULL v3 35/38] target/m68k: Use tcg_gen_ext_i32 Richard Henderson
2023-10-23 18:13 ` [PULL v3 36/38] target/rx: " Richard Henderson
2023-10-23 18:13 ` [PULL v3 37/38] target/tricore: Use tcg_gen_*extract_tl Richard Henderson
2023-10-23 18:13 ` [PULL v3 38/38] target/xtensa: Use tcg_gen_sextract_i32 Richard Henderson
2023-10-24  1:15 ` [PULL v3 00/38] tcg patch queue Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231023181329.171490-30-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).