* [PULL 01/12] softmmu: Tidy dirtylimit_dirty_ring_full_time
2023-05-02 11:20 [PULL 00/12] tcg patch queue Richard Henderson
@ 2023-05-02 11:20 ` Richard Henderson
2023-05-02 11:20 ` [PULL 02/12] accel/tcg: Uncache the host address for instruction fetch when tlb size < 1 Richard Henderson
` (10 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2023-05-02 11:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Philippe Mathieu-Daudé, Thomas Huth, Juan Quintela
Drop inline marker: let compiler decide.
Change return type to uint64_t: this matches the computation in the
return statement and the local variable assignment in the caller.
Rename local to dirty_ring_size_MB to fix typo.
Simplify conversion to MiB via qemu_target_page_bits and right shift.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
softmmu/dirtylimit.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index 82986c1499..71bf6dc7a4 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -232,18 +232,23 @@ bool dirtylimit_vcpu_index_valid(int cpu_index)
cpu_index >= ms->smp.max_cpus);
}
-static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
+static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
static uint64_t max_dirtyrate;
- uint32_t dirty_ring_size = kvm_dirty_ring_size();
- uint64_t dirty_ring_size_meory_MB =
- dirty_ring_size * qemu_target_page_size() >> 20;
+ unsigned target_page_bits = qemu_target_page_bits();
+ uint64_t dirty_ring_size_MB;
+
+ /* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. */
+ assert(target_page_bits < 20);
+
+ /* Convert ring size (pages) to MiB (2**20). */
+ dirty_ring_size_MB = kvm_dirty_ring_size() >> (20 - target_page_bits);
if (max_dirtyrate < dirtyrate) {
max_dirtyrate = dirtyrate;
}
- return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate;
+ return dirty_ring_size_MB * 1000000 / max_dirtyrate;
}
static inline bool dirtylimit_done(uint64_t quota,
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PULL 02/12] accel/tcg: Uncache the host address for instruction fetch when tlb size < 1
2023-05-02 11:20 [PULL 00/12] tcg patch queue Richard Henderson
2023-05-02 11:20 ` [PULL 01/12] softmmu: Tidy dirtylimit_dirty_ring_full_time Richard Henderson
@ 2023-05-02 11:20 ` Richard Henderson
2023-05-02 11:20 ` [PULL 03/12] qemu/bitops.h: Limit rotate amounts Richard Henderson
` (9 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2023-05-02 11:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Weiwei Li, Junqiang Wang, LIU Zhiwei
From: Weiwei Li <liweiwei@iscas.ac.cn>
When a PMP entry overlaps part of the page, we'll set the tlb_size to 1, which
will make the address in the tlb entry set with TLB_INVALID_MASK, and the next
access will again go through tlb_fill. However, this way will not work in
tb_gen_code() => get_page_addr_code_hostp(): the TLB host address will be
cached, and the following instructions can use this host address directly
which may lead to the bypass of PMP related check.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1542.
Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230422130329.23555-6-liweiwei@iscas.ac.cn>
---
accel/tcg/cputlb.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index e984a98dc4..efa0cb67c9 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1696,6 +1696,11 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
if (p == NULL) {
return -1;
}
+
+ if (full->lg_page_size < TARGET_PAGE_BITS) {
+ return -1;
+ }
+
if (hostp) {
*hostp = p;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PULL 03/12] qemu/bitops.h: Limit rotate amounts
2023-05-02 11:20 [PULL 00/12] tcg patch queue Richard Henderson
2023-05-02 11:20 ` [PULL 01/12] softmmu: Tidy dirtylimit_dirty_ring_full_time Richard Henderson
2023-05-02 11:20 ` [PULL 02/12] accel/tcg: Uncache the host address for instruction fetch when tlb size < 1 Richard Henderson
@ 2023-05-02 11:20 ` Richard Henderson
2023-05-02 11:20 ` [PULL 04/12] qemu/host-utils.h: Add clz and ctz functions for lower-bit integers Richard Henderson
` (8 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2023-05-02 11:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Dickon Hood
From: Dickon Hood <dickon.hood@codethink.co.uk>
Rotates have been fixed up to only allow for reasonable rotate amounts
(ie, no rotates >7 on an 8b value etc.) This fixes a problem with riscv
vector rotate instructions.
Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230428144757.57530-9-lawrence.hunter@codethink.co.uk>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/qemu/bitops.h | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index 03213ce952..c443995b3b 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -218,7 +218,8 @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr,
*/
static inline uint8_t rol8(uint8_t word, unsigned int shift)
{
- return (word << shift) | (word >> ((8 - shift) & 7));
+ shift &= 7;
+ return (word << shift) | (word >> (8 - shift));
}
/**
@@ -228,7 +229,8 @@ static inline uint8_t rol8(uint8_t word, unsigned int shift)
*/
static inline uint8_t ror8(uint8_t word, unsigned int shift)
{
- return (word >> shift) | (word << ((8 - shift) & 7));
+ shift &= 7;
+ return (word >> shift) | (word << (8 - shift));
}
/**
@@ -238,7 +240,8 @@ static inline uint8_t ror8(uint8_t word, unsigned int shift)
*/
static inline uint16_t rol16(uint16_t word, unsigned int shift)
{
- return (word << shift) | (word >> ((16 - shift) & 15));
+ shift &= 15;
+ return (word << shift) | (word >> (16 - shift));
}
/**
@@ -248,7 +251,8 @@ static inline uint16_t rol16(uint16_t word, unsigned int shift)
*/
static inline uint16_t ror16(uint16_t word, unsigned int shift)
{
- return (word >> shift) | (word << ((16 - shift) & 15));
+ shift &= 15;
+ return (word >> shift) | (word << (16 - shift));
}
/**
@@ -258,7 +262,8 @@ static inline uint16_t ror16(uint16_t word, unsigned int shift)
*/
static inline uint32_t rol32(uint32_t word, unsigned int shift)
{
- return (word << shift) | (word >> ((32 - shift) & 31));
+    shift &= 31;
+    /*
+     * Mask the complementary count as well: with shift == 0 an unmasked
+     * "32 - shift" would shift a 32-bit value by 32, which is undefined.
+     */
+    return (word << shift) | (word >> (-shift & 31));
}
/**
@@ -268,7 +273,8 @@ static inline uint32_t rol32(uint32_t word, unsigned int shift)
*/
static inline uint32_t ror32(uint32_t word, unsigned int shift)
{
- return (word >> shift) | (word << ((32 - shift) & 31));
+    shift &= 31;
+    /*
+     * Mask the complementary count as well: with shift == 0 an unmasked
+     * "32 - shift" would shift a 32-bit value by 32, which is undefined.
+     */
+    return (word >> shift) | (word << (-shift & 31));
}
/**
@@ -278,7 +284,8 @@ static inline uint32_t ror32(uint32_t word, unsigned int shift)
*/
static inline uint64_t rol64(uint64_t word, unsigned int shift)
{
- return (word << shift) | (word >> ((64 - shift) & 63));
+    shift &= 63;
+    /*
+     * Mask the complementary count as well: with shift == 0 an unmasked
+     * "64 - shift" would shift a 64-bit value by 64, which is undefined.
+     */
+    return (word << shift) | (word >> (-shift & 63));
}
/**
@@ -288,7 +295,8 @@ static inline uint64_t rol64(uint64_t word, unsigned int shift)
*/
static inline uint64_t ror64(uint64_t word, unsigned int shift)
{
- return (word >> shift) | (word << ((64 - shift) & 63));
+    shift &= 63;
+    /*
+     * Mask the complementary count as well: with shift == 0 an unmasked
+     * "64 - shift" would shift a 64-bit value by 64, which is undefined.
+     */
+    return (word >> shift) | (word << (-shift & 63));
}
/**
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PULL 04/12] qemu/host-utils.h: Add clz and ctz functions for lower-bit integers
2023-05-02 11:20 [PULL 00/12] tcg patch queue Richard Henderson
` (2 preceding siblings ...)
2023-05-02 11:20 ` [PULL 03/12] qemu/bitops.h: Limit rotate amounts Richard Henderson
@ 2023-05-02 11:20 ` Richard Henderson
2023-05-02 11:20 ` [PULL 05/12] tcg: Add tcg_gen_gvec_andcs Richard Henderson
` (7 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2023-05-02 11:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Kiran Ostrolenk
From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
This is for use in the RISC-V vclz and vctz instructions (implemented in
a subsequent commit).
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230428144757.57530-11-lawrence.hunter@codethink.co.uk>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/qemu/host-utils.h | 54 +++++++++++++++++++++++++++++++++++++++
1 file changed, 54 insertions(+)
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 3ce62bf4a5..d3b4dce6a9 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -107,6 +107,36 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
}
#endif
+/**
+ * clz8 - count leading zeros in an 8-bit value.
+ * @val: The value to search
+ *
+ * Returns 8 if the value is zero. Note that the GCC builtin is
+ * undefined if the value is zero.
+ *
+ * Note that the GCC builtin will upcast its argument to an `unsigned int`
+ * so this function subtracts off the number of prepended zeroes.
+ */
+static inline int clz8(uint8_t val)
+{
+ return val ? __builtin_clz(val) - 24 : 8;
+}
+
+/**
+ * clz16 - count leading zeros in a 16-bit value.
+ * @val: The value to search
+ *
+ * Returns 16 if the value is zero. Note that the GCC builtin is
+ * undefined if the value is zero.
+ *
+ * Note that the GCC builtin will upcast its argument to an `unsigned int`
+ * so this function subtracts off the number of prepended zeroes.
+ */
+static inline int clz16(uint16_t val)
+{
+ return val ? __builtin_clz(val) - 16 : 16;
+}
+
/**
* clz32 - count leading zeros in a 32-bit value.
* @val: The value to search
@@ -153,6 +183,30 @@ static inline int clo64(uint64_t val)
return clz64(~val);
}
+/**
+ * ctz8 - count trailing zeros in an 8-bit value.
+ * @val: The value to search
+ *
+ * Returns 8 if the value is zero. Note that the GCC builtin is
+ * undefined if the value is zero.
+ */
+static inline int ctz8(uint8_t val)
+{
+ return val ? __builtin_ctz(val) : 8;
+}
+
+/**
+ * ctz16 - count trailing zeros in a 16-bit value.
+ * @val: The value to search
+ *
+ * Returns 16 if the value is zero. Note that the GCC builtin is
+ * undefined if the value is zero.
+ */
+static inline int ctz16(uint16_t val)
+{
+ return val ? __builtin_ctz(val) : 16;
+}
+
/**
* ctz32 - count trailing zeros in a 32-bit value.
* @val: The value to search
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PULL 05/12] tcg: Add tcg_gen_gvec_andcs
2023-05-02 11:20 [PULL 00/12] tcg patch queue Richard Henderson
` (3 preceding siblings ...)
2023-05-02 11:20 ` [PULL 04/12] qemu/host-utils.h: Add clz and ctz functions for lower-bit integers Richard Henderson
@ 2023-05-02 11:20 ` Richard Henderson
2023-05-02 11:20 ` [PULL 06/12] tcg: Add tcg_gen_gvec_rotrs Richard Henderson
` (6 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2023-05-02 11:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Nazar Kazakov
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Add tcg expander and helper functions for and-complement
vector with scalar operand.
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk>
[rth: Split out of larger patch.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/tcg-runtime.h | 1 +
include/tcg/tcg-op-gvec.h | 2 ++
accel/tcg/tcg-runtime-gvec.c | 11 +++++++++++
tcg/tcg-op-gvec.c | 17 +++++++++++++++++
4 files changed, 31 insertions(+)
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index e141a6ab24..b8e6421c8a 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -217,6 +217,7 @@ DEF_HELPER_FLAGS_4(gvec_nor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eqv, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_ands, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_andcs, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_xors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_ors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index 28cafbcc5c..6d58683171 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -330,6 +330,8 @@ void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index ac7d28c251..97399493d5 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -550,6 +550,17 @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
clear_high(d, oprsz, desc);
}
+void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
+ }
+ clear_high(d, oprsz, desc);
+}
+
void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 047a832f44..9c14908a46 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -2761,6 +2761,23 @@ void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
}
+/*
+ * Expand the vector operation D = A & ~C, where the scalar operand @c is
+ * first duplicated into a 64-bit temporary according to @vece.
+ */
+void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+    static const GVecGen2s g = {
+        .fni8 = tcg_gen_andc_i64,
+        .fniv = tcg_gen_andc_vec,
+        .fno = gen_helper_gvec_andcs,
+        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+        .vece = MO_64
+    };
+
+    TCGv_i64 tmp = tcg_temp_ebb_new_i64();
+    tcg_gen_dup_i64(vece, tmp, c);
+    /* Pass the duplicated scalar, not the raw @c, to the expander. */
+    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &g);
+    tcg_temp_free_i64(tmp);
+}
+
+
static const GVecGen2s gop_xors = {
.fni8 = tcg_gen_xor_i64,
.fniv = tcg_gen_xor_vec,
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PULL 06/12] tcg: Add tcg_gen_gvec_rotrs
2023-05-02 11:20 [PULL 00/12] tcg patch queue Richard Henderson
` (4 preceding siblings ...)
2023-05-02 11:20 ` [PULL 05/12] tcg: Add tcg_gen_gvec_andcs Richard Henderson
@ 2023-05-02 11:20 ` Richard Henderson
2023-05-02 11:20 ` [PULL 07/12] qemu/int128: Re-shuffle Int128Alias members Richard Henderson
` (5 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2023-05-02 11:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Nazar Kazakov
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Add tcg expander and helper functions for rotate right
vector with scalar operand.
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk>
[rth: Split out of larger patch; mask rotation count.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg-op-gvec.h | 2 ++
tcg/tcg-op-gvec.c | 11 +++++++++++
2 files changed, 13 insertions(+)
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index 6d58683171..a8183bfeab 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -371,6 +371,8 @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
/*
* Perform vector shift by vector element, modulo the element size.
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 9c14908a46..f51bcaa87b 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -3353,6 +3353,17 @@ void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
}
+void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i32 tmp = tcg_temp_ebb_new_i32();
+
+ tcg_gen_neg_i32(tmp, shift);
+ tcg_gen_andi_i32(tmp, tmp, (8 << vece) - 1);
+ tcg_gen_gvec_rotls(vece, dofs, aofs, tmp, oprsz, maxsz);
+ tcg_temp_free_i32(tmp);
+}
+
/*
* Expand D = A << (B % element bits)
*
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PULL 07/12] qemu/int128: Re-shuffle Int128Alias members
2023-05-02 11:20 [PULL 00/12] tcg patch queue Richard Henderson
` (5 preceding siblings ...)
2023-05-02 11:20 ` [PULL 06/12] tcg: Add tcg_gen_gvec_rotrs Richard Henderson
@ 2023-05-02 11:20 ` Richard Henderson
2023-05-02 11:20 ` [PULL 08/12] migration/xbzrle: Use __attribute__((target)) for avx512 Richard Henderson
` (4 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2023-05-02 11:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Alex Bennée
Clang 14, with --enable-tcg-interpreter errors with
include/qemu/int128.h:487:16: error: alignment of field 'i' (128 bits)
does not match the alignment of the first field in transparent union;
transparent_union attribute ignored [-Werror,-Wignored-attributes]
__int128_t i;
^
include/qemu/int128.h:486:12: note: alignment of first field is 64 bits
Int128 s;
^
1 error generated.
By placing the __uint128_t member first, this is avoided.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20230501204625.277361-1-richard.henderson@linaro.org>
---
include/qemu/int128.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index f62a46b48c..9e46cfaefc 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -483,9 +483,9 @@ static inline void bswap128s(Int128 *s)
*/
#ifdef CONFIG_INT128
typedef union {
- Int128 s;
- __int128_t i;
__uint128_t u;
+ __int128_t i;
+ Int128 s;
} Int128Alias __attribute__((transparent_union));
#else
typedef Int128 Int128Alias;
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PULL 08/12] migration/xbzrle: Use __attribute__((target)) for avx512
2023-05-02 11:20 [PULL 00/12] tcg patch queue Richard Henderson
` (6 preceding siblings ...)
2023-05-02 11:20 ` [PULL 07/12] qemu/int128: Re-shuffle Int128Alias members Richard Henderson
@ 2023-05-02 11:20 ` Richard Henderson
2023-05-02 11:20 ` [PULL 09/12] accel/tcg: Add cpu_ld*_code_mmu Richard Henderson
` (3 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2023-05-02 11:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Juan Quintela
Use the attribute, which is supported by clang, instead of
the #pragma, which is not supported and, for some reason,
also not detected by the meson probe, so we fail by -Werror.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20230501210555.289806-1-richard.henderson@linaro.org>
---
meson.build | 5 +----
migration/xbzrle.c | 9 ++++-----
2 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/meson.build b/meson.build
index c44d05a13f..77d42898c8 100644
--- a/meson.build
+++ b/meson.build
@@ -2370,12 +2370,9 @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
.require(cc.links('''
- #pragma GCC push_options
- #pragma GCC target("avx512bw")
#include <cpuid.h>
#include <immintrin.h>
- static int bar(void *a) {
-
+ static int __attribute__((target("avx512bw"))) bar(void *a) {
__m512i *x = a;
__m512i res= _mm512_abs_epi8(*x);
return res[1];
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
index c6f8b20917..258e4959c9 100644
--- a/migration/xbzrle.c
+++ b/migration/xbzrle.c
@@ -177,11 +177,11 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
}
#if defined(CONFIG_AVX512BW_OPT)
-#pragma GCC push_options
-#pragma GCC target("avx512bw")
#include <immintrin.h>
-int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
- uint8_t *dst, int dlen)
+
+int __attribute__((target("avx512bw")))
+xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+ uint8_t *dst, int dlen)
{
uint32_t zrun_len = 0, nzrun_len = 0;
int d = 0, i = 0, num = 0;
@@ -296,5 +296,4 @@ int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
}
return d;
}
-#pragma GCC pop_options
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PULL 09/12] accel/tcg: Add cpu_ld*_code_mmu
2023-05-02 11:20 [PULL 00/12] tcg patch queue Richard Henderson
` (7 preceding siblings ...)
2023-05-02 11:20 ` [PULL 08/12] migration/xbzrle: Use __attribute__((target)) for avx512 Richard Henderson
@ 2023-05-02 11:20 ` Richard Henderson
2023-05-02 11:20 ` [PULL 10/12] tcg/loongarch64: Conditionalize tcg_out_exts_i32_i64 Richard Henderson
` (2 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2023-05-02 11:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Alistair Francis, Weiwei Li, Daniel Henrique Barboza
At least RISC-V has the need to be able to perform a read
using execute permissions, outside of translation.
Add helpers to facilitate this.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Tested-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Message-Id: <20230325105429.1142530-9-richard.henderson@linaro.org>
Message-Id: <20230412114333.118895-9-richard.henderson@linaro.org>
---
include/exec/cpu_ldst.h | 9 +++++++
accel/tcg/cputlb.c | 48 ++++++++++++++++++++++++++++++++++
accel/tcg/user-exec.c | 58 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 115 insertions(+)
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index 09b55cc0ee..c141f0394f 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -445,6 +445,15 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
# define cpu_stq_mmu cpu_stq_le_mmu
#endif
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra);
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra);
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra);
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra);
+
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr);
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr);
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr);
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index efa0cb67c9..c8bd642d0e 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2773,3 +2773,51 @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
return full_ldq_code(env, addr, oi, 0);
}
+
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t retaddr)
+{
+ return full_ldub_code(env, addr, oi, retaddr);
+}
+
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t retaddr)
+{
+ MemOp mop = get_memop(oi);
+ int idx = get_mmuidx(oi);
+ uint16_t ret;
+
+ ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
+ if ((mop & MO_BSWAP) != MO_TE) {
+ ret = bswap16(ret);
+ }
+ return ret;
+}
+
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t retaddr)
+{
+ MemOp mop = get_memop(oi);
+ int idx = get_mmuidx(oi);
+ uint32_t ret;
+
+ ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
+ if ((mop & MO_BSWAP) != MO_TE) {
+ ret = bswap32(ret);
+ }
+ return ret;
+}
+
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t retaddr)
+{
+ MemOp mop = get_memop(oi);
+ int idx = get_mmuidx(oi);
+ uint64_t ret;
+
+ ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
+ if ((mop & MO_BSWAP) != MO_TE) {
+ ret = bswap64(ret);
+ }
+ return ret;
+}
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index a7e0c3e2f4..fc597a010d 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -1219,6 +1219,64 @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
return ret;
}
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ void *haddr;
+ uint8_t ret;
+
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
+ ret = ldub_p(haddr);
+ clear_helper_retaddr();
+ return ret;
+}
+
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ void *haddr;
+ uint16_t ret;
+
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
+ ret = lduw_p(haddr);
+ clear_helper_retaddr();
+ if (get_memop(oi) & MO_BSWAP) {
+ ret = bswap16(ret);
+ }
+ return ret;
+}
+
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ void *haddr;
+ uint32_t ret;
+
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
+ ret = ldl_p(haddr);
+ clear_helper_retaddr();
+ if (get_memop(oi) & MO_BSWAP) {
+ ret = bswap32(ret);
+ }
+ return ret;
+}
+
+/*
+ * Load a 64-bit value from guest address @addr as a code fetch, byte-swapping
+ * the result when the memop in @oi has MO_BSWAP set.
+ */
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
+                          MemOpIdx oi, uintptr_t ra)
+{
+    void *haddr;
+    uint64_t ret;
+
+    /* Same access type as the 8/16/32-bit code loads: an instruction fetch. */
+    haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
+    ret = ldq_p(haddr);
+    clear_helper_retaddr();
+    if (get_memop(oi) & MO_BSWAP) {
+        ret = bswap64(ret);
+    }
+    return ret;
+}
+
#include "ldst_common.c.inc"
/*
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PULL 10/12] tcg/loongarch64: Conditionalize tcg_out_exts_i32_i64
2023-05-02 11:20 [PULL 00/12] tcg patch queue Richard Henderson
` (8 preceding siblings ...)
2023-05-02 11:20 ` [PULL 09/12] accel/tcg: Add cpu_ld*_code_mmu Richard Henderson
@ 2023-05-02 11:20 ` Richard Henderson
2023-05-02 11:20 ` [PULL 11/12] tcg/mips: " Richard Henderson
2023-05-02 11:20 ` [PULL 12/12] tcg: Introduce tcg_out_movext2 Richard Henderson
11 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2023-05-02 11:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Philippe Mathieu-Daudé
Since TCG_TYPE_I32 values are kept sign-extended in registers,
via ".w" instructions, we need not extend if the register matches.
This is already relied upon by comparisons.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/loongarch64/tcg-target.c.inc | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 21c2fc9e98..0940788c6f 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -463,7 +463,9 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
{
- tcg_out_ext32s(s, ret, arg);
+ if (ret != arg) {
+ tcg_out_ext32s(s, ret, arg);
+ }
}
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PULL 11/12] tcg/mips: Conditionalize tcg_out_exts_i32_i64
2023-05-02 11:20 [PULL 00/12] tcg patch queue Richard Henderson
` (9 preceding siblings ...)
2023-05-02 11:20 ` [PULL 10/12] tcg/loongarch64: Conditionalize tcg_out_exts_i32_i64 Richard Henderson
@ 2023-05-02 11:20 ` Richard Henderson
2023-05-02 11:20 ` [PULL 12/12] tcg: Introduce tcg_out_movext2 Richard Henderson
11 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2023-05-02 11:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Philippe Mathieu-Daudé
Since TCG_TYPE_I32 values are kept sign-extended in registers, we need not
extend if the register matches. This is already relied upon by comparisons.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/mips/tcg-target.c.inc | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 346c614354..a83ebe8729 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -582,7 +582,9 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs)
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
{
- tcg_out_ext32s(s, rd, rs);
+ if (rd != rs) {
+ tcg_out_ext32s(s, rd, rs);
+ }
}
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PULL 12/12] tcg: Introduce tcg_out_movext2
2023-05-02 11:20 [PULL 00/12] tcg patch queue Richard Henderson
` (10 preceding siblings ...)
2023-05-02 11:20 ` [PULL 11/12] tcg/mips: " Richard Henderson
@ 2023-05-02 11:20 ` Richard Henderson
11 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2023-05-02 11:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Philippe Mathieu-Daudé
This is common code in most qemu_{ld,st} slow paths, moving two
registers when there may be overlap between sources and destinations.
At present, this is only used by 32-bit hosts for 64-bit data,
but will shortly be used for more than that.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg.c | 69 ++++++++++++++++++++++++++++++++++++---
tcg/arm/tcg-target.c.inc | 44 ++++++++++---------------
tcg/i386/tcg-target.c.inc | 19 +++++------
3 files changed, 90 insertions(+), 42 deletions(-)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index fde5ccc57c..cfd3262a4a 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -115,8 +115,7 @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
-static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
- __attribute__((unused));
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
@@ -354,6 +353,14 @@ void tcg_raise_tb_overflow(TCGContext *s)
siglongjmp(s->jmp_trans, -2);
}
+typedef struct TCGMovExtend {
+ TCGReg dst;
+ TCGReg src;
+ TCGType dst_type;
+ TCGType src_type;
+ MemOp src_ext;
+} TCGMovExtend;
+
/**
* tcg_out_movext -- move and extend
* @s: tcg context
@@ -365,9 +372,8 @@ void tcg_raise_tb_overflow(TCGContext *s)
*
* Move or extend @src into @dst, depending on @src_ext and the types.
*/
-static void __attribute__((unused))
-tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
- TCGType src_type, MemOp src_ext, TCGReg src)
+static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
+ TCGType src_type, MemOp src_ext, TCGReg src)
{
switch (src_ext) {
case MO_UB:
@@ -417,6 +423,59 @@ tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
}
}
+/* As tcg_out_movext, taking operands from @i but the source from @src. */
+static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
+ TCGReg src)
+{
+ tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
+}
+
+static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
+{
+ tcg_out_movext1_new_src(s, i, i->src);  /* source as recorded in @i */
+}
+
+/**
+ * tcg_out_movext2 -- move and extend two pairs
+ * @s: tcg context
+ * @i1: first move description
+ * @i2: second move description
+ * @scratch: temporary register for a swap without xchg, or -1 for none
+ *
+ * As tcg_out_movext, for both @i1 and @i2, taking care of overlap
+ * between the sources and destinations.
+ */
+
+static void __attribute__((unused))
+tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
+ const TCGMovExtend *i2, int scratch)
+{
+ TCGReg src1 = i1->src;
+ TCGReg src2 = i2->src;
+
+ if (i1->dst != src2) {  /* i1 does not clobber i2's source */
+ tcg_out_movext1(s, i1);
+ tcg_out_movext1(s, i2);
+ return;
+ }
+ if (i2->dst == src1) {  /* each destination is the other's source */
+ TCGType src1_type = i1->src_type;
+ TCGType src2_type = i2->src_type;
+
+ if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
+ /* The data is now in the correct registers, now extend. */
+ src1 = i2->src;
+ src2 = i1->src;
+ } else {  /* tcg_out_xchg returned false: go through @scratch */
+ tcg_debug_assert(scratch >= 0);
+ tcg_out_mov(s, src1_type, scratch, src1);
+ src1 = scratch;
+ }
+ }
+ tcg_out_movext1_new_src(s, i2, src2);  /* i2 first: i1 overwrites i2->src */
+ tcg_out_movext1_new_src(s, i1, src1);
+}
+
#define C_PFX1(P, A) P##A
#define C_PFX2(P, A, B) P##A##_##B
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 8d769ca0a2..83c818a58b 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1545,7 +1545,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
- TCGReg argreg, datalo, datahi;
+ TCGReg argreg;
MemOpIdx oi = lb->oi;
MemOp opc = get_memop(oi);
@@ -1565,22 +1565,16 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
/* Use the canonical unsigned helpers and minimize icache usage. */
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
- datalo = lb->datalo_reg;
- datahi = lb->datahi_reg;
if ((opc & MO_SIZE) == MO_64) {
- if (datalo != TCG_REG_R1) {
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
- } else if (datahi != TCG_REG_R0) {
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
- } else {
- tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
- }
+ TCGMovExtend ext[2] = {
+ { .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32,
+ .src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+ { .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32,
+ .src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+ };
+ tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
} else {
- tcg_out_movext(s, TCG_TYPE_I32, datalo,
+ tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg,
TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0);
}
@@ -1663,17 +1657,15 @@ static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
if (TARGET_LONG_BITS == 64) {
/* 64-bit target address is aligned into R2:R3. */
- if (l->addrhi_reg != TCG_REG_R2) {
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
- } else if (l->addrlo_reg != TCG_REG_R3) {
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
- } else {
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, TCG_REG_R2);
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, TCG_REG_R3);
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, TCG_REG_R1);
- }
+ TCGMovExtend ext[2] = {
+ { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
+ .src = l->addrlo_reg,
+ .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+ { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
+ .src = l->addrhi_reg,
+ .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+ };
+ tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
} else {
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
}
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index c8e2bf537f..caf91a3151 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1914,7 +1914,6 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
MemOpIdx oi = l->oi;
MemOp opc = get_memop(oi);
- TCGReg data_reg;
tcg_insn_unit **label_ptr = &l->label_ptr[0];
/* resolve label address */
@@ -1951,18 +1950,16 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
- data_reg = l->datalo_reg;
if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
- if (data_reg == TCG_REG_EDX) {
- /* xchg %edx, %eax */
- tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
- } else {
- tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
- }
+ TCGMovExtend ext[2] = {
+ { .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32,
+ .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+ { .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32,
+ .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+ };
+ tcg_out_movext2(s, &ext[0], &ext[1], -1);
} else {
- tcg_out_movext(s, l->type, data_reg,
+ tcg_out_movext(s, l->type, l->datalo_reg,
TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 13+ messages in thread