* [Qemu-devel] [PATCH 1/6] target/ppc: implement complete set of Vsr* macros
2018-12-23 11:38 [Qemu-devel] [PATCH 0/6] target/ppc: remove various endian hacks from int_helper.c Mark Cave-Ayland
@ 2018-12-23 11:38 ` Mark Cave-Ayland
2018-12-25 20:07 ` Richard Henderson
2018-12-23 11:38 ` [Qemu-devel] [PATCH 2/6] target/ppc: rework vmrg{l, h}{b, h, w} instructions to use " Mark Cave-Ayland
` (4 subsequent siblings)
5 siblings, 1 reply; 12+ messages in thread
From: Mark Cave-Ayland @ 2018-12-23 11:38 UTC (permalink / raw)
To: qemu-devel, qemu-ppc, richard.henderson, david
This prepares us for eliminating the use of direct array access within the VMX
instruction implementations.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
---
target/ppc/internal.h | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index c7c0f77dd6..f26a71ffcf 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -206,16 +206,23 @@ EXTRACT_HELPER_SPLIT_3(DCMX_XV, 5, 16, 0, 1, 2, 5, 1, 6, 6);
#if defined(HOST_WORDS_BIGENDIAN)
#define VsrB(i) u8[i]
+#define VsrSB(i) s8[i]
#define VsrH(i) u16[i]
+#define VsrSH(i) s16[i]
#define VsrW(i) u32[i]
+#define VsrSW(i) s32[i]
#define VsrD(i) u64[i]
+#define VsrSD(i) s64[i]
#else
#define VsrB(i) u8[15 - (i)]
+#define VsrSB(i) s8[15 - (i)]
#define VsrH(i) u16[7 - (i)]
+#define VsrSH(i) s16[7 - (i)]
#define VsrW(i) u32[3 - (i)]
+#define VsrSW(i) s32[3 - (i)]
#define VsrD(i) u64[1 - (i)]
+#define VsrSD(i) s64[1 - (i)]
#endif
-
static inline void getVSR(int n, ppc_vsr_t *vsr, CPUPPCState *env)
{
vsr->VsrD(0) = env->vsr[n].u64[0];
--
2.11.0
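To see what the new accessors buy us, here is a minimal stand-alone sketch (not QEMU code: the avr_t union, the hard-coded little-endian-host macro variants and main() are stand-ins for illustration) showing that element 0 is always the most significant element, whatever the host byte order:

#include <stdint.h>
#include <stdio.h>

typedef union {
    uint8_t  u8[16];
    uint16_t u16[8];
    uint32_t u32[4];
    uint64_t u64[2];
} avr_t;                           /* cut-down stand-in for ppc_avr_t */

/* little-endian-host variants; a big-endian host uses the index as-is */
#define VsrB(i) u8[15 - (i)]
#define VsrW(i) u32[3 - (i)]
#define VsrD(i) u64[1 - (i)]

int main(void)
{
    avr_t v;

    v.VsrD(0) = 0x0011223344556677ull;   /* most significant doubleword  */
    v.VsrD(1) = 0x8899aabbccddeeffull;   /* least significant doubleword */

    /* Prints "00 ccddeeff" here; the big-endian variants give the same
     * answer on a big-endian host, so helpers written in terms of Vsr*
     * no longer need per-host index fix-ups. */
    printf("%02x %08x\n", (unsigned)v.VsrB(0), (unsigned)v.VsrW(3));
    return 0;
}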
* [Qemu-devel] [PATCH 2/6] target/ppc: rework vmrg{l, h}{b, h, w} instructions to use Vsr* macros
2018-12-23 11:38 [Qemu-devel] [PATCH 0/6] target/ppc: remove various endian hacks from int_helper.c Mark Cave-Ayland
2018-12-23 11:38 ` [Qemu-devel] [PATCH 1/6] target/ppc: implement complete set of Vsr* macros Mark Cave-Ayland
@ 2018-12-23 11:38 ` Mark Cave-Ayland
2018-12-23 11:38 ` [Qemu-devel] [PATCH 3/6] target/ppc: rework vmul{e, o}{s, u}{b, " Mark Cave-Ayland
` (3 subsequent siblings)
5 siblings, 0 replies; 12+ messages in thread
From: Mark Cave-Ayland @ 2018-12-23 11:38 UTC (permalink / raw)
To: qemu-devel, qemu-ppc, richard.henderson, david
The current implementations make use of the endian-specific macros MRGLO/MRGHI
and also reference HI_IDX and LO_IDX directly to calculate array offsets.
Rework the implementation to use the Vsr* macros so that these per-endian
references can be removed.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
---
target/ppc/int_helper.c | 52 ++++++++++++++++++++++++-------------------------
1 file changed, 25 insertions(+), 27 deletions(-)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 598731d47a..f084a706ee 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -976,43 +976,41 @@ void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
}
}
-#define VMRG_DO(name, element, highp) \
+#define VMRG_DOLO(name, element, access) \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
ppc_avr_t result; \
int i; \
- size_t n_elems = ARRAY_SIZE(r->element); \
+ int m = ARRAY_SIZE(r->element) - 1; \
\
- for (i = 0; i < n_elems / 2; i++) { \
- if (highp) { \
- result.element[i*2+HI_IDX] = a->element[i]; \
- result.element[i*2+LO_IDX] = b->element[i]; \
- } else { \
- result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
- b->element[n_elems - i - 1]; \
- result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
- a->element[n_elems - i - 1]; \
- } \
+ for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
+ result.access(m - i) = (i & 1) ? a->access(m - (i >> 1)) \
+ : b->access(m - (i >> 1)); \
} \
*r = result; \
}
-#if defined(HOST_WORDS_BIGENDIAN)
-#define MRGHI 0
-#define MRGLO 1
-#else
-#define MRGHI 1
-#define MRGLO 0
-#endif
-#define VMRG(suffix, element) \
- VMRG_DO(mrgl##suffix, element, MRGHI) \
- VMRG_DO(mrgh##suffix, element, MRGLO)
-VMRG(b, u8)
-VMRG(h, u16)
-VMRG(w, u32)
+
+#define VMRG_DOHI(name, element, access) \
+ void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
+ { \
+ ppc_avr_t result; \
+ int i; \
+ \
+ for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
+ result.access(i) = (i & 1) ? b->access(i >> 1) \
+ : a->access(i >> 1); \
+ } \
+ *r = result; \
+ }
+
+#define VMRG(suffix, element, access) \
+ VMRG_DOLO(mrgl##suffix, element, access) \
+ VMRG_DOHI(mrgh##suffix, element, access)
+VMRG(b, u8, VsrB)
+VMRG(h, u16, VsrH)
+VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG
-#undef MRGHI
-#undef MRGLO
void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
ppc_avr_t *b, ppc_avr_t *c)
--
2.11.0
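For reference while reading the reworked loops above, the semantics being implemented are the usual merge-high/merge-low interleave; a sketch in plain C over arrays held in PowerPC element order (hypothetical *_ref helpers, not QEMU code) looks like this, and the patch expresses the same pattern through the VsrB()/VsrH()/VsrW() accessors:

#include <stdint.h>
#include <stddef.h>

/* vmrghb: interleave the high halves of a and b; vmrglb: the low halves. */
static void vmrghb_ref(uint8_t r[16], const uint8_t a[16], const uint8_t b[16])
{
    for (size_t i = 0; i < 16; i++) {
        r[i] = (i & 1) ? b[i / 2] : a[i / 2];
    }
}

static void vmrglb_ref(uint8_t r[16], const uint8_t a[16], const uint8_t b[16])
{
    for (size_t i = 0; i < 16; i++) {
        r[i] = (i & 1) ? b[8 + i / 2] : a[8 + i / 2];
    }
}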
* [Qemu-devel] [PATCH 3/6] target/ppc: rework vmul{e, o}{s, u}{b, h, w} instructions to use Vsr* macros
2018-12-23 11:38 [Qemu-devel] [PATCH 0/6] target/ppc: remove various endian hacks from int_helper.c Mark Cave-Ayland
2018-12-23 11:38 ` [Qemu-devel] [PATCH 1/6] target/ppc: implement complete set of Vsr* macros Mark Cave-Ayland
2018-12-23 11:38 ` [Qemu-devel] [PATCH 2/6] target/ppc: rework vmrg{l, h}{b, h, w} instructions to use " Mark Cave-Ayland
@ 2018-12-23 11:38 ` Mark Cave-Ayland
2018-12-25 20:11 ` Richard Henderson
2018-12-23 11:38 ` [Qemu-devel] [PATCH 4/6] target/ppc: eliminate use of HI_IDX and LO_IDX macros from int_helper.c Mark Cave-Ayland
` (2 subsequent siblings)
5 siblings, 1 reply; 12+ messages in thread
From: Mark Cave-Ayland @ 2018-12-23 11:38 UTC (permalink / raw)
To: qemu-devel, qemu-ppc, richard.henderson, david
The current implementations make use of the endian-specific macros HI_IDX and
LO_IDX directly to calculate array offsets.
Rework the implementation to use the Vsr* macros so that these per-endian
references can be removed.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
---
target/ppc/int_helper.c | 48 +++++++++++++++++++++++++++---------------------
1 file changed, 27 insertions(+), 21 deletions(-)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index f084a706ee..dc5c9fb8d8 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1118,33 +1118,39 @@ void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
}
}
-#define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
+#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
int i; \
\
- VECTOR_FOR_INORDER_I(i, prod_element) { \
- if (evenp) { \
- r->prod_element[i] = \
- (cast)a->mul_element[i * 2 + HI_IDX] * \
- (cast)b->mul_element[i * 2 + HI_IDX]; \
- } else { \
- r->prod_element[i] = \
- (cast)a->mul_element[i * 2 + LO_IDX] * \
- (cast)b->mul_element[i * 2 + LO_IDX]; \
- } \
+ for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
+ r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
+ (cast)b->mul_access(i); \
+ } \
+ }
+
+#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
+ void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
+ { \
+ int i; \
+ \
+ for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
+ r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
+ (cast)b->mul_access(i + 1); \
} \
}
-#define VMUL(suffix, mul_element, prod_element, cast) \
- VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
- VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
-VMUL(sb, s8, s16, int16_t)
-VMUL(sh, s16, s32, int32_t)
-VMUL(sw, s32, s64, int64_t)
-VMUL(ub, u8, u16, uint16_t)
-VMUL(uh, u16, u32, uint32_t)
-VMUL(uw, u32, u64, uint64_t)
-#undef VMUL_DO
+
+#define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
+ VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
+ VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
+VMUL(sb, s8, VsrSB, VsrSH, int16_t)
+VMUL(sh, s16, VsrSH, VsrSW, int32_t)
+VMUL(sw, s32, VsrSW, VsrSD, int64_t)
+VMUL(ub, u8, VsrB, VsrH, uint16_t)
+VMUL(uh, u16, VsrH, VsrW, uint32_t)
+VMUL(uw, u32, VsrW, VsrD, uint64_t)
+#undef VMUL_DO_EVN
+#undef VMUL_DO_ODD
#undef VMUL
void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
--
2.11.0
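As with the merge helpers, the even/odd multiply semantics are easier to see over plain arrays in PowerPC element order; an illustrative reference for the signed byte case (hypothetical *_ref helpers, not QEMU code), which the macros above implement via the Vsr* accessors:

#include <stdint.h>
#include <stddef.h>

/* vmulesb: multiply the even-numbered signed byte lanes into halfword
 * products; vmulosb: the same for the odd-numbered lanes. */
static void vmulesb_ref(int16_t r[8], const int8_t a[16], const int8_t b[16])
{
    for (size_t i = 0; i < 16; i += 2) {
        r[i / 2] = (int16_t)a[i] * b[i];
    }
}

static void vmulosb_ref(int16_t r[8], const int8_t a[16], const int8_t b[16])
{
    for (size_t i = 1; i < 16; i += 2) {
        r[i / 2] = (int16_t)a[i] * b[i];
    }
}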
* Re: [Qemu-devel] [PATCH 3/6] target/ppc: rework vmul{e, o}{s, u}{b, h, w} instructions to use Vsr* macros
2018-12-23 11:38 ` [Qemu-devel] [PATCH 3/6] target/ppc: rework vmul{e, o}{s, u}{b, " Mark Cave-Ayland
@ 2018-12-25 20:11 ` Richard Henderson
2018-12-28 13:46 ` Mark Cave-Ayland
0 siblings, 1 reply; 12+ messages in thread
From: Richard Henderson @ 2018-12-25 20:11 UTC (permalink / raw)
To: Mark Cave-Ayland, qemu-devel, qemu-ppc, david
On 12/23/18 10:38 PM, Mark Cave-Ayland wrote:
> -#define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
> +#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
> { \
> int i; \
> \
> + for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
> + r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
> + (cast)b->mul_access(i); \
> + } \
> + }
> +
> +#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
> + void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
> + { \
> + int i; \
> + \
> + for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
> + r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
> + (cast)b->mul_access(i + 1); \
> } \
> }
FWIW,
for (i = odd; i < ARRAY_SIZE; i += 2) {
r->pacc(i >> 1) = (cast)a->macc(i) * b->macc(i);
}
is sufficient to unify these two. But what you have isn't wrong.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
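Spelled out, the unified form suggested above might look something like this (a sketch only, reusing the macro arguments from the patch; the series as posted keeps the two variants separate):

#define VMUL_DO(name, mul_element, mul_access, prod_access, cast, odd)     \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
    {                                                                      \
        int i;                                                             \
                                                                           \
        /* odd == 0 starts on the even lanes, odd == 1 on the odd lanes */ \
        for (i = odd; i < ARRAY_SIZE(r->mul_element); i += 2) {            \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *              \
                                     (cast)b->mul_access(i);               \
        }                                                                  \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)           \
    VMUL_DO(mule##suffix, mul_element, mul_access, prod_access, cast, 0)   \
    VMUL_DO(mulo##suffix, mul_element, mul_access, prod_access, cast, 1)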
* Re: [Qemu-devel] [PATCH 3/6] target/ppc: rework vmul{e, o}{s, u}{b, h, w} instructions to use Vsr* macros
2018-12-25 20:11 ` Richard Henderson
@ 2018-12-28 13:46 ` Mark Cave-Ayland
0 siblings, 0 replies; 12+ messages in thread
From: Mark Cave-Ayland @ 2018-12-28 13:46 UTC (permalink / raw)
To: Richard Henderson, qemu-devel, qemu-ppc, david
On 25/12/2018 20:11, Richard Henderson wrote:
> On 12/23/18 10:38 PM, Mark Cave-Ayland wrote:
>> -#define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
>> +#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
>> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
>> { \
>> int i; \
>> \
>> + for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
>> + r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
>> + (cast)b->mul_access(i); \
>> + } \
>> + }
>> +
>> +#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
>> + void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
>> + { \
>> + int i; \
>> + \
>> + for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
>> + r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
>> + (cast)b->mul_access(i + 1); \
>> } \
>> }
>
> FWIW,
>
> for (i = odd; i < ARRAY_SIZE; i += 2) {
> r->pacc(i >> 1) = (cast)a->macc(i) * b->macc(i);
> }
>
> is sufficient to unify these two. But what you have isn't wrong.
>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Ah indeed that's quite neat! Thinking about it for a few days, I've decided to leave
it as-is for now, since it was useful to be able to test the odd/even variants
separately (as they were often used together in testing) and it's just that tiny bit
easier to relate to the ISA documentation.
ATB,
Mark.
* [Qemu-devel] [PATCH 4/6] target/ppc: eliminate use of HI_IDX and LO_IDX macros from int_helper.c
2018-12-23 11:38 [Qemu-devel] [PATCH 0/6] target/ppc: remove various endian hacks from int_helper.c Mark Cave-Ayland
` (2 preceding siblings ...)
2018-12-23 11:38 ` [Qemu-devel] [PATCH 3/6] target/ppc: rework vmul{e, o}{s, u}{b, " Mark Cave-Ayland
@ 2018-12-23 11:38 ` Mark Cave-Ayland
2018-12-23 11:38 ` [Qemu-devel] [PATCH 5/6] target/ppc: eliminate use of EL_IDX " Mark Cave-Ayland
2018-12-23 11:38 ` [Qemu-devel] [PATCH 6/6] target/ppc: remove various HOST_WORDS_BIGENDIAN hacks in int_helper.c Mark Cave-Ayland
5 siblings, 0 replies; 12+ messages in thread
From: Mark Cave-Ayland @ 2018-12-23 11:38 UTC (permalink / raw)
To: qemu-devel, qemu-ppc, richard.henderson, david
The original purpose of these macros was to correctly reference the high and low
parts of the VSRs regardless of the host endianness.
Replace these direct references to high and low parts with the relevant VsrD
macro instead, and completely remove the now-unused HI_IDX and LO_IDX macros.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
---
target/ppc/int_helper.c | 180 +++++++++++++++++++++++-------------------------
1 file changed, 85 insertions(+), 95 deletions(-)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index dc5c9fb8d8..addbe54c21 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -389,14 +389,6 @@ target_ulong helper_602_mfrom(target_ulong arg)
/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
-#define HI_IDX 0
-#define LO_IDX 1
-#else
-#define HI_IDX 1
-#define LO_IDX 0
-#endif
-
-#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element) \
for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
@@ -514,8 +506,8 @@ void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
res ^= res >> 32;
res ^= res >> 16;
res ^= res >> 8;
- r->u64[LO_IDX] = res & 1;
- r->u64[HI_IDX] = 0;
+ r->VsrD(1) = res & 1;
+ r->VsrD(0) = 0;
}
#define VARITH_DO(name, op, element) \
@@ -1243,8 +1235,8 @@ void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
}
}
- r->u64[HI_IDX] = perm;
- r->u64[LO_IDX] = 0;
+ r->VsrD(0) = perm;
+ r->VsrD(1) = 0;
}
#undef VBPERMQ_INDEX
@@ -1573,25 +1565,25 @@ void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
ppc_avr_t prod[2];
VECTOR_FOR_INORDER_I(i, u64) {
- prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
+ prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
for (j = 0; j < 64; j++) {
if (a->u64[i] & (1ull<<j)) {
ppc_avr_t bshift;
if (j == 0) {
- bshift.u64[HI_IDX] = 0;
- bshift.u64[LO_IDX] = b->u64[i];
+ bshift.VsrD(0) = 0;
+ bshift.VsrD(1) = b->u64[i];
} else {
- bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
- bshift.u64[LO_IDX] = b->u64[i] << j;
+ bshift.VsrD(0) = b->u64[i] >> (64 - j);
+ bshift.VsrD(1) = b->u64[i] << j;
}
- prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
- prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
+ prod[i].VsrD(1) ^= bshift.VsrD(1);
+ prod[i].VsrD(0) ^= bshift.VsrD(0);
}
}
}
- r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
- r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
+ r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
+ r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
#endif
}
@@ -1809,7 +1801,7 @@ VEXTU_X_DO(vextuwrx, 32, 0)
#define VSHIFT(suffix, leftp) \
void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
- int shift = b->u8[LO_IDX*15] & 0x7; \
+ int shift = b->VsrB(15) & 0x7; \
int doit = 1; \
int i; \
\
@@ -1820,15 +1812,15 @@ VEXTU_X_DO(vextuwrx, 32, 0)
if (shift == 0) { \
*r = *a; \
} else if (leftp) { \
- uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
+ uint64_t carry = a->VsrD(1) >> (64 - shift); \
\
- r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
- r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
+ r->VsrD(0) = (a->VsrD(0) << shift) | carry; \
+ r->VsrD(1) = a->VsrD(1) << shift; \
} else { \
- uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
+ uint64_t carry = a->VsrD(0) << (64 - shift); \
\
- r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
- r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
+ r->VsrD(1) = (a->VsrD(1) >> shift) | carry; \
+ r->VsrD(0) = a->VsrD(0) >> shift; \
} \
} \
}
@@ -1914,7 +1906,7 @@ void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
- int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
+ int sh = (b->VsrB(0xf) >> 3) & 0xf;
#if defined(HOST_WORDS_BIGENDIAN)
memmove(&r->u8[0], &a->u8[sh], 16 - sh);
@@ -2110,7 +2102,7 @@ VSR(d, u64, 0x3F)
void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
- int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
+ int sh = (b->VsrB(0xf) >> 3) & 0xf;
#if defined(HOST_WORDS_BIGENDIAN)
memmove(&r->u8[sh], &a->u8[0], 16 - sh);
@@ -2366,13 +2358,13 @@ static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
- if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
+ if (a.VsrD(0) < b.VsrD(0)) {
return -1;
- } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
+ } else if (a.VsrD(0) > b.VsrD(0)) {
return 1;
- } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
+ } else if (a.VsrD(1) < b.VsrD(1)) {
return -1;
- } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
+ } else if (a.VsrD(1) > b.VsrD(1)) {
return 1;
} else {
return 0;
@@ -2381,17 +2373,17 @@ static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
- t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
- t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
- (~a.u64[LO_IDX] < b.u64[LO_IDX]);
+ t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
+ t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
+ (~a.VsrD(1) < b.VsrD(1));
}
static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
ppc_avr_t not_a;
- t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
- t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
- (~a.u64[LO_IDX] < b.u64[LO_IDX]);
+ t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
+ t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
+ (~a.VsrD(1) < b.VsrD(1));
avr_qw_not(&not_a, a);
return avr_qw_cmpu(not_a, b) < 0;
}
@@ -2413,11 +2405,11 @@ void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else
- if (c->u64[LO_IDX] & 1) {
+ if (c->VsrD(1) & 1) {
ppc_avr_t tmp;
- tmp.u64[HI_IDX] = 0;
- tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
+ tmp.VsrD(0) = 0;
+ tmp.VsrD(1) = c->VsrD(1) & 1;
avr_qw_add(&tmp, *a, tmp);
avr_qw_add(r, tmp, *b);
} else {
@@ -2435,8 +2427,8 @@ void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
avr_qw_not(&not_a, *a);
- r->u64[HI_IDX] = 0;
- r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
+ r->VsrD(0) = 0;
+ r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}
@@ -2451,7 +2443,7 @@ void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
r->u128 = carry_out;
#else
- int carry_in = c->u64[LO_IDX] & 1;
+ int carry_in = c->VsrD(1) & 1;
int carry_out = 0;
ppc_avr_t tmp;
@@ -2461,8 +2453,8 @@ void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
ppc_avr_t one = QW_ONE;
carry_out = avr_qw_addc(&tmp, tmp, one);
}
- r->u64[HI_IDX] = 0;
- r->u64[LO_IDX] = carry_out;
+ r->VsrD(0) = 0;
+ r->VsrD(1) = carry_out;
#endif
}
@@ -2490,8 +2482,8 @@ void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
avr_qw_not(&tmp, *b);
avr_qw_add(&sum, *a, tmp);
- tmp.u64[HI_IDX] = 0;
- tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
+ tmp.VsrD(0) = 0;
+ tmp.VsrD(1) = c->VsrD(1) & 1;
avr_qw_add(r, sum, tmp);
#endif
}
@@ -2507,10 +2499,10 @@ void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
ppc_avr_t tmp;
avr_qw_not(&tmp, *b);
avr_qw_add(&tmp, *a, tmp);
- carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
+ carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
}
- r->u64[HI_IDX] = 0;
- r->u64[LO_IDX] = carry;
+ r->VsrD(0) = 0;
+ r->VsrD(1) = carry;
#endif
}
@@ -2521,17 +2513,17 @@ void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
(~a->u128 < ~b->u128) ||
((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
- int carry_in = c->u64[LO_IDX] & 1;
+ int carry_in = c->VsrD(1) & 1;
int carry_out = (avr_qw_cmpu(*a, *b) > 0);
if (!carry_out && carry_in) {
ppc_avr_t tmp;
avr_qw_not(&tmp, *b);
avr_qw_add(&tmp, *a, tmp);
- carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
+ carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
}
- r->u64[HI_IDX] = 0;
- r->u64[LO_IDX] = carry_out;
+ r->VsrD(0) = 0;
+ r->VsrD(1) = carry_out;
#endif
}
@@ -2629,7 +2621,7 @@ static bool bcd_is_valid(ppc_avr_t *bcd)
static int bcd_cmp_zero(ppc_avr_t *bcd)
{
- if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
+ if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
return CRF_EQ;
} else {
return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
@@ -2749,7 +2741,7 @@ uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
}
if (unlikely(invalid)) {
- result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
+ result.VsrD(0) = result.VsrD(1) = -1;
cr = CRF_SO;
} else if (overflow) {
cr |= CRF_SO;
@@ -2818,7 +2810,7 @@ uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
int invalid = (sgnb == 0);
ppc_avr_t ret = { .u64 = { 0, 0 } };
- int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);
+ int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
for (i = 1; i < 8; i++) {
set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
@@ -2898,7 +2890,7 @@ uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
int invalid = (sgnb == 0);
ppc_avr_t ret = { .u64 = { 0, 0 } };
- int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);
+ int ox_flag = ((b->VsrD(0) >> 4) != 0);
for (i = 0; i < 16; i++) {
digit = bcd_get_digit(b, i + 1, &invalid);
@@ -2939,13 +2931,13 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
uint64_t hi_value;
ppc_avr_t ret = { .u64 = { 0, 0 } };
- if (b->s64[HI_IDX] < 0) {
- lo_value = -b->s64[LO_IDX];
- hi_value = ~b->u64[HI_IDX] + !lo_value;
+ if (b->VsrSD(0) < 0) {
+ lo_value = -b->VsrSD(1);
+ hi_value = ~b->VsrD(0) + !lo_value;
bcd_put_digit(&ret, 0xD, 0);
} else {
- lo_value = b->u64[LO_IDX];
- hi_value = b->u64[HI_IDX];
+ lo_value = b->VsrD(1);
+ hi_value = b->VsrD(0);
bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
}
@@ -2993,11 +2985,11 @@ uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
}
if (sgnb == -1) {
- r->s64[LO_IDX] = -lo_value;
- r->s64[HI_IDX] = ~hi_value + !r->s64[LO_IDX];
+ r->VsrSD(1) = -lo_value;
+ r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
} else {
- r->s64[LO_IDX] = lo_value;
- r->s64[HI_IDX] = hi_value;
+ r->VsrSD(1) = lo_value;
+ r->VsrSD(0) = hi_value;
}
cr = bcd_cmp_zero(b);
@@ -3057,7 +3049,7 @@ uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
bool ox_flag = false;
int sgnb = bcd_get_sgn(b);
ppc_avr_t ret = *b;
- ret.u64[LO_IDX] &= ~0xf;
+ ret.VsrD(1) &= ~0xf;
if (bcd_is_valid(b) == false) {
return CRF_SO;
@@ -3070,9 +3062,9 @@ uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
}
if (i > 0) {
- ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
+ ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
} else {
- urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
+ urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
}
bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
@@ -3109,13 +3101,13 @@ uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
#endif
if (i >= 32) {
ox_flag = true;
- ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
+ ret.VsrD(1) = ret.VsrD(0) = 0;
} else if (i <= -32) {
- ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
+ ret.VsrD(1) = ret.VsrD(0) = 0;
} else if (i > 0) {
- ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
+ ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
} else {
- urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
+ urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
}
*r = ret;
@@ -3135,7 +3127,7 @@ uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
bool ox_flag = false;
int sgnb = bcd_get_sgn(b);
ppc_avr_t ret = *b;
- ret.u64[LO_IDX] &= ~0xf;
+ ret.VsrD(1) &= ~0xf;
#if defined(HOST_WORDS_BIGENDIAN)
int i = a->s8[7];
@@ -3156,9 +3148,9 @@ uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
}
if (i > 0) {
- ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
+ ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
} else {
- urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
+ urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
@@ -3192,19 +3184,19 @@ uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
if (i > 16 && i < 32) {
mask = (uint64_t)-1 >> (128 - i * 4);
- if (ret.u64[HI_IDX] & ~mask) {
+ if (ret.VsrD(0) & ~mask) {
ox_flag = CRF_SO;
}
- ret.u64[HI_IDX] &= mask;
+ ret.VsrD(0) &= mask;
} else if (i >= 0 && i <= 16) {
mask = (uint64_t)-1 >> (64 - i * 4);
- if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
+ if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
ox_flag = CRF_SO;
}
- ret.u64[LO_IDX] &= mask;
- ret.u64[HI_IDX] = 0;
+ ret.VsrD(1) &= mask;
+ ret.VsrD(0) = 0;
}
bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
*r = ret;
@@ -3235,28 +3227,28 @@ uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
#endif
if (i > 16 && i < 33) {
mask = (uint64_t)-1 >> (128 - i * 4);
- if (ret.u64[HI_IDX] & ~mask) {
+ if (ret.VsrD(0) & ~mask) {
ox_flag = CRF_SO;
}
- ret.u64[HI_IDX] &= mask;
+ ret.VsrD(0) &= mask;
} else if (i > 0 && i <= 16) {
mask = (uint64_t)-1 >> (64 - i * 4);
- if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
+ if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
ox_flag = CRF_SO;
}
- ret.u64[LO_IDX] &= mask;
- ret.u64[HI_IDX] = 0;
+ ret.VsrD(1) &= mask;
+ ret.VsrD(0) = 0;
} else if (i == 0) {
- if (ret.u64[HI_IDX] || ret.u64[LO_IDX]) {
+ if (ret.VsrD(0) || ret.VsrD(1)) {
ox_flag = CRF_SO;
}
- ret.u64[HI_IDX] = ret.u64[LO_IDX] = 0;
+ ret.VsrD(0) = ret.VsrD(1) = 0;
}
*r = ret;
- if (r->u64[HI_IDX] == 0 && r->u64[LO_IDX] == 0) {
+ if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
return ox_flag | CRF_EQ;
}
@@ -3428,8 +3420,6 @@ void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
}
#undef VECTOR_FOR_INORDER_I
-#undef HI_IDX
-#undef LO_IDX
/*****************************************************************************/
/* SPE extension helpers */
--
2.11.0
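The substitution above is mechanical; a minimal stand-alone check (not QEMU code: the avr_t union and the hard-coded little-endian-host macro values are stand-ins) showing why u64[HI_IDX] and VsrD(0), and u64[LO_IDX] and VsrD(1), always name the same doubleword:

#include <assert.h>
#include <stdint.h>

typedef union { uint64_t u64[2]; } avr_t;   /* stand-in for ppc_avr_t */

#define HI_IDX 1              /* little-endian-host values; 0 and 1 on BE */
#define LO_IDX 0
#define VsrD(i) u64[1 - (i)]  /* u64[i] on a big-endian host */

int main(void)
{
    avr_t x = { .u64 = { 0, 0 } };

    assert(&x.u64[HI_IDX] == &x.VsrD(0));   /* most significant doubleword  */
    assert(&x.u64[LO_IDX] == &x.VsrD(1));   /* least significant doubleword */
    return 0;
}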
* [Qemu-devel] [PATCH 5/6] target/ppc: eliminate use of EL_IDX macros from int_helper.c
2018-12-23 11:38 [Qemu-devel] [PATCH 0/6] target/ppc: remove various endian hacks from int_helper.c Mark Cave-Ayland
` (3 preceding siblings ...)
2018-12-23 11:38 ` [Qemu-devel] [PATCH 4/6] target/ppc: eliminate use of HI_IDX and LO_IDX macros from int_helper.c Mark Cave-Ayland
@ 2018-12-23 11:38 ` Mark Cave-Ayland
2018-12-25 20:22 ` Richard Henderson
2018-12-23 11:38 ` [Qemu-devel] [PATCH 6/6] target/ppc: remove various HOST_WORDS_BIGENDIAN hacks in int_helper.c Mark Cave-Ayland
5 siblings, 1 reply; 12+ messages in thread
From: Mark Cave-Ayland @ 2018-12-23 11:38 UTC (permalink / raw)
To: qemu-devel, qemu-ppc, richard.henderson, david
These macros can be eliminated by instead using the relevant Vsr* macros in
the few locations where they appear.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
---
target/ppc/int_helper.c | 66 ++++++++++++++++++++-----------------------------
1 file changed, 27 insertions(+), 39 deletions(-)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index addbe54c21..4cc7fdfd25 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -3320,12 +3320,7 @@ void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
*r = result;
}
-#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
-#if defined(HOST_WORDS_BIGENDIAN)
-#define EL_IDX(i) (i)
-#else
-#define EL_IDX(i) (3 - (i))
-#endif
+#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - n)))
void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
@@ -3333,40 +3328,34 @@ void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
int six = st_six & 0xF;
int i;
- VECTOR_FOR_INORDER_I(i, u32) {
+ for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
if (st == 0) {
if ((six & (0x8 >> i)) == 0) {
- r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
- ROTRu32(a->u32[EL_IDX(i)], 18) ^
- (a->u32[EL_IDX(i)] >> 3);
+ r->VsrW(i) = ROTRu32(a->VsrW(i), 7) ^
+ ROTRu32(a->VsrW(i), 18) ^
+ (a->VsrW(i) >> 3);
} else { /* six.bit[i] == 1 */
- r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
- ROTRu32(a->u32[EL_IDX(i)], 19) ^
- (a->u32[EL_IDX(i)] >> 10);
+ r->VsrW(i) = ROTRu32(a->VsrW(i), 17) ^
+ ROTRu32(a->VsrW(i), 19) ^
+ (a->VsrW(i) >> 10);
}
} else { /* st == 1 */
if ((six & (0x8 >> i)) == 0) {
- r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
- ROTRu32(a->u32[EL_IDX(i)], 13) ^
- ROTRu32(a->u32[EL_IDX(i)], 22);
+ r->VsrW(i) = ROTRu32(a->VsrW(i), 2) ^
+ ROTRu32(a->VsrW(i), 13) ^
+ ROTRu32(a->VsrW(i), 22);
} else { /* six.bit[i] == 1 */
- r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
- ROTRu32(a->u32[EL_IDX(i)], 11) ^
- ROTRu32(a->u32[EL_IDX(i)], 25);
+ r->VsrW(i) = ROTRu32(a->VsrW(i), 6) ^
+ ROTRu32(a->VsrW(i), 11) ^
+ ROTRu32(a->VsrW(i), 25);
}
}
}
}
#undef ROTRu32
-#undef EL_IDX
#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
-#if defined(HOST_WORDS_BIGENDIAN)
-#define EL_IDX(i) (i)
-#else
-#define EL_IDX(i) (1 - (i))
-#endif
void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
@@ -3374,33 +3363,32 @@ void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
int six = st_six & 0xF;
int i;
- VECTOR_FOR_INORDER_I(i, u64) {
+ for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
if (st == 0) {
if ((six & (0x8 >> (2*i))) == 0) {
- r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
- ROTRu64(a->u64[EL_IDX(i)], 8) ^
- (a->u64[EL_IDX(i)] >> 7);
+ r->VsrD(i) = ROTRu64(a->VsrD(i), 1) ^
+ ROTRu64(a->VsrD(i), 8) ^
+ (a->VsrD(i) >> 7);
} else { /* six.bit[2*i] == 1 */
- r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
- ROTRu64(a->u64[EL_IDX(i)], 61) ^
- (a->u64[EL_IDX(i)] >> 6);
+ r->VsrD(i) = ROTRu64(a->VsrD(i), 19) ^
+ ROTRu64(a->VsrD(i), 61) ^
+ (a->VsrD(i) >> 6);
}
} else { /* st == 1 */
if ((six & (0x8 >> (2*i))) == 0) {
- r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
- ROTRu64(a->u64[EL_IDX(i)], 34) ^
- ROTRu64(a->u64[EL_IDX(i)], 39);
+ r->VsrD(i) = ROTRu64(a->VsrD(i), 28) ^
+ ROTRu64(a->VsrD(i), 34) ^
+ ROTRu64(a->VsrD(i), 39);
} else { /* six.bit[2*i] == 1 */
- r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
- ROTRu64(a->u64[EL_IDX(i)], 18) ^
- ROTRu64(a->u64[EL_IDX(i)], 41);
+ r->VsrD(i) = ROTRu64(a->VsrD(i), 14) ^
+ ROTRu64(a->VsrD(i), 18) ^
+ ROTRu64(a->VsrD(i), 41);
}
}
}
}
#undef ROTRu64
-#undef EL_IDX
void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
--
2.11.0
* Re: [Qemu-devel] [PATCH 5/6] target/ppc: eliminate use of EL_IDX macros from int_helper.c
2018-12-23 11:38 ` [Qemu-devel] [PATCH 5/6] target/ppc: eliminate use of EL_IDX " Mark Cave-Ayland
@ 2018-12-25 20:22 ` Richard Henderson
0 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2018-12-25 20:22 UTC (permalink / raw)
To: Mark Cave-Ayland, qemu-devel, qemu-ppc, david
On 12/23/18 10:38 PM, Mark Cave-Ayland wrote:
> These macros can be eliminated by instead using the relevant Vsr* macros in
> the few locations where they appear.
>
> Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
> ---
> target/ppc/int_helper.c | 66 ++++++++++++++++++++-----------------------------
> 1 file changed, 27 insertions(+), 39 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> -#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
> +#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - n)))
For a different patch, this is ror32().
This version contains a -fsanitize=shift bug, in that n == 0 turns the left
shift into a shift by 32, which is undefined behaviour.
> #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
Similarly.
r~
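A rotate that stays well-defined for n == 0 simply masks both shift counts (sketch with a hypothetical name; QEMU's include/qemu/bitops.h provides ror32()/ror64() helpers along these lines, which is what the follow-up would switch to):

#include <stdint.h>

static inline uint32_t rotr32_safe(uint32_t v, unsigned n)
{
    /* both shift counts stay in 0..31, so n == 0 no longer shifts by 32 */
    return (v >> (n & 31)) | (v << (-n & 31));
}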
* [Qemu-devel] [PATCH 6/6] target/ppc: remove various HOST_WORDS_BIGENDIAN hacks in int_helper.c
2018-12-23 11:38 [Qemu-devel] [PATCH 0/6] target/ppc: remove various endian hacks from int_helper.c Mark Cave-Ayland
` (4 preceding siblings ...)
2018-12-23 11:38 ` [Qemu-devel] [PATCH 5/6] target/ppc: eliminate use of EL_IDX " Mark Cave-Ayland
@ 2018-12-23 11:38 ` Mark Cave-Ayland
2018-12-25 20:44 ` Richard Henderson
5 siblings, 1 reply; 12+ messages in thread
From: Mark Cave-Ayland @ 2018-12-23 11:38 UTC (permalink / raw)
To: qemu-devel, qemu-ppc, richard.henderson, david
Following on from the previous work, there are numerous endian-related hacks
in int_helper.c that can now be replaced with Vsr* macros.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
---
target/ppc/int_helper.c | 205 +++++++++++++++++-------------------------------
1 file changed, 70 insertions(+), 135 deletions(-)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 4cc7fdfd25..0d97eb9383 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -443,8 +443,8 @@ void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
int i, j = (sh & 0xf);
- VECTOR_FOR_INORDER_I(i, u8) {
- r->u8[i] = j++;
+ for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
+ r->VsrB(i) = j++;
}
}
@@ -452,18 +452,14 @@ void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
int i, j = 0x10 - (sh & 0xf);
- VECTOR_FOR_INORDER_I(i, u8) {
- r->u8[i] = j++;
+ for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
+ r->VsrB(i) = j++;
}
}
void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
-#if defined(HOST_WORDS_BIGENDIAN)
- env->vscr = r->u32[3];
-#else
- env->vscr = r->u32[0];
-#endif
+ env->vscr = r->VsrW(3);
set_flush_to_zero(vscr_nj, &env->vec_status);
}
@@ -870,8 +866,8 @@ target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
target_ulong count = 0;
int i;
- VECTOR_FOR_INORDER_I(i, u8) {
- if (r->u8[i] & 0x01) {
+ for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
+ if (r->VsrB(i) & 0x01) {
break;
}
count++;
@@ -883,12 +879,8 @@ target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
target_ulong count = 0;
int i;
-#if defined(HOST_WORDS_BIGENDIAN)
for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
-#else
- for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
-#endif
- if (r->u8[i] & 0x01) {
+ if (r->VsrB(i) & 0x01) {
break;
}
count++;
@@ -1151,18 +1143,14 @@ void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
ppc_avr_t result;
int i;
- VECTOR_FOR_INORDER_I(i, u8) {
- int s = c->u8[i] & 0x1f;
-#if defined(HOST_WORDS_BIGENDIAN)
+ for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
+ int s = c->VsrB(i) & 0x1f;
int index = s & 0xf;
-#else
- int index = 15 - (s & 0xf);
-#endif
if (s & 0x10) {
- result.u8[i] = b->u8[index];
+ result.VsrB(i) = b->VsrB(index);
} else {
- result.u8[i] = a->u8[index];
+ result.VsrB(i) = a->VsrB(index);
}
}
*r = result;
@@ -1174,18 +1162,14 @@ void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
ppc_avr_t result;
int i;
- VECTOR_FOR_INORDER_I(i, u8) {
- int s = c->u8[i] & 0x1f;
-#if defined(HOST_WORDS_BIGENDIAN)
+ for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
+ int s = c->VsrB(i) & 0x1f;
int index = 15 - (s & 0xf);
-#else
- int index = s & 0xf;
-#endif
if (s & 0x10) {
- result.u8[i] = a->u8[index];
+ result.VsrB(i) = a->VsrB(index);
} else {
- result.u8[i] = b->u8[index];
+ result.VsrB(i) = b->VsrB(index);
}
}
*r = result;
@@ -1882,25 +1866,14 @@ void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
int i;
ppc_avr_t result;
-#if defined(HOST_WORDS_BIGENDIAN)
for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
int index = sh + i;
if (index > 0xf) {
- result.u8[i] = b->u8[index - 0x10];
- } else {
- result.u8[i] = a->u8[index];
- }
- }
-#else
- for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
- int index = (16 - sh) + i;
- if (index > 0xf) {
- result.u8[i] = a->u8[index - 0x10];
+ result.VsrB(i) = b->VsrB(index - 0x10);
} else {
- result.u8[i] = b->u8[index];
+ result.VsrB(i) = a->VsrB(index);
}
}
-#endif
*r = result;
}
@@ -1919,25 +1892,20 @@ void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
/* Experimental testing shows that hardware masks the immediate. */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
-#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
-#else
-#define SPLAT_ELEMENT(element) \
- (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
-#endif
-#define VSPLT(suffix, element) \
+#define VSPLT(suffix, element, access) \
void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
{ \
- uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
+ uint32_t s = b->access(SPLAT_ELEMENT(element)); \
int i; \
\
for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
- r->element[i] = s; \
+ r->access(i) = s; \
} \
}
-VSPLT(b, u8)
-VSPLT(h, u16)
-VSPLT(w, u32)
+VSPLT(b, u8, VsrB)
+VSPLT(h, u16, VsrH)
+VSPLT(w, u32, VsrW)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED
@@ -1998,17 +1966,10 @@ void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
getVSR(xbn, &xb, env);
memset(&xt, 0, sizeof(xt));
-#if defined(HOST_WORDS_BIGENDIAN)
ext_index = index;
for (i = 0; i < es; i++, ext_index++) {
- xt.u8[8 - es + i] = xb.u8[ext_index % 16];
- }
-#else
- ext_index = 15 - index;
- for (i = es - 1; i >= 0; i--, ext_index--) {
- xt.u8[8 + i] = xb.u8[ext_index % 16];
+ xt.VsrB(8 - es + i) = xb.VsrB(ext_index % 16);
}
-#endif
putVSR(xtn, &xt, env);
}
@@ -2023,46 +1984,39 @@ void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
getVSR(xbn, &xb, env);
getVSR(xtn, &xt, env);
-#if defined(HOST_WORDS_BIGENDIAN)
ins_index = index;
for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
- xt.u8[ins_index] = xb.u8[8 - es + i];
- }
-#else
- ins_index = 15 - index;
- for (i = es - 1; i >= 0 && ins_index >= 0; i--, ins_index--) {
- xt.u8[ins_index] = xb.u8[8 + i];
+ xt.VsrB(ins_index) = xb.VsrB(8 - es + i);
}
-#endif
putVSR(xtn, &xt, env);
}
-#define VEXT_SIGNED(name, element, mask, cast, recast) \
+#define VEXT_SIGNED(name, element, access, mask, cast, recast) \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
{ \
int i; \
- VECTOR_FOR_INORDER_I(i, element) { \
- r->element[i] = (recast)((cast)(b->element[i] & mask)); \
+ for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
+ r->access(i) = (recast)((cast)(b->access(i) & mask)); \
} \
}
-VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
-VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
-VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
-VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
-VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
+VEXT_SIGNED(vextsb2w, s32, VsrSW, UINT8_MAX, int8_t, int32_t)
+VEXT_SIGNED(vextsb2d, s64, VsrSD, UINT8_MAX, int8_t, int64_t)
+VEXT_SIGNED(vextsh2w, s32, VsrSW, UINT16_MAX, int16_t, int32_t)
+VEXT_SIGNED(vextsh2d, s64, VsrSD, UINT16_MAX, int16_t, int64_t)
+VEXT_SIGNED(vextsw2d, s64, VsrSD, UINT32_MAX, int32_t, int64_t)
#undef VEXT_SIGNED
-#define VNEG(name, element) \
+#define VNEG(name, element, access) \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
{ \
int i; \
- VECTOR_FOR_INORDER_I(i, element) { \
- r->element[i] = -b->element[i]; \
+ for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
+ r->access(i) = -b->access(i); \
} \
}
-VNEG(vnegw, s32)
-VNEG(vnegd, s64)
+VNEG(vnegw, s32, VsrSW)
+VNEG(vnegd, s64, VsrSD)
#undef VNEG
#define VSPLTI(suffix, element, splat_type) \
@@ -2129,17 +2083,13 @@ void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
ppc_avr_t result;
int sat = 0;
-#if defined(HOST_WORDS_BIGENDIAN)
- upper = ARRAY_SIZE(r->s32)-1;
-#else
- upper = 0;
-#endif
- t = (int64_t)b->s32[upper];
+ upper = ARRAY_SIZE(r->s32) - 1;
+ t = (int64_t)b->VsrSW(upper);
for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
- t += a->s32[i];
- result.s32[i] = 0;
+ t += a->VsrSW(i);
+ result.VsrSW(i) = 0;
}
- result.s32[upper] = cvtsdsw(t, &sat);
+ result.VsrSW(upper) = cvtsdsw(t, &sat);
*r = result;
if (sat) {
@@ -2153,19 +2103,15 @@ void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
ppc_avr_t result;
int sat = 0;
-#if defined(HOST_WORDS_BIGENDIAN)
upper = 1;
-#else
- upper = 0;
-#endif
for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
- int64_t t = (int64_t)b->s32[upper + i * 2];
+ int64_t t = (int64_t)b->VsrSW(upper + i * 2);
- result.u64[i] = 0;
+ result.VsrW(i) = 0;
for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
- t += a->s32[2 * i + j];
+ t += a->VsrSW(2 * i + j);
}
- result.s32[upper + i * 2] = cvtsdsw(t, &sat);
+ result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
}
*r = result;
@@ -2285,13 +2231,13 @@ VUPK(lsw, s64, s32, UPKLO)
#undef UPKHI
#undef UPKLO
-#define VGENERIC_DO(name, element) \
+#define VGENERIC_DO(name, element, access) \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
{ \
int i; \
\
- VECTOR_FOR_INORDER_I(i, element) { \
- r->element[i] = name(b->element[i]); \
+ for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
+ r->access(i) = name(b->access(i)); \
} \
}
@@ -2300,10 +2246,10 @@ VUPK(lsw, s64, s32, UPKLO)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))
-VGENERIC_DO(clzb, u8)
-VGENERIC_DO(clzh, u16)
-VGENERIC_DO(clzw, u32)
-VGENERIC_DO(clzd, u64)
+VGENERIC_DO(clzb, u8, VsrB)
+VGENERIC_DO(clzh, u16, VsrH)
+VGENERIC_DO(clzw, u32, VsrW)
+VGENERIC_DO(clzd, u64, VsrD)
#undef clzb
#undef clzh
@@ -2315,10 +2261,10 @@ VGENERIC_DO(clzd, u64)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))
-VGENERIC_DO(ctzb, u8)
-VGENERIC_DO(ctzh, u16)
-VGENERIC_DO(ctzw, u32)
-VGENERIC_DO(ctzd, u64)
+VGENERIC_DO(ctzb, u8, VsrB)
+VGENERIC_DO(ctzh, u16, VsrH)
+VGENERIC_DO(ctzw, u32, VsrW)
+VGENERIC_DO(ctzd, u64, VsrD)
#undef ctzb
#undef ctzh
@@ -2330,10 +2276,10 @@ VGENERIC_DO(ctzd, u64)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)
-VGENERIC_DO(popcntb, u8)
-VGENERIC_DO(popcnth, u16)
-VGENERIC_DO(popcntw, u32)
-VGENERIC_DO(popcntd, u64)
+VGENERIC_DO(popcntb, u8, VsrB)
+VGENERIC_DO(popcnth, u16, VsrH)
+VGENERIC_DO(popcntw, u32, VsrW)
+VGENERIC_DO(popcntd, u64, VsrD)
#undef popcntb
#undef popcnth
@@ -2630,20 +2576,12 @@ static int bcd_cmp_zero(ppc_avr_t *bcd)
static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
-#if defined(HOST_WORDS_BIGENDIAN)
- return reg->u16[7 - n];
-#else
- return reg->u16[n];
-#endif
+ return reg->VsrH(7 - n);
}
static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
-#if defined(HOST_WORDS_BIGENDIAN)
- reg->u16[7 - n] = val;
-#else
- reg->u16[n] = val;
-#endif
+ reg->VsrH(7 - n) = val;
}
static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
@@ -3395,14 +3333,11 @@ void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
ppc_avr_t result;
int i;
- VECTOR_FOR_INORDER_I(i, u8) {
- int indexA = c->u8[i] >> 4;
- int indexB = c->u8[i] & 0xF;
-#if defined(HOST_WORDS_BIGENDIAN)
- result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
-#else
- result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
-#endif
+ for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
+ int indexA = c->VsrB(i) >> 4;
+ int indexB = c->VsrB(i) & 0xF;
+
+ result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
}
*r = result;
}
--
2.11.0
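Of the conversions above, vperm is the one where the endian fix-up was least obvious; the semantics it implements, written out over plain arrays in PowerPC element order (hypothetical reference helper, not QEMU code), are simply "result byte i is byte (c[i] & 0x1f) of the 32-byte concatenation a || b", which is why a single VsrB() index now works on both hosts:

#include <stdint.h>
#include <stddef.h>

static void vperm_ref(uint8_t r[16], const uint8_t a[16],
                      const uint8_t b[16], const uint8_t c[16])
{
    for (size_t i = 0; i < 16; i++) {
        unsigned s = c[i] & 0x1f;
        r[i] = (s & 0x10) ? b[s & 0xf] : a[s & 0xf];
    }
}

/* Note: the real helper builds the result in a local ppc_avr_t first so that
 * r may alias a or b; this reference ignores aliasing for brevity. */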
* Re: [Qemu-devel] [PATCH 6/6] target/ppc: remove various HOST_WORDS_BIGENDIAN hacks in int_helper.c
2018-12-23 11:38 ` [Qemu-devel] [PATCH 6/6] target/ppc: remove various HOST_WORDS_BIGENDIAN hacks in int_helper.c Mark Cave-Ayland
@ 2018-12-25 20:44 ` Richard Henderson
0 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2018-12-25 20:44 UTC (permalink / raw)
To: Mark Cave-Ayland, qemu-devel, qemu-ppc, david
On 12/23/18 10:38 PM, Mark Cave-Ayland wrote:
> -#define VEXT_SIGNED(name, element, mask, cast, recast) \
> +#define VEXT_SIGNED(name, element, access, mask, cast, recast) \
> void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
> { \
> int i; \
> - VECTOR_FOR_INORDER_I(i, element) { \
> - r->element[i] = (recast)((cast)(b->element[i] & mask)); \
> + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
> + r->access(i) = (recast)((cast)(b->access(i) & mask)); \
> } \
> }
> -VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
> -VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
> -VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
> -VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
> -VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
> +VEXT_SIGNED(vextsb2w, s32, VsrSW, UINT8_MAX, int8_t, int32_t)
> +VEXT_SIGNED(vextsb2d, s64, VsrSD, UINT8_MAX, int8_t, int64_t)
> +VEXT_SIGNED(vextsh2w, s32, VsrSW, UINT16_MAX, int16_t, int32_t)
> +VEXT_SIGNED(vextsh2d, s64, VsrSD, UINT16_MAX, int16_t, int64_t)
> +VEXT_SIGNED(vextsw2d, s64, VsrSD, UINT32_MAX, int32_t, int64_t)
Your conversion is technically fine, but this macro is just confused.
It does not need the mask argument, nor does it need the recast argument.
The masking is implied by the cast argument, and the recast is implied by the
assignment.
Nor, really, does it need the access argument. The data is handled in strict
lanes, and it does not matter in which order the lanes are processed.
> -#define VNEG(name, element) \
> +#define VNEG(name, element, access) \
> void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
> { \
> int i; \
> - VECTOR_FOR_INORDER_I(i, element) { \
> - r->element[i] = -b->element[i]; \
> + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
> + r->access(i) = -b->access(i); \
> } \
> }
> -VNEG(vnegw, s32)
> -VNEG(vnegd, s64)
> +VNEG(vnegw, s32, VsrSW)
> +VNEG(vnegd, s64, VsrSD)
Similarly, this does not require access nor in-order processing.
> -#define VGENERIC_DO(name, element) \
> +#define VGENERIC_DO(name, element, access) \
> void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
> { \
> int i; \
> \
> - VECTOR_FOR_INORDER_I(i, element) { \
> - r->element[i] = name(b->element[i]); \
> + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
> + r->access(i) = name(b->access(i)); \
> } \
> }
Likewise.
r~
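Taken together, those points reduce the macro to something like this (sketch only, reusing the patch's helper names; not what the series as posted does):

#define VEXT_SIGNED(name, element, cast)                                 \
    void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                       \
    {                                                                    \
        int i;                                                           \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                   \
            /* cast narrows + sign-extends; the assignment re-widens */  \
            r->element[i] = (cast)b->element[i];                         \
        }                                                                \
    }
VEXT_SIGNED(vextsb2w, s32, int8_t)
VEXT_SIGNED(vextsb2d, s64, int8_t)
VEXT_SIGNED(vextsh2w, s32, int16_t)
VEXT_SIGNED(vextsh2d, s64, int16_t)
VEXT_SIGNED(vextsw2d, s64, int32_t)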