* [PATCH RFC] target/arm: Implement SVE2 MATCH, NMATCH
@ 2020-04-13 23:42 Stephen Long
2020-04-14 14:47 ` Richard Henderson
0 siblings, 1 reply; 2+ messages in thread
From: Stephen Long @ 2020-04-13 23:42 UTC (permalink / raw)
To: qemu-devel; +Cc: apazos, qemu-arm, richard.henderson
Signed-off-by: Stephen Long <steplong@quicinc.com>
---
Submitting this for early review. I'm working with Richard on SVE2 support for
QEMU. I'll be attempting to tackle the instructions in the 'SVE2 integer
add/subtract narrow high part' category next [1].
[1] ISA manual: https://static.docs.arm.com/ddi0602/d/ISA_A64_xml_futureA-2019-12_OPT.pdf (page 2950)
target/arm/helper-sve.h | 10 +++++++++
target/arm/sve.decode | 5 +++++
target/arm/sve_helper.c | 29 +++++++++++++++++++++++++
target/arm/translate-sve.c | 43 ++++++++++++++++++++++++++++++++++++++
4 files changed, 87 insertions(+)
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 5dd880cf6d..2077df9a95 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -2516,6 +2516,16 @@ DEF_HELPER_FLAGS_3(sve2_uqrshrnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_uqrshrnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_uqrshrnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_match_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_match_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_nmatch_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_nmatch_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_s, TCG_CALL_NO_RWG,
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 374e47fb05..652668df02 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1305,6 +1305,11 @@ UQSHRNT 01000101 .. 1 ..... 00 1101 ..... ..... @rd_rn_tszimm_shr
UQRSHRNB 01000101 .. 1 ..... 00 1110 ..... ..... @rd_rn_tszimm_shr
UQRSHRNT 01000101 .. 1 ..... 00 1111 ..... ..... @rd_rn_tszimm_shr
+### SVE2 Character Match
+
+MATCH 01000101 .. 1 ..... 100 ... ..... 0 .... @pd_pg_rn_rm
+NMATCH 01000101 .. 1 ..... 100 ... ..... 1 .... @pd_pg_rn_rm
+
## SVE2 floating-point pairwise operations
FADDP 01100100 .. 010 00 0 100 ... ..... ..... @rdn_pg_rm
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index b68f62cd7f..c75258b56d 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -6890,3 +6890,32 @@ DO_ST1_ZPZ_D(dd_be, zd, MO_64)
#undef DO_ST1_ZPZ_S
#undef DO_ST1_ZPZ_D
+
+#define DO_ZPZZ_CHAR_MATCH(NAME, TYPE, H, EQUALS) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ uint16_t *pd = (uint16_t *)(vd + H1_2(i >> 3)); \
+ *pd = (*pd & ~1) | ((0 & EQUALS) | (1 & !EQUALS)); \
+ if (pg & 1) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ for (intptr_t j = 0; j < 16; j += sizeof(TYPE)) { \
+ TYPE mm = *(TYPE *)(vm + H(i * 16 + j)); \
+ bool eq = nn == mm; \
+ if ((eq && EQUALS) || (!eq && !EQUALS)) { \
+ *pd = (*pd & ~1) | ((1 & EQUALS) | (0 & !EQUALS)); \
+ } \
+ } \
+ } \
+ } \
+}
+
+DO_ZPZZ_CHAR_MATCH(sve2_match_zpzz_b, uint8_t, H1, true)
+DO_ZPZZ_CHAR_MATCH(sve2_match_zpzz_h, uint16_t, H1_2, true)
+
+DO_ZPZZ_CHAR_MATCH(sve2_nmatch_zpzz_b, uint8_t, H1, false)
+DO_ZPZZ_CHAR_MATCH(sve2_nmatch_zpzz_h, uint16_t, H1_2, false)
+
+#undef DO_ZPZZ_CHAR_MATCH
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 07a2040208..7175148bfd 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -7246,6 +7246,49 @@ static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
return do_sve2_shr_narrow(s, a, ops);
}
+static bool do_sve2_zpzz_char_match(DisasContext *s, arg_rprr_esz *a,
+ gen_helper_gvec_4 *fn)
+{
+ if (!dc_isar_feature(aa64_sve2, s)) {
+ return false;
+ }
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ unsigned psz = pred_full_reg_size(s);
+ int dofs = pred_full_reg_offset(s, a->rd);
+ int nofs = vec_full_reg_offset(s, a->rn);
+ int mofs = vec_full_reg_offset(s, a->rm);
+ int gofs = pred_full_reg_offset(s, a->pg);
+
+ /* Save a copy if the destination overwrites the guarding predicate */
+ int tofs = gofs;
+ if (a->rd == a->pg) {
+ tofs = offsetof(CPUARMState, vfp.preg_tmp);
+ tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
+ }
+
+ tcg_gen_gvec_4_ool(dofs, nofs, mofs, gofs, vsz, vsz, 0, fn);
+ do_predtest(s, dofs, tofs, psz / 8);
+ }
+ return true;
+}
+
+#define DO_SVE2_ZPZZ_CHAR_MATCH(NAME, name) \
+static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
+{ \
+ static gen_helper_gvec_4 * const fns[4] = { \
+ gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \
+ NULL, NULL \
+ }; \
+ return do_sve2_zpzz_char_match(s, a, fns[a->esz]); \
+}
+
+DO_SVE2_ZPZZ_CHAR_MATCH(MATCH, match)
+DO_SVE2_ZPZZ_CHAR_MATCH(NMATCH, nmatch)
+
static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
gen_helper_gvec_4_ptr *fn)
{
--
2.17.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH RFC] target/arm: Implement SVE2 MATCH, NMATCH
2020-04-13 23:42 [PATCH RFC] target/arm: Implement SVE2 MATCH, NMATCH Stephen Long
@ 2020-04-14 14:47 ` Richard Henderson
0 siblings, 0 replies; 2+ messages in thread
From: Richard Henderson @ 2020-04-14 14:47 UTC (permalink / raw)
To: Stephen Long, qemu-devel; +Cc: apazos, qemu-arm
On 4/13/20 4:42 PM, Stephen Long wrote:
> +#define DO_ZPZZ_CHAR_MATCH(NAME, TYPE, H, EQUALS) \
> +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
> +{ \
> + intptr_t i, opr_sz = simd_oprsz(desc); \
> + for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
> + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
> + uint16_t *pd = (uint16_t *)(vd + H1_2(i >> 3)); \
> + *pd = (*pd & ~1) | ((0 & EQUALS) | (1 & !EQUALS)); \
> + if (pg & 1) { \
The important error here is that the relevant predicate bit is not always the
low bit of the loaded word. When operating on bytes, every bit of the predicate
is significant. When operating on halfwords, every even bit of the predicate is
significant. In addition, when operating on halfwords, every odd bit of the
result predicate must be zero. This is why I have generally constructed the
output predicate as we go; see, for instance, DO_CMP_PPZZ.
> + TYPE nn = *(TYPE *)(vn + H(i)); \
> + for (intptr_t j = 0; j < 16; j += sizeof(TYPE)) { \
> + TYPE mm = *(TYPE *)(vm + H(i * 16 + j)); \
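mm needs to start at the beginning of the 16-byte segment that contains element
i, which in this case is (i & -16). As written, i * 16 + j selects the wrong
bytes for any i > 0 and soon indexes past the end of the vector. You don't need
the elements of mm in any particular order (all of them are significant), so
you can drop the use of H() here.
Therefore the indexing for mm should be vm + (i & -16) + j.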
> + bool eq = nn == mm; \
> + if ((eq && EQUALS) || (!eq && !EQUALS)) { \
> + *pd = (*pd & ~1) | ((1 & EQUALS) | (0 & !EQUALS)); \
> + } \
It might be handy to split the inner loop out into a helper function: the basic
loop is OK, but there are tricks that can improve it so that we compare 8 bytes
at a time.
> +static bool do_sve2_zpzz_char_match(DisasContext *s, arg_rprr_esz *a,
> + gen_helper_gvec_4 *fn)
> +{
> + if (!dc_isar_feature(aa64_sve2, s)) {
> + return false;
> + }
> + if (fn == NULL) {
> + return false;
> + }
> + if (sve_access_check(s)) {
> + unsigned vsz = vec_full_reg_size(s);
> + unsigned psz = pred_full_reg_size(s);
> + int dofs = pred_full_reg_offset(s, a->rd);
> + int nofs = vec_full_reg_offset(s, a->rn);
> + int mofs = vec_full_reg_offset(s, a->rm);
> + int gofs = pred_full_reg_offset(s, a->pg);
> +
> + /* Save a copy if the destination overwrites the guarding predicate */
> + int tofs = gofs;
> + if (a->rd == a->pg) {
> + tofs = offsetof(CPUARMState, vfp.preg_tmp);
> + tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
> + }
> +
> + tcg_gen_gvec_4_ool(dofs, nofs, mofs, gofs, vsz, vsz, 0, fn);
> + do_predtest(s, dofs, tofs, psz / 8);
You can avoid the copy and the predtest by using the iter_predtest_* functions
and returning the flags result directly from the helper. Again, see DO_CMP_PPZZ.
r~
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2020-04-14 17:12 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-13 23:42 [PATCH RFC] target/arm: Implement SVE2 MATCH, NMATCH Stephen Long
2020-04-14 14:47 ` Richard Henderson
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).