qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: Stephen Long <steplong@quicinc.com>, qemu-devel@nongnu.org
Cc: qemu-arm@nongnu.org, apazos@quicinc.com
Subject: Re: [PATCH RFC v2] target/arm: Implement SVE2 MATCH, NMATCH
Date: Tue, 14 Apr 2020 19:05:10 -0700	[thread overview]
Message-ID: <e7fb7a8c-0dc9-45c3-a0f7-9952b4a60aeb@linaro.org> (raw)
In-Reply-To: <20200414231610.8387-1-steplong@quicinc.com>

On 4/14/20 4:16 PM, Stephen Long wrote:
> Signed-off-by: Stephen Long <steplong@quicinc.com>
> ---
>  target/arm/helper-sve.h    | 10 ++++++++
>  target/arm/sve.decode      |  5 ++++
>  target/arm/sve_helper.c    | 51 ++++++++++++++++++++++++++++++++++++++
>  target/arm/translate-sve.c | 22 ++++++++++++++++
>  4 files changed, 88 insertions(+)
> 
> diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
> index 5dd880cf6d..bc4a463bc7 100644
> --- a/target/arm/helper-sve.h
> +++ b/target/arm/helper-sve.h
> @@ -2516,6 +2516,16 @@ DEF_HELPER_FLAGS_3(sve2_uqrshrnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
>  DEF_HELPER_FLAGS_3(sve2_uqrshrnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
>  DEF_HELPER_FLAGS_3(sve2_uqrshrnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
>  
> +DEF_HELPER_FLAGS_5(sve2_match_ppzz_b, TCG_CALL_NO_RWG,
> +                   i32, ptr, ptr, ptr, ptr, i32)
> +DEF_HELPER_FLAGS_5(sve2_match_ppzz_h, TCG_CALL_NO_RWG,
> +                   i32, ptr, ptr, ptr, ptr, i32)
> +
> +DEF_HELPER_FLAGS_5(sve2_nmatch_ppzz_b, TCG_CALL_NO_RWG,
> +                   i32, ptr, ptr, ptr, ptr, i32)
> +DEF_HELPER_FLAGS_5(sve2_nmatch_ppzz_h, TCG_CALL_NO_RWG,
> +                   i32, ptr, ptr, ptr, ptr, i32)
> +
>  DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_h, TCG_CALL_NO_RWG,
>                     void, ptr, ptr, ptr, ptr, ptr, i32)
>  DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_s, TCG_CALL_NO_RWG,
> diff --git a/target/arm/sve.decode b/target/arm/sve.decode
> index 374e47fb05..652668df02 100644
> --- a/target/arm/sve.decode
> +++ b/target/arm/sve.decode
> @@ -1305,6 +1305,11 @@ UQSHRNT         01000101 .. 1 ..... 00 1101 ..... .....  @rd_rn_tszimm_shr
>  UQRSHRNB        01000101 .. 1 ..... 00 1110 ..... .....  @rd_rn_tszimm_shr
>  UQRSHRNT        01000101 .. 1 ..... 00 1111 ..... .....  @rd_rn_tszimm_shr
>  
> +### SVE2 Character Match
> +
> +MATCH           01000101 .. 1 ..... 100 ... ..... 0 .... @pd_pg_rn_rm
> +NMATCH          01000101 .. 1 ..... 100 ... ..... 1 .... @pd_pg_rn_rm
> +
>  ## SVE2 floating-point pairwise operations
>  
>  FADDP           01100100 .. 010 00 0 100 ... ..... ..... @rdn_pg_rm
> diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
> index b68f62cd7f..78abd8b62a 100644
> --- a/target/arm/sve_helper.c
> +++ b/target/arm/sve_helper.c
> @@ -6890,3 +6890,54 @@ DO_ST1_ZPZ_D(dd_be, zd, MO_64)
>  
>  #undef DO_ST1_ZPZ_S
>  #undef DO_ST1_ZPZ_D
> +
> +#define DO_PPZZ_CHAR_MATCH(NAME, TYPE, OP, H, MASK, DEFAULT_VAL)              \
> +static inline bool NAME##_inner_loop(TYPE nn, void *segmentbase)              \
> +{                                                                             \
> +    intptr_t i = 128;                                                         \
> +    do {                                                                      \
> +        do {                                                                  \
> +            i -= sizeof(TYPE) * 8;                                            \
> +            TYPE mm = *(TYPE *)(segmentbase + H1(i));                         \
> +            if (nn OP mm) {                                                   \
> +                return !DEFAULT_VAL;                                          \
> +            }                                                                 \
> +        } while (i & 63);                                                     \
> +    } while (i > 0);                                                          \
> +    return DEFAULT_VAL;                                                       \
> +}                                                                             \

You seem to be mixing up bits and bytes here: the loop counts down from 128 bits, but H1 takes a byte index.

I note that we don't need to keep re-loading the Zm segment elements from
memory.  Perhaps something like

/*
 * Return true if the (8 << esz)-bit field at the bottom of @n is equal
 * to any of the (8 << esz)-bit fields that make up the 64-bit word @m.
 */
static inline bool do_match1(uint64_t n, uint64_t m, int esz)
{
    const int width = 8 << esz;
    const uint64_t needle = extract64(n, 0, width);
    int pos = 0;

    while (pos < 64) {
        if (extract64(m, pos, width) == needle) {
            return true;
        }
        pos += width;
    }
    return false;
}

/*
 * Return true if do_match1() finds @n in either 64-bit word, @m0 or @m1.
 * @m1 is only examined when @m0 does not match.
 */
static inline bool do_match2(uint64_t n, uint64_t m0,
                             uint64_t m1, int esz)
{
    if (do_match1(n, m0, esz)) {
        return true;
    }
    return do_match1(n, m1, esz);
}


As an improvement, we can test all elements of a word at once using the
"determine if a word has a zero byte" trick from

https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord

/*
 * Loop-free test of whether element @n occurs in @m0 or @m1.
 *
 * XOR-ing the replicated element with a word leaves an all-zero field
 * wherever the two agree; (x - lsb) & ~x & msb is then non-zero exactly
 * when some field of x is zero.
 *
 * NOTE(review): assumes dup_const(esz, v) replicates the low
 * (8 << esz)-bit element of v across all 64 bits -- confirm against
 * its definition.
 */
static inline bool do_match2(uint64_t n, uint64_t m0,
                             uint64_t m1, int esz)
{
    const int width = 8 << esz;
    const uint64_t lsb = dup_const(esz, 1);
    const uint64_t msb = lsb << (width - 1);
    const uint64_t rep = dup_const(esz, n);
    const uint64_t diff0 = rep ^ m0;
    const uint64_t diff1 = rep ^ m1;
    const uint64_t zero0 = (diff0 - lsb) & ~diff0;
    const uint64_t zero1 = (diff1 - lsb) & ~diff1;

    return ((zero0 | zero1) & msb) != 0;
}


> +uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc)  \
> +{                                                                             \
> +    intptr_t opr_sz = simd_oprsz(desc);                                       \
> +    uint32_t flags = PREDTEST_INIT;                                           \
> +    intptr_t i = opr_sz;                                                      \
> +    do {                                                                      \
> +        uint64_t out = 0;                                                     \
> +        uint64_t pg;                                                          \
> +        do {                                                                  \
> +            i -= sizeof(TYPE), out <<= sizeof(TYPE);                          \
> +            TYPE nn = *(TYPE *)(vn + H(i));                                   \
> +            out = (out & ~1ull) | DEFAULT_VAL;                                \
> +            out |= NAME##_inner_loop(nn, vm + (i & -16));                     \
> +        } while (i & 63);                                                     \
> +        pg = *(uint64_t *)(vg + (i >> 3)) & MASK;                             \
> +        out &= pg;                                                            \
> +        *(uint64_t *)(vd + (i >> 3)) = out;                                   \
> +        flags = iter_predtest_bwd(out, pg, flags);                            \
> +    } while (i > 0);                                                          \
> +    return 0;                                                                 \

/*
 * Common body for the MATCH/NMATCH helpers.
 *
 * The operands are walked in 16-byte segments.  For each segment the two
 * 64-bit halves of @vm and the 16 governing predicate bits from @vg are
 * loaded once.  Every element of @vn whose predicate bit is set is then
 * tested against the whole @vm segment with do_match2(); the result,
 * inverted when @nmatch is true, is written to the corresponding bit of
 * the 16-bit predicate chunk stored to @vd.  Bits for elements whose
 * predicate bit is clear are stored as 0.
 *
 * @esz is the log2 of the element size in bytes.
 *
 * Returns the flags value accumulated by iter_predtest_fwd() across all
 * segments.
 *
 * NOTE(review): pg is not masked down to one bit per element, so for
 * esz > 0 the bits between elements are passed unmodified to
 * iter_predtest_fwd(); the patch under review applies "& MASK" to the
 * predicate -- confirm whether the same is needed here.
 */
static inline uint32_t do_match(void *vd, void *vn,
    void *vm, void *vg, uint32_t desc,
    int esz, bool nmatch)
{
    intptr_t opr_sz = simd_oprsz(desc);
    uint32_t flags = PREDTEST_INIT;
    intptr_t i, j, k;

    for (i = 0; i < opr_sz; i += 16) {
        /* Load the Zm segment once; it is reused for every Zn element. */
        uint64_t m0 = *(uint64_t *)(vm + i);
        uint64_t m1 = *(uint64_t *)(vm + i + 8);
        /* One predicate bit per byte: 16 bits cover this segment. */
        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
        uint16_t out = 0;

        for (j = 0; j < 16; j += 8) {
            uint64_t n = *(uint64_t *)(vn + i + j);

            for (k = 0; k < 8; k += 1 << esz) {
                if (pg & (1 << (j + k))) {
                    /* Shift the element at byte offset k to the bottom. */
                    bool o = do_match2(n >> (k * 8),
                                       m0, m1, esz);
                    out |= (o ^ nmatch) << (j + k);
                }
            }
        }
        *(uint16_t *)(vd + H1_2(i >> 3)) = out;
        flags = iter_predtest_fwd(out, pg, flags);
    }
    return flags;
}

/*
 * Define the helper entry point FN as a call to do_match() with the
 * element size ELT_SZ and the result inversion flag NEGATE.
 */
#define DO_PPZZ_MATCH(FN, ELT_SZ, NEGATE)                         \
uint32_t HELPER(FN)(void *vd, void *vn, void *vm,                 \
                    void *vg, uint32_t desc)                      \
{                                                                 \
    return do_match(vd, vn, vm, vg, desc, ELT_SZ, NEGATE);        \
}

/* MATCH: result is not inverted. */
DO_PPZZ_MATCH(sve2_match_ppzz_b, MO_8, false)
DO_PPZZ_MATCH(sve2_match_ppzz_h, MO_16, false)

/* NMATCH: result is inverted. */
DO_PPZZ_MATCH(sve2_nmatch_ppzz_b, MO_8, true)
DO_PPZZ_MATCH(sve2_nmatch_ppzz_h, MO_16, true)


r~


      reply	other threads:[~2020-04-15  2:06 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-14 23:16 [PATCH RFC v2] target/arm: Implement SVE2 MATCH, NMATCH Stephen Long
2020-04-15  2:05 ` Richard Henderson [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e7fb7a8c-0dc9-45c3-a0f7-9952b4a60aeb@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=apazos@quicinc.com \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=steplong@quicinc.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).