All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Cc: qemu-ppc@nongnu.org, rth@twiddle.net, qemu-devel@nongnu.org,
	benh@kernel.crashing.org
Subject: Re: [Qemu-devel] [PATCH RESEND v2 15/17] target-ppc: add lxvb16x and lxvh8x
Date: Thu, 15 Sep 2016 11:41:42 +1000	[thread overview]
Message-ID: <20160915014142.GK15077@voom.fritz.box> (raw)
In-Reply-To: <1473662506-27441-16-git-send-email-nikunj@linux.vnet.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 5778 bytes --]

On Mon, Sep 12, 2016 at 12:11:44PM +0530, Nikunj A Dadhania wrote:
> lxvb16x: Load VSX Vector Byte*16
> lxvh8x:  Load VSX Vector Halfword*8
> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  target-ppc/helper.h                 |  1 +
>  target-ppc/mem_helper.c             |  6 ++++
>  target-ppc/translate/vsx-impl.inc.c | 57 +++++++++++++++++++++++++++++++++++++
>  target-ppc/translate/vsx-ops.inc.c  |  2 ++
>  4 files changed, 66 insertions(+)
> 
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 1bbeac4..6de0db7 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -298,6 +298,7 @@ DEF_HELPER_3(lvebx, void, env, avr, tl)
>  DEF_HELPER_3(lvehx, void, env, avr, tl)
>  DEF_HELPER_3(lvewx, void, env, avr, tl)
>  DEF_HELPER_1(bswap32x2, i64, i64)
> +DEF_HELPER_1(bswap16x4, i64, i64)
>  DEF_HELPER_3(stvebx, void, env, avr, tl)
>  DEF_HELPER_3(stvehx, void, env, avr, tl)
>  DEF_HELPER_3(stvewx, void, env, avr, tl)
> diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
> index a56051a..608803f 100644
> --- a/target-ppc/mem_helper.c
> +++ b/target-ppc/mem_helper.c
> @@ -290,6 +290,12 @@ uint64_t helper_bswap32x2(uint64_t x)
>      return deposit64((x >> 32), 32, 32, (x));
>  }
>  
> +uint64_t helper_bswap16x4(uint64_t x)
> +{
> +    uint64_t m = 0x00ff00ff00ff00ffull;
> +    return ((x & m) << 8) | ((x >> 8) & m);
> +}

This doesn't seem to match the bswap32x2 function above.  bswap32x2
just swaps the two 32-bit words in the 64-bit word.  This one swaps
the bytes in each individual 16 bit work in the 64-bit word.

I suspect the bswap32x2 is wrong, which would explain why the previous
patch didn't seem to make sense.

> +
>  #undef HI_IDX
>  #undef LO_IDX
>  
> diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
> index e3374df..caa6660 100644
> --- a/target-ppc/translate/vsx-impl.inc.c
> +++ b/target-ppc/translate/vsx-impl.inc.c
> @@ -108,6 +108,63 @@ static void gen_lxvw4x(DisasContext *ctx)
>      tcg_temp_free(EA);
>  }
>  
> +static void gen_lxvb16x(DisasContext *ctx)
> +{
> +    TCGv EA;
> +    TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
> +    TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
> +
> +    if (unlikely(!ctx->vsx_enabled)) {
> +        gen_exception(ctx, POWERPC_EXCP_VSXU);
> +        return;
> +    }
> +    gen_set_access_type(ctx, ACCESS_INT);
> +    EA = tcg_temp_new();
> +    gen_addr_reg_index(ctx, EA);
> +    if (ctx->le_mode) {
> +        tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ);
> +        tcg_gen_addi_tl(EA, EA, 8);
> +        tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
> +    } else {
> +        tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ);
> +        gen_helper_bswap32x2(xth, xth);

I really don't understand how a bswap32x2 helps here, either as it's
defined now, or as I suspect it should be defined by analogy with
bswap16x4.

> +        tcg_gen_addi_tl(EA, EA, 8);
> +        tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ);
> +        gen_helper_bswap32x2(xtl, xtl);

Also.. if I'm understanding the ISA correctly, this instruction loads
subsequent higher-address bytes into subsequent (i.e. less signficant,
since IBM uses BE bit/byte numbering) bytes in the vector.  Doesn't
that mean you want a BE load in all cases, not just the LE guest case?

> +    }
> +    tcg_temp_free(EA);
> +}
> +
> +static void gen_lxvh8x(DisasContext *ctx)
> +{
> +    TCGv EA;
> +    TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
> +    TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
> +
> +    if (unlikely(!ctx->vsx_enabled)) {
> +        gen_exception(ctx, POWERPC_EXCP_VSXU);
> +        return;
> +    }
> +    gen_set_access_type(ctx, ACCESS_INT);
> +    EA = tcg_temp_new();
> +    gen_addr_reg_index(ctx, EA);
> +
> +    if (ctx->le_mode) {
> +        tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ);
> +        gen_helper_bswap16x4(xth, xth);
> +        tcg_gen_addi_tl(EA, EA, 8);
> +        tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
> +        gen_helper_bswap16x4(xtl, xtl);
> +    } else {
> +        tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ);
> +        gen_helper_bswap32x2(xth, xth);
> +        tcg_gen_addi_tl(EA, EA, 8);
> +        tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ);
> +        gen_helper_bswap32x2(xtl, xtl);

Again, I think you want a BE load in both cases, and the bswap32x2
makes no sense to me.

> +    }
> +    tcg_temp_free(EA);
> +}
> +
>  #define VSX_STORE_SCALAR(name, operation)                     \
>  static void gen_##name(DisasContext *ctx)                     \
>  {                                                             \
> diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
> index 414b73b..598b349 100644
> --- a/target-ppc/translate/vsx-ops.inc.c
> +++ b/target-ppc/translate/vsx-ops.inc.c
> @@ -7,6 +7,8 @@ GEN_HANDLER_E(lxsspx, 0x1F, 0x0C, 0x10, 0, PPC_NONE, PPC2_VSX207),
>  GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX),
>  GEN_HANDLER_E(lxvdsx, 0x1F, 0x0C, 0x0A, 0, PPC_NONE, PPC2_VSX),
>  GEN_HANDLER_E(lxvw4x, 0x1F, 0x0C, 0x18, 0, PPC_NONE, PPC2_VSX),
> +GEN_HANDLER_E(lxvh8x, 0x1F, 0x0C, 0x19, 0, PPC_NONE,  PPC2_ISA300),
> +GEN_HANDLER_E(lxvb16x, 0x1F, 0x0C, 0x1B, 0, PPC_NONE, PPC2_ISA300),
>  
>  GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX),
>  GEN_HANDLER_E(stxsibx, 0x1F, 0xD, 0x1C, 0, PPC_NONE, PPC2_ISA300),

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

  reply	other threads:[~2016-09-15  3:41 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-12  6:41 [Qemu-devel] [PATCH RESEND v2 00/17] POWER9 TCG enablements - part4 Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 01/17] target-ppc: consolidate load operations Nikunj A Dadhania
2016-09-15  0:46   ` David Gibson
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 02/17] target-ppc: convert ld64 to use new macro Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 03/17] target-ppc: convert ld[16, 32, 64]ur " Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 04/17] target-ppc: consolidate store operations Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 05/17] target-ppc: convert st64 to use new macro Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 06/17] target-ppc: convert st[16, 32, 64]r " Nikunj A Dadhania
2016-09-15  0:48   ` David Gibson
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 07/17] target-ppc: consolidate load with reservation Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 08/17] target-ppc: move out stqcx impementation Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 09/17] target-ppc: consolidate store conditional Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 10/17] target-ppc: add xxspltib instruction Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 11/17] target-ppc: implement darn instruction Nikunj A Dadhania
2016-09-15  1:07   ` David Gibson
2016-09-15  6:40     ` Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 12/17] target-ppc: add lxsi[bw]zx instruction Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 13/17] target-ppc: add stxsi[bh]x instruction Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 14/17] target-ppc: improve lxvw4x implementation Nikunj A Dadhania
2016-09-15  1:20   ` David Gibson
2016-09-15  9:57     ` Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 15/17] target-ppc: add lxvb16x and lxvh8x Nikunj A Dadhania
2016-09-15  1:41   ` David Gibson [this message]
2016-09-16  8:26     ` Nikunj A Dadhania
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 16/17] target-ppc: improve stxvw4x implementation Nikunj A Dadhania
2016-09-15  1:44   ` David Gibson
2016-09-12  6:41 ` [Qemu-devel] [PATCH RESEND v2 17/17] target-ppc: add stxvb16x and stxvh8x Nikunj A Dadhania
2016-09-15  1:46   ` David Gibson
2016-09-16  8:28     ` Nikunj A Dadhania
2016-09-12  7:19 ` [Qemu-devel] [PATCH RESEND v2 00/17] POWER9 TCG enablements - part4 no-reply
2016-09-15  0:56 ` David Gibson
2016-09-15  1:49   ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160915014142.GK15077@voom.fritz.box \
    --to=david@gibson.dropbear.id.au \
    --cc=benh@kernel.crashing.org \
    --cc=nikunj@linux.vnet.ibm.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.