qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Jonathan Cameron via <qemu-devel@nongnu.org>
To: Peter Maydell <peter.maydell@linaro.org>
Cc: <qemu-devel@nongnu.org>
Subject: Re: [PULL 26/42] target/arm: Use tcg_gen_qemu_{ld, st}_i128 in gen_sve_{ld, st}r
Date: Mon, 12 Jun 2023 16:20:44 +0100	[thread overview]
Message-ID: <20230612162044.00002fdc@huawei.com> (raw)
In-Reply-To: <20230606094814.3581397-27-peter.maydell@linaro.org>

On Tue,  6 Jun 2023 10:47:58 +0100
Peter Maydell <peter.maydell@linaro.org> wrote:

> From: Richard Henderson <richard.henderson@linaro.org>
> 
> Round len_align to 16 instead of 8, handling an odd 8-byte as part
> of the tail.  Use MO_ATOM_NONE to indicate that all of these memory
> ops have only byte atomicity.
> 
> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> Message-id: 20230530191438.411344-8-richard.henderson@linaro.org
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

Early in debugging but a git bisect pointed at this patch causing:

ERROR:../../tcg/tcg.c:4317:temp_load: code should not be reached
Bail out! ERROR:../../tcg/tcg.c:4317:temp_load: code should not be reached
Aborted

Just after Run /sbin/init arm64 / virt running on an x86 host.
cpu max.

Just reverting this patch results in length check failures.
I reverted as follows


revert: Revert "target/arm: Use tcg_gen_qemu_{ld, st}_i128 in gen_sve_{ld, st}r"
revert: Revert "target/arm: Sink gen_mte_check1 into load/store_exclusive"
revert: Revert "target/arm: Load/store integer pair with one tcg operation"
revert: Revert "target/arm: Hoist finalize_memop out of do_gpr_{ld, st}"
revert: Revert "target/arm: Hoist finalize_memop out of do_fp_{ld, st}"
revert: Revert "target/arm: Pass memop to gen_mte_check1*"
revert: Revert "target/arm: Pass single_memop to gen_mte_checkN"
revert: Revert "target/arm: Check alignment in helper_mte_check"
revert: Revert "target/arm: Add SCTLR.nAA to TBFLAG_A64"
revert: Revert "target/arm: Relax ordered/atomic alignment checks for LSE2"
revert: Revert "target/arm: Move mte check for store-exclusive"

and all is good - obviously that's probably massive overkill.

Jonathan

> ---
>  target/arm/tcg/translate-sve.c | 95 +++++++++++++++++++++++++---------
>  1 file changed, 70 insertions(+), 25 deletions(-)
> 
> diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
> index d9d5810ddea..57051a4729a 100644
> --- a/target/arm/tcg/translate-sve.c
> +++ b/target/arm/tcg/translate-sve.c
> @@ -4167,11 +4167,12 @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
>  void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
>                   int len, int rn, int imm)
>  {
> -    int len_align = QEMU_ALIGN_DOWN(len, 8);
> -    int len_remain = len % 8;
> -    int nparts = len / 8 + ctpop8(len_remain);
> +    int len_align = QEMU_ALIGN_DOWN(len, 16);
> +    int len_remain = len % 16;
> +    int nparts = len / 16 + ctpop8(len_remain);
>      int midx = get_mem_index(s);
>      TCGv_i64 dirty_addr, clean_addr, t0, t1;
> +    TCGv_i128 t16;
>  
>      dirty_addr = tcg_temp_new_i64();
>      tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
> @@ -4188,10 +4189,16 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
>          int i;
>  
>          t0 = tcg_temp_new_i64();
> -        for (i = 0; i < len_align; i += 8) {
> -            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
> +        t1 = tcg_temp_new_i64();
> +        t16 = tcg_temp_new_i128();
> +
> +        for (i = 0; i < len_align; i += 16) {
> +            tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
> +                                 MO_LE | MO_128 | MO_ATOM_NONE);
> +            tcg_gen_extr_i128_i64(t0, t1, t16);
>              tcg_gen_st_i64(t0, base, vofs + i);
> -            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
> +            tcg_gen_st_i64(t1, base, vofs + i + 8);
> +            tcg_gen_addi_i64(clean_addr, clean_addr, 16);
>          }
>      } else {
>          TCGLabel *loop = gen_new_label();
> @@ -4200,14 +4207,21 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
>          tcg_gen_movi_ptr(i, 0);
>          gen_set_label(loop);
>  
> -        t0 = tcg_temp_new_i64();
> -        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
> -        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
> +        t16 = tcg_temp_new_i128();
> +        tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
> +                             MO_LE | MO_128 | MO_ATOM_NONE);
> +        tcg_gen_addi_i64(clean_addr, clean_addr, 16);
>  
>          tp = tcg_temp_new_ptr();
>          tcg_gen_add_ptr(tp, base, i);
> -        tcg_gen_addi_ptr(i, i, 8);
> +        tcg_gen_addi_ptr(i, i, 16);
> +
> +        t0 = tcg_temp_new_i64();
> +        t1 = tcg_temp_new_i64();
> +        tcg_gen_extr_i128_i64(t0, t1, t16);
> +
>          tcg_gen_st_i64(t0, tp, vofs);
> +        tcg_gen_st_i64(t1, tp, vofs + 8);
>  
>          tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
>      }
> @@ -4216,6 +4230,16 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
>       * Predicate register loads can be any multiple of 2.
>       * Note that we still store the entire 64-bit unit into cpu_env.
>       */
> +    if (len_remain >= 8) {
> +        t0 = tcg_temp_new_i64();
> +        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
> +        tcg_gen_st_i64(t0, base, vofs + len_align);
> +        len_remain -= 8;
> +        len_align += 8;
> +        if (len_remain) {
> +            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
> +        }
> +    }
>      if (len_remain) {
>          t0 = tcg_temp_new_i64();
>          switch (len_remain) {
> @@ -4223,14 +4247,14 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
>          case 4:
>          case 8:
>              tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
> -                                MO_LE | ctz32(len_remain));
> +                                MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
>              break;
>  
>          case 6:
>              t1 = tcg_temp_new_i64();
> -            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
> +            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
>              tcg_gen_addi_i64(clean_addr, clean_addr, 4);
> -            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
> +            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
>              tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
>              break;
>  
> @@ -4245,11 +4269,12 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
>  void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
>                   int len, int rn, int imm)
>  {
> -    int len_align = QEMU_ALIGN_DOWN(len, 8);
> -    int len_remain = len % 8;
> -    int nparts = len / 8 + ctpop8(len_remain);
> +    int len_align = QEMU_ALIGN_DOWN(len, 16);
> +    int len_remain = len % 16;
> +    int nparts = len / 16 + ctpop8(len_remain);
>      int midx = get_mem_index(s);
> -    TCGv_i64 dirty_addr, clean_addr, t0;
> +    TCGv_i64 dirty_addr, clean_addr, t0, t1;
> +    TCGv_i128 t16;
>  
>      dirty_addr = tcg_temp_new_i64();
>      tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
> @@ -4267,10 +4292,15 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
>          int i;
>  
>          t0 = tcg_temp_new_i64();
> +        t1 = tcg_temp_new_i64();
> +        t16 = tcg_temp_new_i128();
>          for (i = 0; i < len_align; i += 8) {
>              tcg_gen_ld_i64(t0, base, vofs + i);
> -            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
> -            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
> +            tcg_gen_ld_i64(t1, base, vofs + i + 8);
> +            tcg_gen_concat_i64_i128(t16, t0, t1);
> +            tcg_gen_qemu_st_i128(t16, clean_addr, midx,
> +                                 MO_LE | MO_128 | MO_ATOM_NONE);
> +            tcg_gen_addi_i64(clean_addr, clean_addr, 16);
>          }
>      } else {
>          TCGLabel *loop = gen_new_label();
> @@ -4280,18 +4310,33 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
>          gen_set_label(loop);
>  
>          t0 = tcg_temp_new_i64();
> +        t1 = tcg_temp_new_i64();
>          tp = tcg_temp_new_ptr();
>          tcg_gen_add_ptr(tp, base, i);
>          tcg_gen_ld_i64(t0, tp, vofs);
> -        tcg_gen_addi_ptr(i, i, 8);
> +        tcg_gen_ld_i64(t1, tp, vofs + 8);
> +        tcg_gen_addi_ptr(i, i, 16);
>  
> -        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
> -        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
> +        t16 = tcg_temp_new_i128();
> +        tcg_gen_concat_i64_i128(t16, t0, t1);
> +
> +        tcg_gen_qemu_st_i128(t16, clean_addr, midx, MO_LEUQ);
> +        tcg_gen_addi_i64(clean_addr, clean_addr, 16);
>  
>          tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
>      }
>  
>      /* Predicate register stores can be any multiple of 2.  */
> +    if (len_remain >= 8) {
> +        t0 = tcg_temp_new_i64();
> +        tcg_gen_st_i64(t0, base, vofs + len_align);
> +        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
> +        len_remain -= 8;
> +        len_align += 8;
> +        if (len_remain) {
> +            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
> +        }
> +    }
>      if (len_remain) {
>          t0 = tcg_temp_new_i64();
>          tcg_gen_ld_i64(t0, base, vofs + len_align);
> @@ -4301,14 +4346,14 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
>          case 4:
>          case 8:
>              tcg_gen_qemu_st_i64(t0, clean_addr, midx,
> -                                MO_LE | ctz32(len_remain));
> +                                MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
>              break;
>  
>          case 6:
> -            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
> +            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
>              tcg_gen_addi_i64(clean_addr, clean_addr, 4);
>              tcg_gen_shri_i64(t0, t0, 32);
> -            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
> +            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
>              break;
>  
>          default:



  reply	other threads:[~2023-06-12 15:21 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-06  9:47 [PULL 00/42] target-arm queue Peter Maydell
2023-06-06  9:47 ` [PULL 01/42] arm: move KVM breakpoints helpers Peter Maydell
2023-06-06  9:47 ` [PULL 02/42] hvf: handle access for more registers Peter Maydell
2023-06-06  9:47 ` [PULL 03/42] hvf: add breakpoint handlers Peter Maydell
2023-06-06  9:47 ` [PULL 04/42] hvf: add guest debugging handlers for Apple Silicon hosts Peter Maydell
2023-06-06  9:47 ` [PULL 05/42] hw/net/can: Introduce Xilinx Versal CANFD controller Peter Maydell
2023-06-06  9:47 ` [PULL 06/42] xlnx-versal: Connect Xilinx VERSAL CANFD controllers Peter Maydell
2023-06-06  9:47 ` [PULL 07/42] MAINTAINERS: Include canfd tests under Xilinx CAN Peter Maydell
2023-06-06  9:47 ` [PULL 08/42] tests/qtest: Introduce tests for Xilinx VERSAL CANFD controller Peter Maydell
2023-06-06  9:47 ` [PULL 09/42] hw: arm: Add bananapi M2-Ultra and allwinner-r40 support Peter Maydell
2023-06-06  9:47 ` [PULL 10/42] hw/arm/allwinner-r40: add Clock Control Unit Peter Maydell
2023-06-06  9:47 ` [PULL 11/42] hw: allwinner-r40: Complete uart devices Peter Maydell
2023-06-06  9:47 ` [PULL 12/42] hw: arm: allwinner-r40: Add i2c0 device Peter Maydell
2023-06-06  9:47 ` [PULL 13/42] hw/misc: Rename axp209 to axp22x and add support AXP221 PMU Peter Maydell
2023-06-06  9:47 ` [PULL 14/42] hw/arm/allwinner-r40: add SDRAM controller device Peter Maydell
2023-06-06  9:47 ` [PULL 15/42] hw: sd: allwinner-sdhost: Add sun50i-a64 SoC support Peter Maydell
2023-06-06  9:47 ` [PULL 16/42] hw: arm: allwinner-r40: Add emac and gmac support Peter Maydell
2023-06-06  9:47 ` [PULL 17/42] hw: arm: allwinner-sramc: Add SRAM Controller support for R40 Peter Maydell
2023-06-06  9:47 ` [PULL 18/42] tests: avocado: boot_linux_console: Add test case for bpim2u Peter Maydell
2023-06-29 11:35   ` Thomas Huth
2023-06-30  6:15     ` qianfan
2023-06-30  6:22       ` qianfan
2023-06-30  7:27       ` Thomas Huth
2023-06-30  8:45         ` qianfan
2023-06-30  8:53           ` Thomas Huth
2023-06-30  9:04             ` qianfan
2023-06-30 15:45               ` Thomas Huth
2023-07-03 11:14                 ` Peter Maydell
2023-06-06  9:47 ` [PULL 19/42] docs: system: arm: Introduce bananapi_m2u Peter Maydell
2023-06-06  9:47 ` [PULL 20/42] target/arm: Add commentary for CPUARMState.exclusive_high Peter Maydell
2023-06-06  9:47 ` [PULL 21/42] target/arm: Add feature test for FEAT_LSE2 Peter Maydell
2023-06-06  9:47 ` [PULL 22/42] target/arm: Introduce finalize_memop_{atom,pair} Peter Maydell
2023-06-06  9:47 ` [PULL 23/42] target/arm: Use tcg_gen_qemu_ld_i128 for LDXP Peter Maydell
2023-06-06  9:47 ` [PULL 24/42] target/arm: Use tcg_gen_qemu_{st, ld}_i128 for do_fp_{st, ld} Peter Maydell
2023-06-06  9:47 ` [PULL 25/42] target/arm: Use tcg_gen_qemu_st_i128 for STZG, STZ2G Peter Maydell
2023-06-06  9:47 ` [PULL 26/42] target/arm: Use tcg_gen_qemu_{ld, st}_i128 in gen_sve_{ld, st}r Peter Maydell
2023-06-12 15:20   ` Jonathan Cameron via [this message]
2023-06-12 18:40     ` Mark Cave-Ayland
2023-06-13  9:26       ` Jonathan Cameron via
2023-06-06  9:47 ` [PULL 27/42] target/arm: Sink gen_mte_check1 into load/store_exclusive Peter Maydell
2023-06-06  9:48 ` [PULL 28/42] target/arm: Load/store integer pair with one tcg operation Peter Maydell
2023-06-06  9:48 ` [PULL 29/42] target/arm: Hoist finalize_memop out of do_gpr_{ld, st} Peter Maydell
2023-06-06  9:48 ` [PULL 30/42] target/arm: Hoist finalize_memop out of do_fp_{ld, st} Peter Maydell
2023-06-06  9:48 ` [PULL 31/42] target/arm: Pass memop to gen_mte_check1* Peter Maydell
2023-06-06  9:48 ` [PULL 32/42] target/arm: Pass single_memop to gen_mte_checkN Peter Maydell
2023-06-06  9:48 ` [PULL 33/42] target/arm: Check alignment in helper_mte_check Peter Maydell
2023-06-06  9:48 ` [PULL 34/42] target/arm: Add SCTLR.nAA to TBFLAG_A64 Peter Maydell
2023-06-06  9:48 ` [PULL 35/42] target/arm: Relax ordered/atomic alignment checks for LSE2 Peter Maydell
2023-06-06  9:48 ` [PULL 36/42] target/arm: Move mte check for store-exclusive Peter Maydell
2023-06-06  9:48 ` [PULL 37/42] tests/tcg/aarch64: Use stz2g in mte-7.c Peter Maydell
2023-06-06  9:48 ` [PULL 38/42] tests/tcg/multiarch: Adjust sigbus.c Peter Maydell
2023-06-06  9:48 ` [PULL 39/42] target/arm: Enable FEAT_LSE2 for -cpu max Peter Maydell
2023-06-06  9:48 ` [PULL 40/42] target/arm: allow DC CVA[D]P in user mode emulation Peter Maydell
2023-06-06  9:48 ` [PULL 41/42] tests/tcg/aarch64: add DC CVA[D]P tests Peter Maydell
2023-06-06  9:48 ` [PULL 42/42] target/arm: trap DCC access in user mode emulation Peter Maydell
2023-06-06 21:36 ` [PULL 00/42] target-arm queue Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230612162044.00002fdc@huawei.com \
    --to=qemu-devel@nongnu.org \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=peter.maydell@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).