qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR
@ 2025-08-03  1:40 Richard Henderson
  2025-08-03  1:40 ` [PATCH 01/11] target/arm: Add feature predicate for FEAT_CSSC Richard Henderson
                   ` (11 more replies)
  0 siblings, 12 replies; 25+ messages in thread
From: Richard Henderson @ 2025-08-03  1:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

These two extensions are related due to sharing ID_AA64ISAR2.CSSC.

I had intended to test by bootstrapping gcc under emulation with
-march=native (that seemed the easiest way to test the short-range
branches), but that failed for unrelated reasons.

I'll try again next week.


r~


Richard Henderson (11):
  target/arm: Add feature predicate for FEAT_CSSC
  target/arm: Implement MIN/MAX (immediate)
  target/arm: Implement MIN/MAX (register)
  target/arm: Split out gen_wrap2_i32 helper
  target/arm: Implement CTZ, CNT, ABS
  target/arm: Enable FEAT_CSSC for -cpu max
  target/arm: Add feature predicate for FEAT_CMPBR
  target/arm: Implement CB, CBB, CBH
  target/arm: Implement CB (immediate)
  linux-user: Change exported get_elf_hwcap to abi_ulong
  target/arm: Enable FEAT_CMPBR for -cpu max

 linux-user/loader.h            |   2 +-
 target/arm/cpu-features.h      |  10 ++
 linux-user/elfload.c           |  12 +-
 target/arm/tcg/cpu64.c         |   1 +
 target/arm/tcg/translate-a64.c | 219 +++++++++++++++++++++++++++++++--
 docs/system/arm/emulation.rst  |   2 +
 target/arm/tcg/a64.decode      |  26 ++++
 7 files changed, 256 insertions(+), 16 deletions(-)

-- 
2.43.0



^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 01/11] target/arm: Add feature predicate for FEAT_CSSC
  2025-08-03  1:40 [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Richard Henderson
@ 2025-08-03  1:40 ` Richard Henderson
  2025-08-15 13:06   ` Peter Maydell
  2025-08-03  1:40 ` [PATCH 02/11] target/arm: Implement MIN/MAX (immediate) Richard Henderson
                   ` (10 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Richard Henderson @ 2025-08-03  1:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu-features.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index 5876162428..f12c5c8be7 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -604,6 +604,11 @@ static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id)
     return FIELD_EX64_IDREG(id, ID_AA64ISAR2, RPRES);
 }
 
+static inline bool isar_feature_aa64_cssc(const ARMISARegisters *id)
+{
+    return FIELD_EX64_IDREG(id, ID_AA64ISAR2, CSSC) != 0;
+}
+
 static inline bool isar_feature_aa64_lut(const ARMISARegisters *id)
 {
     return FIELD_EX64_IDREG(id, ID_AA64ISAR2, LUT);
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 02/11] target/arm: Implement MIN/MAX (immediate)
  2025-08-03  1:40 [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Richard Henderson
  2025-08-03  1:40 ` [PATCH 01/11] target/arm: Add feature predicate for FEAT_CSSC Richard Henderson
@ 2025-08-03  1:40 ` Richard Henderson
  2025-08-15 13:18   ` Peter Maydell
  2025-08-03  1:40 ` [PATCH 03/11] target/arm: Implement MIN/MAX (register) Richard Henderson
                   ` (9 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Richard Henderson @ 2025-08-03  1:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/translate-a64.c | 44 ++++++++++++++++++++++++++++++++++
 target/arm/tcg/a64.decode      | 10 ++++++++
 2 files changed, 54 insertions(+)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index dbf47595db..b70ae5befd 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -4552,6 +4552,50 @@ TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
 
+/*
+ * Min/Max (immediate)
+ */
+
+static void gen_wrap3_i32(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, NeonGenTwoOpFn fn)
+{
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+
+    tcg_gen_extrl_i64_i32(t1, n);
+    tcg_gen_extrl_i64_i32(t2, m);
+    fn(t1, t1, t2);
+    tcg_gen_extu_i32_i64(d, t1);
+}
+
+static void gen_smax32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+    gen_wrap3_i32(d, n, m, tcg_gen_smax_i32);
+}
+
+static void gen_smin32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+    gen_wrap3_i32(d, n, m, tcg_gen_smin_i32);
+}
+
+static void gen_umax32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+    gen_wrap3_i32(d, n, m, tcg_gen_umax_i32);
+}
+
+static void gen_umin32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+    gen_wrap3_i32(d, n, m, tcg_gen_umin_i32);
+}
+
+TRANS_FEAT(SMAX_i, aa64_cssc, gen_rri, a, 0, 0,
+           a->sf ? tcg_gen_smax_i64 : gen_smax32_i64)
+TRANS_FEAT(SMIN_i, aa64_cssc, gen_rri, a, 0, 0,
+           a->sf ? tcg_gen_smin_i64 : gen_smin32_i64)
+TRANS_FEAT(UMAX_i, aa64_cssc, gen_rri, a, 0, 0,
+           a->sf ? tcg_gen_umax_i64 : gen_umax32_i64)
+TRANS_FEAT(UMIN_i, aa64_cssc, gen_rri, a, 0, 0,
+           a->sf ? tcg_gen_umin_i64 : gen_umin32_i64)
+
 /*
  * Add/subtract (immediate, with tags)
  */
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 8c798cde2b..c1811b0274 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -156,6 +156,16 @@ MOVZ            . 10 100101 .. ................ .....   @movw_32
 MOVK            . 11 100101 .. ................ .....   @movw_64
 MOVK            . 11 100101 .. ................ .....   @movw_32
 
+# Min/Max (immediate)
+
+@minmaxi_s      sf:1 .. ........... imm:s8 rn:5 rd:5    &rri_sf
+@minmaxi_u      sf:1 .. ........... imm:8  rn:5 rd:5    &rri_sf
+
+SMAX_i          . 00 1000111 0000 ........ ..... .....  @minmaxi_s
+SMIN_i          . 00 1000111 0010 ........ ..... .....  @minmaxi_s
+UMAX_i          . 00 1000111 0001 ........ ..... .....  @minmaxi_u
+UMIN_i          . 00 1000111 0011 ........ ..... .....  @minmaxi_u
+
 # Bitfield
 
 &bitfield       rd rn sf immr imms
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 03/11] target/arm: Implement MIN/MAX (register)
  2025-08-03  1:40 [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Richard Henderson
  2025-08-03  1:40 ` [PATCH 01/11] target/arm: Add feature predicate for FEAT_CSSC Richard Henderson
  2025-08-03  1:40 ` [PATCH 02/11] target/arm: Implement MIN/MAX (immediate) Richard Henderson
@ 2025-08-03  1:40 ` Richard Henderson
  2025-08-15 13:20   ` Peter Maydell
  2025-08-03  1:40 ` [PATCH 04/11] target/arm: Split out gen_wrap2_i32 helper Richard Henderson
                   ` (8 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Richard Henderson @ 2025-08-03  1:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/translate-a64.c | 22 ++++++++++++++++++++++
 target/arm/tcg/a64.decode      |  5 +++++
 2 files changed, 27 insertions(+)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index b70ae5befd..bb92bdc296 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -8201,6 +8201,28 @@ static bool trans_PACGA(DisasContext *s, arg_rrr *a)
     return false;
 }
 
+static bool gen_rrr(DisasContext *s, arg_rrr_sf *a, ArithTwoOp fn)
+{
+    TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
+    TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
+    TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+
+    fn(tcg_rd, tcg_rn, tcg_rm);
+    if (!a->sf) {
+        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+    }
+    return true;
+}
+
+TRANS_FEAT(SMAX, aa64_cssc, gen_rrr, a,
+           a->sf ? tcg_gen_smax_i64 : gen_smax32_i64)
+TRANS_FEAT(SMIN, aa64_cssc, gen_rrr, a,
+           a->sf ? tcg_gen_smin_i64 : gen_smin32_i64)
+TRANS_FEAT(UMAX, aa64_cssc, gen_rrr, a,
+           a->sf ? tcg_gen_umax_i64 : gen_umax32_i64)
+TRANS_FEAT(UMIN, aa64_cssc, gen_rrr, a,
+           a->sf ? tcg_gen_umin_i64 : gen_umin32_i64)
+
 typedef void ArithOneOp(TCGv_i64, TCGv_i64);
 
 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index c1811b0274..a886b3ba4c 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -708,6 +708,11 @@ GMI             1 00 11010110 ..... 000101 ..... .....  @rrr
 
 PACGA           1 00 11010110 ..... 001100 ..... .....  @rrr
 
+SMAX            . 00 11010110 ..... 011000 ..... .....  @rrr_sf
+SMIN            . 00 11010110 ..... 011010 ..... .....  @rrr_sf
+UMAX            . 00 11010110 ..... 011001 ..... .....  @rrr_sf
+UMIN            . 00 11010110 ..... 011011 ..... .....  @rrr_sf
+
 # Data Processing (1-source)
 
 @rr             . .......... ..... ...... rn:5 rd:5     &rr
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 04/11] target/arm: Split out gen_wrap2_i32 helper
  2025-08-03  1:40 [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Richard Henderson
                   ` (2 preceding siblings ...)
  2025-08-03  1:40 ` [PATCH 03/11] target/arm: Implement MIN/MAX (register) Richard Henderson
@ 2025-08-03  1:40 ` Richard Henderson
  2025-08-15 13:09   ` Peter Maydell
  2025-08-15 16:05   ` Peter Maydell
  2025-08-03  1:40 ` [PATCH 05/11] target/arm: Implement CTZ, CNT, ABS Richard Henderson
                   ` (7 subsequent siblings)
  11 siblings, 2 replies; 25+ messages in thread
From: Richard Henderson @ 2025-08-03  1:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

Wrapper to extract the low 32 bits, perform an operation,
and zero-extend back to 64 bits.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/translate-a64.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index bb92bdc296..64a845d5fb 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -8231,13 +8231,18 @@ static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
     return true;
 }
 
+static void gen_wrap2_i32(TCGv_i64 d, TCGv_i64 n, NeonGenOneOpFn fn)
+{
+    TCGv_i32 t = tcg_temp_new_i32();
+
+    tcg_gen_extrl_i64_i32(t, n);
+    fn(t, t);
+    tcg_gen_extu_i32_i64(d, t);
+}
+
 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
 {
-    TCGv_i32 t32 = tcg_temp_new_i32();
-
-    tcg_gen_extrl_i64_i32(t32, tcg_rn);
-    gen_helper_rbit(t32, t32);
-    tcg_gen_extu_i32_i64(tcg_rd, t32);
+    gen_wrap2_i32(tcg_rn, tcg_rn, gen_helper_rbit);
 }
 
 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
@@ -8293,11 +8298,7 @@ static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
 
 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
 {
-    TCGv_i32 t32 = tcg_temp_new_i32();
-
-    tcg_gen_extrl_i64_i32(t32, tcg_rn);
-    tcg_gen_clrsb_i32(t32, t32);
-    tcg_gen_extu_i32_i64(tcg_rd, t32);
+    gen_wrap2_i32(tcg_rn, tcg_rn, tcg_gen_clrsb_i32);
 }
 
 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 05/11] target/arm: Implement CTZ, CNT, ABS
  2025-08-03  1:40 [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Richard Henderson
                   ` (3 preceding siblings ...)
  2025-08-03  1:40 ` [PATCH 04/11] target/arm: Split out gen_wrap2_i32 helper Richard Henderson
@ 2025-08-03  1:40 ` Richard Henderson
  2025-08-15 13:26   ` Peter Maydell
  2025-08-26 16:11   ` Peter Maydell
  2025-08-03  1:40 ` [PATCH 06/11] target/arm: Enable FEAT_CSSC for -cpu max Richard Henderson
                   ` (6 subsequent siblings)
  11 siblings, 2 replies; 25+ messages in thread
From: Richard Henderson @ 2025-08-03  1:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/translate-a64.c | 31 +++++++++++++++++++++++++++++++
 target/arm/tcg/a64.decode      |  4 ++++
 2 files changed, 35 insertions(+)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 64a845d5fb..0c78d4bb79 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -8304,6 +8304,37 @@ static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
 
+static void gen_ctz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
+{
+    TCGv_i32 t32 = tcg_temp_new_i32();
+
+    tcg_gen_extrl_i64_i32(t32, tcg_rn);
+    tcg_gen_ctzi_i32(t32, t32, 32);
+    tcg_gen_extu_i32_i64(tcg_rd, t32);
+}
+
+static void gen_ctz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
+{
+    tcg_gen_ctzi_i64(tcg_rd, tcg_rn, 64);
+}
+
+static void gen_cnt32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
+{
+    gen_wrap2_i32(tcg_rn, tcg_rn, tcg_gen_ctpop_i32);
+}
+
+static void gen_abs32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
+{
+    gen_wrap2_i32(tcg_rn, tcg_rn, tcg_gen_abs_i32);
+}
+
+TRANS_FEAT(CTZ, aa64_cssc, gen_rr, a->rd, a->rn,
+           a->sf ? gen_ctz64 : gen_ctz32)
+TRANS_FEAT(CNT, aa64_cssc, gen_rr, a->rd, a->rn,
+           a->sf ? tcg_gen_ctpop_i64 : gen_cnt32)
+TRANS_FEAT(ABS, aa64_cssc, gen_rr, a->rd, a->rn,
+           a->sf ? tcg_gen_abs_i64 : gen_abs32)
+
 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
 {
     TCGv_i64 tcg_rd, tcg_rn;
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index a886b3ba4c..766c610c01 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -726,6 +726,10 @@ REV64           1 10 11010110 00000 000011 ..... .....  @rr
 CLZ             . 10 11010110 00000 000100 ..... .....  @rr_sf
 CLS             . 10 11010110 00000 000101 ..... .....  @rr_sf
 
+CTZ             . 10 11010110 00000 000110 ..... .....  @rr_sf
+CNT             . 10 11010110 00000 000111 ..... .....  @rr_sf
+ABS             . 10 11010110 00000 001000 ..... .....  @rr_sf
+
 &pacaut         rd rn z
 @pacaut         . .. ........ ..... .. z:1 ... rn:5 rd:5  &pacaut
 
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 06/11] target/arm: Enable FEAT_CSSC for -cpu max
  2025-08-03  1:40 [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Richard Henderson
                   ` (4 preceding siblings ...)
  2025-08-03  1:40 ` [PATCH 05/11] target/arm: Implement CTZ, CNT, ABS Richard Henderson
@ 2025-08-03  1:40 ` Richard Henderson
  2025-08-15 13:10   ` Peter Maydell
  2025-08-03  1:40 ` [PATCH 07/11] target/arm: Add feature predicate for FEAT_CMPBR Richard Henderson
                   ` (5 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Richard Henderson @ 2025-08-03  1:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 linux-user/elfload.c          | 1 +
 target/arm/tcg/cpu64.c        | 1 +
 docs/system/arm/emulation.rst | 1 +
 3 files changed, 3 insertions(+)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index ea214105ff..baa618d6c2 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -923,6 +923,7 @@ uint64_t get_elf_hwcap2(void)
     GET_FEATURE_ID(aa64_sme_b16b16, ARM_HWCAP2_A64_SME_B16B16);
     GET_FEATURE_ID(aa64_sme_f16f16, ARM_HWCAP2_A64_SME_F16F16);
     GET_FEATURE_ID(aa64_sve_b16b16, ARM_HWCAP2_A64_SVE_B16B16);
+    GET_FEATURE_ID(aa64_cssc, ARM_HWCAP2_A64_CSSC);
 
     return hwcaps;
 }
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index 35cddbafa4..d4c021e34d 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -1178,6 +1178,7 @@ void aarch64_max_tcg_initfn(Object *obj)
     t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1);     /* FEAT_MOPS */
     t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1);       /* FEAT_HBC */
     t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2);     /* FEAT_WFxT */
+    t = FIELD_DP64(t, ID_AA64ISAR2, CSSC, 1);     /* FEAT_CSSC */
     SET_IDREG(isar, ID_AA64ISAR2, t);
 
     t = GET_IDREG(isar, ID_AA64PFR0);
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
index 890dc6fee2..b9b11e82b3 100644
--- a/docs/system/arm/emulation.rst
+++ b/docs/system/arm/emulation.rst
@@ -30,6 +30,7 @@ the following architecture extensions:
 - FEAT_CMOW (Control for cache maintenance permission)
 - FEAT_CRC32 (CRC32 instructions)
 - FEAT_Crypto (Cryptographic Extension)
+- FEAT_CSSC (Common Short Sequence Compression instructions)
 - FEAT_CSV2 (Cache speculation variant 2)
 - FEAT_CSV2_1p1 (Cache speculation variant 2, version 1.1)
 - FEAT_CSV2_1p2 (Cache speculation variant 2, version 1.2)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 07/11] target/arm: Add feature predicate for FEAT_CMPBR
  2025-08-03  1:40 [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Richard Henderson
                   ` (5 preceding siblings ...)
  2025-08-03  1:40 ` [PATCH 06/11] target/arm: Enable FEAT_CSSC for -cpu max Richard Henderson
@ 2025-08-03  1:40 ` Richard Henderson
  2025-08-03  1:40 ` [PATCH 08/11] target/arm: Implement CB, CBB, CBH Richard Henderson
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 25+ messages in thread
From: Richard Henderson @ 2025-08-03  1:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu-features.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index f12c5c8be7..744199b776 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -609,6 +609,11 @@ static inline bool isar_feature_aa64_cssc(const ARMISARegisters *id)
     return FIELD_EX64_IDREG(id, ID_AA64ISAR2, CSSC) != 0;
 }
 
+static inline bool isar_feature_aa64_cmpbr(const ARMISARegisters *id)
+{
+    return FIELD_EX64_IDREG(id, ID_AA64ISAR2, CSSC) >= 2;
+}
+
 static inline bool isar_feature_aa64_lut(const ARMISARegisters *id)
 {
     return FIELD_EX64_IDREG(id, ID_AA64ISAR2, LUT);
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 08/11] target/arm: Implement CB, CBB, CBH
  2025-08-03  1:40 [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Richard Henderson
                   ` (6 preceding siblings ...)
  2025-08-03  1:40 ` [PATCH 07/11] target/arm: Add feature predicate for FEAT_CMPBR Richard Henderson
@ 2025-08-03  1:40 ` Richard Henderson
  2025-08-03 12:31   ` Paolo Bonzini
  2025-08-03  1:40 ` [PATCH 09/11] target/arm: Implement CB (immediate) Richard Henderson
                   ` (3 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Richard Henderson @ 2025-08-03  1:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

Compare and branch instructions, with various operand widths.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/translate-a64.c | 60 ++++++++++++++++++++++++++++++++++
 target/arm/tcg/a64.decode      |  5 +++
 2 files changed, 65 insertions(+)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 0c78d4bb79..f3970ac599 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -1706,6 +1706,66 @@ static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
     return true;
 }
 
+static bool do_cb_cond(DisasContext *s, int cc, int imm,
+                       int rt, int rm, MemOp mop)
+{
+    static const TCGCond cb_cond[8] = {
+        [0] = TCG_COND_GT,
+        [1] = TCG_COND_GE,
+        [2] = TCG_COND_GTU,
+        [3] = TCG_COND_GEU,
+        [4] = TCG_COND_NEVER,  /* reserved */
+        [5] = TCG_COND_NEVER,  /* reserved */
+        [6] = TCG_COND_EQ,
+        [7] = TCG_COND_NE,
+    };
+    TCGCond cond = cb_cond[cc];
+    TCGv_i64 t, m;
+
+    if (!dc_isar_feature(aa64_cmpbr, s) || cond == TCG_COND_NEVER) {
+        return false;
+    }
+
+    t = cpu_reg(s, rt);
+    m = cpu_reg(s, rm);
+    if (mop != MO_64) {
+        TCGv_i64 tt = tcg_temp_new_i64();
+        TCGv_i64 tm = tcg_temp_new_i64();
+
+        if (is_signed_cond(cond)) {
+            mop |= MO_SIGN;
+        }
+        tcg_gen_ext_i64(tt, t, mop);
+        tcg_gen_ext_i64(tm, m, mop);
+        t = tt;
+        m = tm;
+    }
+
+    reset_btype(s);
+    DisasLabel match = gen_disas_label(s);
+
+    tcg_gen_brcond_i64(cond, t, m, match.label);
+    gen_goto_tb(s, 0, 4);
+    set_disas_label(s, match);
+    gen_goto_tb(s, 1, imm);
+    return true;
+}
+
+static bool trans_CB_cond(DisasContext *s, arg_CB_cond *a)
+{
+    return do_cb_cond(s, a->cc, a->imm, a->rt, a->rm, a->sf ? 64 : 32);
+}
+
+static bool trans_CBB_cond(DisasContext *s, arg_CBB_cond *a)
+{
+    return do_cb_cond(s, a->cc, a->imm, a->rt, a->rm, 8);
+}
+
+static bool trans_CBH_cond(DisasContext *s, arg_CBH_cond *a)
+{
+    return do_cb_cond(s, a->cc, a->imm, 16, a->rt, a->rm);
+}
+
 static void set_btype_for_br(DisasContext *s, int rn)
 {
     if (dc_isar_feature(aa64_bti, s)) {
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 766c610c01..fa94631123 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -208,6 +208,11 @@ TBZ             . 011011 nz:1 ..... .............. rt:5 &tbz  imm=%imm14 bitpos=
 # B.cond and BC.cond
 B_cond          0101010 0 ................... c:1 cond:4 imm=%imm19
 
+%imm9           5:s9   !function=times_4
+CB_cond         sf:1 1110100 cc:3 rm:5 00 ......... rt:5    imm=%imm9
+CBB_cond        0    1110100 cc:3 rm:5 10 ......... rt:5    imm=%imm9
+CBH_cond        0    1110100 cc:3 rm:5 11 ......... rt:5    imm=%imm9
+
 BR              1101011 0000 11111 000000 rn:5 00000 &r
 BLR             1101011 0001 11111 000000 rn:5 00000 &r
 RET             1101011 0010 11111 000000 rn:5 00000 &r
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 09/11] target/arm: Implement CB (immediate)
  2025-08-03  1:40 [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Richard Henderson
                   ` (7 preceding siblings ...)
  2025-08-03  1:40 ` [PATCH 08/11] target/arm: Implement CB, CBB, CBH Richard Henderson
@ 2025-08-03  1:40 ` Richard Henderson
  2025-08-03  1:40 ` [PATCH 10/11] linux-user: Change exported get_elf_hwcap to abi_ulong Richard Henderson
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 25+ messages in thread
From: Richard Henderson @ 2025-08-03  1:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/translate-a64.c | 41 ++++++++++++++++++++++++++++++++++
 target/arm/tcg/a64.decode      |  2 ++
 2 files changed, 43 insertions(+)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index f3970ac599..c47d4d0a0b 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -1766,6 +1766,47 @@ static bool trans_CBH_cond(DisasContext *s, arg_CBH_cond *a)
     return do_cb_cond(s, a->cc, a->imm, 16, a->rt, a->rm);
 }
 
+static bool trans_CB_cond_imm(DisasContext *s, arg_CB_cond_imm *a)
+{
+    static const TCGCond cb_cond[8] = {
+        [0] = TCG_COND_GT,
+        [1] = TCG_COND_LT,
+        [2] = TCG_COND_GTU,
+        [3] = TCG_COND_LTU,
+        [4] = TCG_COND_NEVER,  /* reserved */
+        [5] = TCG_COND_NEVER,  /* reserved */
+        [6] = TCG_COND_EQ,
+        [7] = TCG_COND_NE,
+    };
+    TCGCond cond = cb_cond[a->cc];
+    TCGv_i64 t;
+
+    if (!dc_isar_feature(aa64_cmpbr, s) || cond == TCG_COND_NEVER) {
+        return false;
+    }
+
+    t = cpu_reg(s, a->rt);
+    if (!a->sf) {
+        TCGv_i64 tt = tcg_temp_new_i64();
+
+        if (is_signed_cond(cond)) {
+            tcg_gen_ext32s_i64(tt, t);
+        } else {
+            tcg_gen_ext32u_i64(tt, t);
+        }
+        t = tt;
+    }
+
+    reset_btype(s);
+    DisasLabel match = gen_disas_label(s);
+
+    tcg_gen_brcondi_i64(cond, t, a->imm6, match.label);
+    gen_goto_tb(s, 0, 4);
+    set_disas_label(s, match);
+    gen_goto_tb(s, 1, a->imm9);
+    return true;
+}
+
 static void set_btype_for_br(DisasContext *s, int rn)
 {
     if (dc_isar_feature(aa64_bti, s)) {
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index fa94631123..02e0aa1fe4 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -213,6 +213,8 @@ CB_cond         sf:1 1110100 cc:3 rm:5 00 ......... rt:5    imm=%imm9
 CBB_cond        0    1110100 cc:3 rm:5 10 ......... rt:5    imm=%imm9
 CBH_cond        0    1110100 cc:3 rm:5 11 ......... rt:5    imm=%imm9
 
+CB_cond_imm     sf:1 1110101 cc:3 imm6:6 0 ......... rt:5   %imm9
+
 BR              1101011 0000 11111 000000 rn:5 00000 &r
 BLR             1101011 0001 11111 000000 rn:5 00000 &r
 RET             1101011 0010 11111 000000 rn:5 00000 &r
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 10/11] linux-user: Change exported get_elf_hwcap to abi_ulong
  2025-08-03  1:40 [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Richard Henderson
                   ` (8 preceding siblings ...)
  2025-08-03  1:40 ` [PATCH 09/11] target/arm: Implement CB (immediate) Richard Henderson
@ 2025-08-03  1:40 ` Richard Henderson
  2025-08-15 13:28   ` Peter Maydell
  2025-08-03  1:40 ` [PATCH 11/11] target/arm: Enable FEAT_CMPBR for -cpu max Richard Henderson
  2025-08-15 14:10 ` [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Peter Maydell
  11 siblings, 1 reply; 25+ messages in thread
From: Richard Henderson @ 2025-08-03  1:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

AArch64 is about to add the first 64-bit hwcap bit, so we
have to expand the return type.  Since the only user
assigns this to an abi_ulong, match that type.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 linux-user/loader.h  |  2 +-
 linux-user/elfload.c | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/linux-user/loader.h b/linux-user/loader.h
index e102e6f410..e1badd4393 100644
--- a/linux-user/loader.h
+++ b/linux-user/loader.h
@@ -99,7 +99,7 @@ abi_long memcpy_to_target(abi_ulong dest, const void *src,
 extern unsigned long guest_stack_size;
 
 #if defined(TARGET_S390X) || defined(TARGET_AARCH64) || defined(TARGET_ARM)
-uint32_t get_elf_hwcap(void);
+abi_ulong get_elf_hwcap(void);
 const char *elf_hwcap_str(uint32_t bit);
 #endif
 #if defined(TARGET_AARCH64) || defined(TARGET_ARM)
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index baa618d6c2..4ab6bcd50d 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -511,10 +511,10 @@ static bool init_guest_commpage(void)
 #define ELF_HWCAP get_elf_hwcap()
 #define ELF_HWCAP2 get_elf_hwcap2()
 
-uint32_t get_elf_hwcap(void)
+abi_ulong get_elf_hwcap(void)
 {
     ARMCPU *cpu = ARM_CPU(thread_cpu);
-    uint32_t hwcaps = 0;
+    abi_ulong hwcaps = 0;
 
     hwcaps |= ARM_HWCAP_ARM_SWP;
     hwcaps |= ARM_HWCAP_ARM_HALF;
@@ -841,10 +841,10 @@ enum {
 #define GET_FEATURE_ID(feat, hwcap) \
     do { if (cpu_isar_feature(feat, cpu)) { hwcaps |= hwcap; } } while (0)
 
-uint32_t get_elf_hwcap(void)
+abi_ulong get_elf_hwcap(void)
 {
     ARMCPU *cpu = ARM_CPU(thread_cpu);
-    uint32_t hwcaps = 0;
+    abi_ulong hwcaps = 0;
 
     hwcaps |= ARM_HWCAP_A64_FP;
     hwcaps |= ARM_HWCAP_A64_ASIMD;
@@ -1825,7 +1825,7 @@ static inline void init_thread(struct target_pt_regs *regs,
 #define GET_FEATURE(_feat, _hwcap) \
     do { if (s390_has_feat(_feat)) { hwcap |= _hwcap; } } while (0)
 
-uint32_t get_elf_hwcap(void)
+abi_ulong get_elf_hwcap(void)
 {
     /*
      * Let's assume we always have esan3 and zarch.
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 11/11] target/arm: Enable FEAT_CMPBR for -cpu max
  2025-08-03  1:40 [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Richard Henderson
                   ` (9 preceding siblings ...)
  2025-08-03  1:40 ` [PATCH 10/11] linux-user: Change exported get_elf_hwcap to abi_ulong Richard Henderson
@ 2025-08-03  1:40 ` Richard Henderson
  2025-08-15 14:10 ` [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Peter Maydell
  11 siblings, 0 replies; 25+ messages in thread
From: Richard Henderson @ 2025-08-03  1:40 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 linux-user/elfload.c          | 1 +
 target/arm/tcg/cpu64.c        | 2 +-
 docs/system/arm/emulation.rst | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 4ab6bcd50d..b2461aad2c 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -877,6 +877,7 @@ abi_ulong get_elf_hwcap(void)
     GET_FEATURE_ID(aa64_dcpop, ARM_HWCAP_A64_DCPOP);
     GET_FEATURE_ID(aa64_rcpc_8_3, ARM_HWCAP_A64_LRCPC);
     GET_FEATURE_ID(aa64_rcpc_8_4, ARM_HWCAP_A64_ILRCPC);
+    GET_FEATURE_ID(aa64_cmpbr, ARM_HWCAP_A64_CMPBR);
 
     return hwcaps;
 }
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index d4c021e34d..d0ecd517e7 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -1178,7 +1178,7 @@ void aarch64_max_tcg_initfn(Object *obj)
     t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1);     /* FEAT_MOPS */
     t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1);       /* FEAT_HBC */
     t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2);     /* FEAT_WFxT */
-    t = FIELD_DP64(t, ID_AA64ISAR2, CSSC, 1);     /* FEAT_CSSC */
+    t = FIELD_DP64(t, ID_AA64ISAR2, CSSC, 2);     /* FEAT_CSSC, FEAT_CMPBR */
     SET_IDREG(isar, ID_AA64ISAR2, t);
 
     t = GET_IDREG(isar, ID_AA64PFR0);
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
index b9b11e82b3..1b7f7881fe 100644
--- a/docs/system/arm/emulation.rst
+++ b/docs/system/arm/emulation.rst
@@ -28,6 +28,7 @@ the following architecture extensions:
 - FEAT_BTI (Branch Target Identification)
 - FEAT_CCIDX (Extended cache index)
 - FEAT_CMOW (Control for cache maintenance permission)
+- FEAT_CMPBR (Compare and Branch instructions)
 - FEAT_CRC32 (CRC32 instructions)
 - FEAT_Crypto (Cryptographic Extension)
 - FEAT_CSSC (Common Short Sequence Compression instructions)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 08/11] target/arm: Implement CB, CBB, CBH
  2025-08-03  1:40 ` [PATCH 08/11] target/arm: Implement CB, CBB, CBH Richard Henderson
@ 2025-08-03 12:31   ` Paolo Bonzini
  2025-08-03 20:28     ` Richard Henderson
  0 siblings, 1 reply; 25+ messages in thread
From: Paolo Bonzini @ 2025-08-03 12:31 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: qemu-arm

On 8/3/25 03:40, Richard Henderson wrote:
> +static bool do_cb_cond(DisasContext *s, int cc, int imm,
> +                       int rt, int rm, MemOp mop)

This is a memop...

> +static bool trans_CB_cond(DisasContext *s, arg_CB_cond *a)
> +{
> +    return do_cb_cond(s, a->cc, a->imm, a->rt, a->rm, a->sf ? 64 : 32);
> +}
> +
> +static bool trans_CBB_cond(DisasContext *s, arg_CBB_cond *a)
> +{
> +    return do_cb_cond(s, a->cc, a->imm, a->rt, a->rm, 8);
> +}

... but these use bits

> +static bool trans_CBH_cond(DisasContext *s, arg_CBH_cond *a)
> +{
> +    return do_cb_cond(s, a->cc, a->imm, 16, a->rt, a->rm);
> +}
> +

... and this one also has arguments in the wrong order.

Paolo



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 08/11] target/arm: Implement CB, CBB, CBH
  2025-08-03 12:31   ` Paolo Bonzini
@ 2025-08-03 20:28     ` Richard Henderson
  0 siblings, 0 replies; 25+ messages in thread
From: Richard Henderson @ 2025-08-03 20:28 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: qemu-arm

On 8/3/25 22:31, Paolo Bonzini wrote:
> On 8/3/25 03:40, Richard Henderson wrote:
>> +static bool do_cb_cond(DisasContext *s, int cc, int imm,
>> +                       int rt, int rm, MemOp mop)
> 
> This is a memop...
> 
>> +static bool trans_CB_cond(DisasContext *s, arg_CB_cond *a)
>> +{
>> +    return do_cb_cond(s, a->cc, a->imm, a->rt, a->rm, a->sf ? 64 : 32);
>> +}
>> +
>> +static bool trans_CBB_cond(DisasContext *s, arg_CBB_cond *a)
>> +{
>> +    return do_cb_cond(s, a->cc, a->imm, a->rt, a->rm, 8);
>> +}
> 
> ... but these use bits
> 
>> +static bool trans_CBH_cond(DisasContext *s, arg_CBH_cond *a)
>> +{
>> +    return do_cb_cond(s, a->cc, a->imm, 16, a->rt, a->rm);
>> +}
>> +
> 
> ... and this one also has arguments in the wrong order.

Gah.  Thanks.  I've reorganized the code to avoid this entirely.

Now to fix a gcc crash when emitting code for cmpbr...  :-)


r~


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 01/11] target/arm: Add feature predicate for FEAT_CSSC
  2025-08-03  1:40 ` [PATCH 01/11] target/arm: Add feature predicate for FEAT_CSSC Richard Henderson
@ 2025-08-15 13:06   ` Peter Maydell
  0 siblings, 0 replies; 25+ messages in thread
From: Peter Maydell @ 2025-08-15 13:06 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Sun, 3 Aug 2025 at 02:40, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/cpu-features.h | 5 +++++
>  1 file changed, 5 insertions(+)
>
> diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
> index 5876162428..f12c5c8be7 100644
> --- a/target/arm/cpu-features.h
> +++ b/target/arm/cpu-features.h
> @@ -604,6 +604,11 @@ static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id)
>      return FIELD_EX64_IDREG(id, ID_AA64ISAR2, RPRES);
>  }
>
> +static inline bool isar_feature_aa64_cssc(const ARMISARegisters *id)
> +{
> +    return FIELD_EX64_IDREG(id, ID_AA64ISAR2, CSSC) != 0;
> +}
> +
>  static inline bool isar_feature_aa64_lut(const ARMISARegisters *id)
>  {
>      return FIELD_EX64_IDREG(id, ID_AA64ISAR2, LUT);
> --
> 2.43.0

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 04/11] target/arm: Split out gen_wrap2_i32 helper
  2025-08-03  1:40 ` [PATCH 04/11] target/arm: Split out gen_wrap2_i32 helper Richard Henderson
@ 2025-08-15 13:09   ` Peter Maydell
  2025-08-15 16:05   ` Peter Maydell
  1 sibling, 0 replies; 25+ messages in thread
From: Peter Maydell @ 2025-08-15 13:09 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Sun, 3 Aug 2025 at 02:42, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Wrapper to extract the low 32 bits, perform an operation,
> and zero-extend back to 64 bits.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/tcg/translate-a64.c | 21 +++++++++++----------
>  1 file changed, 11 insertions(+), 10 deletions(-)
>
> diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
> index bb92bdc296..64a845d5fb 100644
> --- a/target/arm/tcg/translate-a64.c
> +++ b/target/arm/tcg/translate-a64.c
> @@ -8231,13 +8231,18 @@ static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
>      return true;
>  }
>

A brief comment here would help:

  /*
   * Perform 32-bit operation fn on the low half of n;
   * the high half of the output is zeroed.
   */
> +static void gen_wrap2_i32(TCGv_i64 d, TCGv_i64 n, NeonGenOneOpFn fn)
> +{
> +    TCGv_i32 t = tcg_temp_new_i32();
> +
> +    tcg_gen_extrl_i64_i32(t, n);
> +    fn(t, t);
> +    tcg_gen_extu_i32_i64(d, t);
> +}
> +

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 06/11] target/arm: Enable FEAT_CSSC for -cpu max
  2025-08-03  1:40 ` [PATCH 06/11] target/arm: Enable FEAT_CSSC for -cpu max Richard Henderson
@ 2025-08-15 13:10   ` Peter Maydell
  0 siblings, 0 replies; 25+ messages in thread
From: Peter Maydell @ 2025-08-15 13:10 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Sun, 3 Aug 2025 at 02:41, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 02/11] target/arm: Implement MIN/MAX (immediate)
  2025-08-03  1:40 ` [PATCH 02/11] target/arm: Implement MIN/MAX (immediate) Richard Henderson
@ 2025-08-15 13:18   ` Peter Maydell
  2025-08-15 13:33     ` Richard Henderson
  0 siblings, 1 reply; 25+ messages in thread
From: Peter Maydell @ 2025-08-15 13:18 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Sun, 3 Aug 2025 at 02:41, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/tcg/translate-a64.c | 44 ++++++++++++++++++++++++++++++++++
>  target/arm/tcg/a64.decode      | 10 ++++++++
>  2 files changed, 54 insertions(+)
>
> diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
> index dbf47595db..b70ae5befd 100644
> --- a/target/arm/tcg/translate-a64.c
> +++ b/target/arm/tcg/translate-a64.c
> @@ -4552,6 +4552,50 @@ TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
>  TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
>  TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
>
> +/*
> + * Min/Max (immediate)
> + */
> +
> +static void gen_wrap3_i32(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, NeonGenTwoOpFn fn)
> +{
> +    TCGv_i32 t1 = tcg_temp_new_i32();
> +    TCGv_i32 t2 = tcg_temp_new_i32();
> +
> +    tcg_gen_extrl_i64_i32(t1, n);
> +    tcg_gen_extrl_i64_i32(t2, m);
> +    fn(t1, t1, t2);
> +    tcg_gen_extu_i32_i64(d, t1);
> +}
> +
> +static void gen_smax32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
> +{
> +    gen_wrap3_i32(d, n, m, tcg_gen_smax_i32);
> +}
> +
> +static void gen_smin32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
> +{
> +    gen_wrap3_i32(d, n, m, tcg_gen_smin_i32);
> +}
> +
> +static void gen_umax32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
> +{
> +    gen_wrap3_i32(d, n, m, tcg_gen_umax_i32);
> +}
> +
> +static void gen_umin32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
> +{
> +    gen_wrap3_i32(d, n, m, tcg_gen_umin_i32);
> +}
> +
> +TRANS_FEAT(SMAX_i, aa64_cssc, gen_rri, a, 0, 0,
> +           a->sf ? tcg_gen_smax_i64 : gen_smax32_i64)
> +TRANS_FEAT(SMIN_i, aa64_cssc, gen_rri, a, 0, 0,
> +           a->sf ? tcg_gen_smin_i64 : gen_smin32_i64)
> +TRANS_FEAT(UMAX_i, aa64_cssc, gen_rri, a, 0, 0,
> +           a->sf ? tcg_gen_umax_i64 : gen_umax32_i64)
> +TRANS_FEAT(UMIN_i, aa64_cssc, gen_rri, a, 0, 0,
> +           a->sf ? tcg_gen_umin_i64 : gen_umin32_i64)

We end up doing the zero-extension twice for the 32-bit case,
once in gen_wrap3_i32(), and once in gen_rri().  Does the
extra one get optimized away ?

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 03/11] target/arm: Implement MIN/MAX (register)
  2025-08-03  1:40 ` [PATCH 03/11] target/arm: Implement MIN/MAX (register) Richard Henderson
@ 2025-08-15 13:20   ` Peter Maydell
  0 siblings, 0 replies; 25+ messages in thread
From: Peter Maydell @ 2025-08-15 13:20 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Sun, 3 Aug 2025 at 02:42, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/tcg/translate-a64.c | 22 ++++++++++++++++++++++
>  target/arm/tcg/a64.decode      |  5 +++++
>  2 files changed, 27 insertions(+)

(this also does the zext twice I think)

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 05/11] target/arm: Implement CTZ, CNT, ABS
  2025-08-03  1:40 ` [PATCH 05/11] target/arm: Implement CTZ, CNT, ABS Richard Henderson
@ 2025-08-15 13:26   ` Peter Maydell
  2025-08-26 16:11   ` Peter Maydell
  1 sibling, 0 replies; 25+ messages in thread
From: Peter Maydell @ 2025-08-15 13:26 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Sun, 3 Aug 2025 at 02:41, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/tcg/translate-a64.c | 31 +++++++++++++++++++++++++++++++
>  target/arm/tcg/a64.decode      |  4 ++++
>  2 files changed, 35 insertions(+)

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 10/11] linux-user: Change exported get_elf_hwcap to abi_ulong
  2025-08-03  1:40 ` [PATCH 10/11] linux-user: Change exported get_elf_hwcap to abi_ulong Richard Henderson
@ 2025-08-15 13:28   ` Peter Maydell
  0 siblings, 0 replies; 25+ messages in thread
From: Peter Maydell @ 2025-08-15 13:28 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Sun, 3 Aug 2025 at 02:43, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> AArch64 is about to add the first 64-bit hwcap bit, so we
> have to expand the return type.  Since the only user
> assigns this to an abi_ulong, match that type.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 02/11] target/arm: Implement MIN/MAX (immediate)
  2025-08-15 13:18   ` Peter Maydell
@ 2025-08-15 13:33     ` Richard Henderson
  0 siblings, 0 replies; 25+ messages in thread
From: Richard Henderson @ 2025-08-15 13:33 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel, qemu-arm

On 8/15/25 23:18, Peter Maydell wrote:
> We end up doing the zero-extension twice for the 32-bit case,
> once in gen_wrap3_i32(), and once in gen_rri().  Does the
> extra one get optimized away ?

Yes.  That's what the z_mask stuff in tcg/optimize.c is for.


r~


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR
  2025-08-03  1:40 [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Richard Henderson
                   ` (10 preceding siblings ...)
  2025-08-03  1:40 ` [PATCH 11/11] target/arm: Enable FEAT_CMPBR for -cpu max Richard Henderson
@ 2025-08-15 14:10 ` Peter Maydell
  11 siblings, 0 replies; 25+ messages in thread
From: Peter Maydell @ 2025-08-15 14:10 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Sun, 3 Aug 2025 at 02:40, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> These two extensions are related due to sharing ID_AA64ISAR2.CSSC.
>
> I had intended to test by bootstrapping gcc under emulation with
> -march=native (that seemed the easiest way to test the short
> range branches), but that failed for unrelated reasons.
>
> I'll try again next week.
>
>
> r~
>
>
> Richard Henderson (11):
>   target/arm: Add feature predicate for FEAT_CSSC
>   target/arm: Implement MIN/MAX (immediate)
>   target/arm: Implement MIN/MAX (register)
>   target/arm: Split out gen_wrap2_i32 helper
>   target/arm: Implement CTZ, CNT, ABS
>   target/arm: Enable FEAT_CSSC for -cpu max

I've applied these bits (FEAT_CSSC) to target-arm.next for
10.2; thanks.

-- PMM


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 04/11] target/arm: Split out gen_wrap2_i32 helper
  2025-08-03  1:40 ` [PATCH 04/11] target/arm: Split out gen_wrap2_i32 helper Richard Henderson
  2025-08-15 13:09   ` Peter Maydell
@ 2025-08-15 16:05   ` Peter Maydell
  1 sibling, 0 replies; 25+ messages in thread
From: Peter Maydell @ 2025-08-15 16:05 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Sun, 3 Aug 2025 at 02:42, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Wrapper to extract the low 32 bits, perform an operation,
> and zero-extend back to 64 bits.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/tcg/translate-a64.c | 21 +++++++++++----------
>  1 file changed, 11 insertions(+), 10 deletions(-)
>
> diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
> index bb92bdc296..64a845d5fb 100644
> --- a/target/arm/tcg/translate-a64.c
> +++ b/target/arm/tcg/translate-a64.c
> @@ -8231,13 +8231,18 @@ static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
>      return true;
>  }
>
> +static void gen_wrap2_i32(TCGv_i64 d, TCGv_i64 n, NeonGenOneOpFn fn)
> +{
> +    TCGv_i32 t = tcg_temp_new_i32();
> +
> +    tcg_gen_extrl_i64_i32(t, n);
> +    fn(t, t);
> +    tcg_gen_extu_i32_i64(d, t);
> +}
> +
>  static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
>  {
> -    TCGv_i32 t32 = tcg_temp_new_i32();
> -
> -    tcg_gen_extrl_i64_i32(t32, tcg_rn);
> -    gen_helper_rbit(t32, t32);
> -    tcg_gen_extu_i32_i64(tcg_rd, t32);
> +    gen_wrap2_i32(tcg_rn, tcg_rn, gen_helper_rbit);

...should be (tcg_rd, tcg_rn, gen_helper_rbit);


>  }
>
>  static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
> @@ -8293,11 +8298,7 @@ static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
>
>  static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
>  {
> -    TCGv_i32 t32 = tcg_temp_new_i32();
> -
> -    tcg_gen_extrl_i64_i32(t32, tcg_rn);
> -    tcg_gen_clrsb_i32(t32, t32);
> -    tcg_gen_extu_i32_i64(tcg_rd, t32);
> +    gen_wrap2_i32(tcg_rn, tcg_rn, tcg_gen_clrsb_i32);

Ditto.

This caused the 'check-functional' tests to fail.

-- PMM


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 05/11] target/arm: Implement CTZ, CNT, ABS
  2025-08-03  1:40 ` [PATCH 05/11] target/arm: Implement CTZ, CNT, ABS Richard Henderson
  2025-08-15 13:26   ` Peter Maydell
@ 2025-08-26 16:11   ` Peter Maydell
  1 sibling, 0 replies; 25+ messages in thread
From: Peter Maydell @ 2025-08-26 16:11 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Sun, 3 Aug 2025 at 02:41, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/tcg/translate-a64.c | 31 +++++++++++++++++++++++++++++++
>  target/arm/tcg/a64.decode      |  4 ++++
>  2 files changed, 35 insertions(+)
>
> diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
> index 64a845d5fb..0c78d4bb79 100644
> --- a/target/arm/tcg/translate-a64.c
> +++ b/target/arm/tcg/translate-a64.c
> @@ -8304,6 +8304,37 @@ static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
>  TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
>  TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
>
> +static void gen_ctz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
> +{
> +    TCGv_i32 t32 = tcg_temp_new_i32();
> +
> +    tcg_gen_extrl_i64_i32(t32, tcg_rn);
> +    tcg_gen_ctzi_i32(t32, t32, 32);
> +    tcg_gen_extu_i32_i64(tcg_rd, t32);
> +}
> +
> +static void gen_ctz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
> +{
> +    tcg_gen_ctzi_i64(tcg_rd, tcg_rn, 64);
> +}
> +
> +static void gen_cnt32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
> +{
> +    gen_wrap2_i32(tcg_rn, tcg_rn, tcg_gen_ctpop_i32);
> +}
> +
> +static void gen_abs32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
> +{
> +    gen_wrap2_i32(tcg_rn, tcg_rn, tcg_gen_abs_i32);
> +}

I'm squashing in the following trivial fix:

--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -8324,12 +8324,12 @@ static void gen_ctz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)

 static void gen_cnt32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
 {
-    gen_wrap2_i32(tcg_rn, tcg_rn, tcg_gen_ctpop_i32);
+    gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_ctpop_i32);
 }

 static void gen_abs32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
 {
-    gen_wrap2_i32(tcg_rn, tcg_rn, tcg_gen_abs_i32);
+    gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_abs_i32);
 }

 TRANS_FEAT(CTZ, aa64_cssc, gen_rr, a->rd, a->rn,

-- PMM


^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2025-08-26 16:16 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-08-03  1:40 [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Richard Henderson
2025-08-03  1:40 ` [PATCH 01/11] target/arm: Add feature predicate for FEAT_CSSC Richard Henderson
2025-08-15 13:06   ` Peter Maydell
2025-08-03  1:40 ` [PATCH 02/11] target/arm: Implement MIN/MAX (immediate) Richard Henderson
2025-08-15 13:18   ` Peter Maydell
2025-08-15 13:33     ` Richard Henderson
2025-08-03  1:40 ` [PATCH 03/11] target/arm: Implement MIN/MAX (register) Richard Henderson
2025-08-15 13:20   ` Peter Maydell
2025-08-03  1:40 ` [PATCH 04/11] target/arm: Split out gen_wrap2_i32 helper Richard Henderson
2025-08-15 13:09   ` Peter Maydell
2025-08-15 16:05   ` Peter Maydell
2025-08-03  1:40 ` [PATCH 05/11] target/arm: Implement CTZ, CNT, ABS Richard Henderson
2025-08-15 13:26   ` Peter Maydell
2025-08-26 16:11   ` Peter Maydell
2025-08-03  1:40 ` [PATCH 06/11] target/arm: Enable FEAT_CSSC for -cpu max Richard Henderson
2025-08-15 13:10   ` Peter Maydell
2025-08-03  1:40 ` [PATCH 07/11] target/arm: Add feature predicate for FEAT_CMPBR Richard Henderson
2025-08-03  1:40 ` [PATCH 08/11] target/arm: Implement CB, CBB, CBH Richard Henderson
2025-08-03 12:31   ` Paolo Bonzini
2025-08-03 20:28     ` Richard Henderson
2025-08-03  1:40 ` [PATCH 09/11] target/arm: Implement CB (immediate) Richard Henderson
2025-08-03  1:40 ` [PATCH 10/11] linux-user: Change exported get_elf_hwcap to abi_ulong Richard Henderson
2025-08-15 13:28   ` Peter Maydell
2025-08-03  1:40 ` [PATCH 11/11] target/arm: Enable FEAT_CMPBR for -cpu max Richard Henderson
2025-08-15 14:10 ` [PATCH 00/11] target/arm: Implement FEAT_CSSC, FEAT_CMPBR Peter Maydell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).