* [Qemu-devel] [PATCH v3 01/10] tcg: Introduce helpers for integer min/max
From: Richard Henderson @ 2018-05-08 15:14 UTC
To: qemu-devel; +Cc: peter.maydell
These operations have so far been re-invented by several targets.
Several supported hosts have insns for them, so place the
expanders out-of-line ahead of a future introduction of TCG opcodes.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg-op.h | 16 ++++++++++++++++
tcg/tcg-op.c | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 56 insertions(+)
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 5d2c91a1b6..0451e2752e 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -324,6 +324,10 @@ void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_umin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_umax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
static inline void tcg_gen_discard_i32(TCGv_i32 arg)
{
@@ -517,6 +521,10 @@ void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg);
void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg);
void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg);
void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_umin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_umax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
#if TCG_TARGET_REG_BITS == 64
static inline void tcg_gen_discard_i64(TCGv_i64 arg)
@@ -1025,6 +1033,10 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
#define tcg_gen_mulu2_tl tcg_gen_mulu2_i64
#define tcg_gen_muls2_tl tcg_gen_muls2_i64
#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i64
+#define tcg_gen_smin_tl tcg_gen_smin_i64
+#define tcg_gen_umin_tl tcg_gen_umin_i64
+#define tcg_gen_smax_tl tcg_gen_smax_i64
+#define tcg_gen_umax_tl tcg_gen_umax_i64
#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i64
#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i64
#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i64
@@ -1123,6 +1135,10 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
#define tcg_gen_mulu2_tl tcg_gen_mulu2_i32
#define tcg_gen_muls2_tl tcg_gen_muls2_i32
#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i32
+#define tcg_gen_smin_tl tcg_gen_smin_i32
+#define tcg_gen_umin_tl tcg_gen_umin_i32
+#define tcg_gen_smax_tl tcg_gen_smax_i32
+#define tcg_gen_umax_tl tcg_gen_umax_i32
#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i32
#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i32
#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i32
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 34b96d68f3..5b82c3be8d 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -1033,6 +1033,26 @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
}
}
+void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b);
+}
+
+void tcg_gen_umin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, a, b);
+}
+
+void tcg_gen_smax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, b, a);
+}
+
+void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a);
+}
+
/* 64-bit ops */
#if TCG_TARGET_REG_BITS == 32
@@ -2438,6 +2458,26 @@ void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
tcg_temp_free_i64(t2);
}
+void tcg_gen_smin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, a, b);
+}
+
+void tcg_gen_umin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, a, b);
+}
+
+void tcg_gen_smax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, b, a);
+}
+
+void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a);
+}
+
/* Size changing operations. */
void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
--
2.17.0
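A quick illustration of what the new expanders compute, since the patch
itself is only declarations plus movcond calls:
tcg_gen_movcond_i32(cond, ret, c1, c2, v1, v2) yields
ret = cond(c1, c2) ? v1 : v2, so each helper is a branchless select.
A minimal C model (a sketch only; the model_* names are invented here):

    #include <stdint.h>

    /* The signed/unsigned distinction comes only from the comparison;
     * the selected bits are the same either way.  The _tl macros then
     * alias each helper to the i32 or i64 form matching target_ulong. */
    static int32_t  model_smin(int32_t a, int32_t b)   { return a < b ? a : b; }
    static uint32_t model_umin(uint32_t a, uint32_t b) { return a < b ? a : b; }
    static int32_t  model_smax(int32_t a, int32_t b)   { return a < b ? b : a; }
    static uint32_t model_umax(uint32_t a, uint32_t b) { return a < b ? b : a; }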
* [Qemu-devel] [PATCH v3 02/10] target/arm: Use new min/max expanders
From: Richard Henderson @ 2018-05-08 15:14 UTC
To: qemu-devel; +Cc: peter.maydell
The generic expanders replace nearly identical code in the translator.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-a64.c | 46 ++++++++++++--------------------------
1 file changed, 14 insertions(+), 32 deletions(-)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index bff4e13bf6..d916fea3a3 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -6021,15 +6021,18 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
break;
case 0x0a: /* SMAXV / UMAXV */
- tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
- tcg_res,
- tcg_res, tcg_elt, tcg_res, tcg_elt);
+ if (is_u) {
+ tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
+ } else {
+ tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
+ }
break;
case 0x1a: /* SMINV / UMINV */
- tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
- tcg_res,
- tcg_res, tcg_elt, tcg_res, tcg_elt);
+ if (is_u) {
+ tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
+ } else {
+ tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
+ }
break;
default:
g_assert_not_reached();
@@ -9931,27 +9934,6 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
}
}
-/* Helper functions for 32 bit comparisons */
-static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
- tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
-}
-
-static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
- tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
-}
-
-static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
- tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
-}
-
-static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
- tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
-}
-
/* Pairwise op subgroup of C3.6.16.
*
* This is called directly or via the handle_3same_float for float pairwise
@@ -10051,7 +10033,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
{ gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
- { gen_max_s32, gen_max_u32 },
+ { tcg_gen_smax_i32, tcg_gen_umax_i32 },
};
genfn = fns[size][u];
break;
@@ -10061,7 +10043,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
{ gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
- { gen_min_s32, gen_min_u32 },
+ { tcg_gen_smin_i32, tcg_gen_umin_i32 },
};
genfn = fns[size][u];
break;
@@ -10516,7 +10498,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
{ gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
- { gen_max_s32, gen_max_u32 },
+ { tcg_gen_smax_i32, tcg_gen_umax_i32 },
};
genfn = fns[size][u];
break;
@@ -10527,7 +10509,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
{ gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
- { gen_min_s32, gen_min_u32 },
+ { tcg_gen_smin_i32, tcg_gen_umin_i32 },
};
genfn = fns[size][u];
break;
--
2.17.0
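A note on why the table substitutions above are drop-in (a sketch; the
typedef is quoted from the surrounding translator source):

    /* The fns[][] dispatch tables hold pointers of type
     *     typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
     * and tcg_gen_smax_i32() et al. have exactly this shape, so the
     * generic expanders replace the local gen_{max,min}_{s,u}32
     * wrappers directly, with no adapter functions needed. */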
* [Qemu-devel] [PATCH v3 03/10] target/xtensa: Use new min/max expanders
From: Richard Henderson @ 2018-05-08 15:14 UTC
To: qemu-devel; +Cc: peter.maydell
The generic expanders replace nearly identical code in the translator.
Acked-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/xtensa/translate.c | 50 ++++++++++++++++++++++++++-------------
1 file changed, 33 insertions(+), 17 deletions(-)
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index 4f6d03059f..bad5cdb009 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -1527,10 +1527,8 @@ static void translate_clamps(DisasContext *dc, const uint32_t arg[],
TCGv_i32 tmp1 = tcg_const_i32(-1u << arg[2]);
TCGv_i32 tmp2 = tcg_const_i32((1 << arg[2]) - 1);
- tcg_gen_movcond_i32(TCG_COND_GT, tmp1,
- cpu_R[arg[1]], tmp1, cpu_R[arg[1]], tmp1);
- tcg_gen_movcond_i32(TCG_COND_LT, cpu_R[arg[0]],
- tmp1, tmp2, tmp1, tmp2);
+ tcg_gen_smax_i32(tmp1, tmp1, cpu_R[arg[1]]);
+ tcg_gen_smin_i32(cpu_R[arg[0]], tmp1, tmp2);
tcg_temp_free(tmp1);
tcg_temp_free(tmp2);
}
@@ -1855,13 +1853,35 @@ static void translate_memw(DisasContext *dc, const uint32_t arg[],
tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
}
-static void translate_minmax(DisasContext *dc, const uint32_t arg[],
- const uint32_t par[])
+static void translate_smin(DisasContext *dc, const uint32_t arg[],
+ const uint32_t par[])
{
if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
- tcg_gen_movcond_i32(par[0], cpu_R[arg[0]],
- cpu_R[arg[1]], cpu_R[arg[2]],
- cpu_R[arg[1]], cpu_R[arg[2]]);
+ tcg_gen_smin_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
+ }
+}
+
+static void translate_umin(DisasContext *dc, const uint32_t arg[],
+ const uint32_t par[])
+{
+ if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
+ tcg_gen_umin_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
+ }
+}
+
+static void translate_smax(DisasContext *dc, const uint32_t arg[],
+ const uint32_t par[])
+{
+ if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
+ tcg_gen_smax_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
+ }
+}
+
+static void translate_umax(DisasContext *dc, const uint32_t arg[],
+ const uint32_t par[])
+{
+ if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
+ tcg_gen_umax_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
}
}
@@ -2984,23 +3004,19 @@ static const XtensaOpcodeOps core_ops[] = {
.par = (const uint32_t[]){TCG_COND_NE},
}, {
.name = "max",
- .translate = translate_minmax,
- .par = (const uint32_t[]){TCG_COND_GE},
+ .translate = translate_smax,
}, {
.name = "maxu",
- .translate = translate_minmax,
- .par = (const uint32_t[]){TCG_COND_GEU},
+ .translate = translate_umax,
}, {
.name = "memw",
.translate = translate_memw,
}, {
.name = "min",
- .translate = translate_minmax,
- .par = (const uint32_t[]){TCG_COND_LT},
+ .translate = translate_smin,
}, {
.name = "minu",
- .translate = translate_minmax,
- .par = (const uint32_t[]){TCG_COND_LTU},
+ .translate = translate_umin,
}, {
.name = "mov",
.translate = translate_mov,
--
2.17.0
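A C model of the CLAMPS expansion above (a sketch; model_clamps is an
invented name): clamp x into the signed range [-(1 << w), (1 << w) - 1]
with one smax against the low bound and one smin against the high bound,
exactly as the two expander calls do:

    #include <stdint.h>

    static int32_t model_clamps(int32_t x, unsigned w)
    {
        int32_t lo = (int32_t)(-1u << w);   /* tmp1 in the patch */
        int32_t hi = (1 << w) - 1;          /* tmp2 in the patch */
        int32_t t = x > lo ? x : lo;        /* tcg_gen_smax_i32 */
        return t < hi ? t : hi;             /* tcg_gen_smin_i32 */
    }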
* [Qemu-devel] [PATCH v3 04/10] tcg: Introduce atomic helpers for integer min/max
From: Richard Henderson @ 2018-05-08 15:14 UTC
To: qemu-devel; +Cc: peter.maydell
Given that this atomic operation will be used by both risc-v
and aarch64, let's not duplicate code across the two targets.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/atomic_template.h | 71 +++++++++++++++++++++++++++++++++++++
accel/tcg/tcg-runtime.h | 8 +++++
tcg/tcg-op.h | 34 ++++++++++++++++++
tcg/tcg.h | 8 +++++
tcg/tcg-op.c | 8 +++++
5 files changed, 129 insertions(+)
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index e022df4571..2489dd3ec1 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -25,18 +25,22 @@
#elif DATA_SIZE == 8
# define SUFFIX q
# define DATA_TYPE uint64_t
+# define SDATA_TYPE int64_t
# define BSWAP bswap64
#elif DATA_SIZE == 4
# define SUFFIX l
# define DATA_TYPE uint32_t
+# define SDATA_TYPE int32_t
# define BSWAP bswap32
#elif DATA_SIZE == 2
# define SUFFIX w
# define DATA_TYPE uint16_t
+# define SDATA_TYPE int16_t
# define BSWAP bswap16
#elif DATA_SIZE == 1
# define SUFFIX b
# define DATA_TYPE uint8_t
+# define SDATA_TYPE int8_t
# define BSWAP
#else
# error unsupported data size
@@ -118,6 +122,39 @@ GEN_ATOMIC_HELPER(or_fetch)
GEN_ATOMIC_HELPER(xor_fetch)
#undef GEN_ATOMIC_HELPER
+
+/* These helpers are, as a whole, full barriers. Within the helper,
+ * the leading barrier is explicit and the trailing barrier is within
+ * cmpxchg primitive.
+ */
+#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
+ ABI_TYPE xval EXTRA_ARGS) \
+{ \
+ ATOMIC_MMU_DECLS; \
+ XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
+ XDATA_TYPE cmp, old, new, val = xval; \
+ smp_mb(); \
+ cmp = atomic_read__nocheck(haddr); \
+ do { \
+ old = cmp; new = FN(old, val); \
+ cmp = atomic_cmpxchg__nocheck(haddr, old, new); \
+ } while (cmp != old); \
+ ATOMIC_MMU_CLEANUP; \
+ return RET; \
+}
+
+GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umin, MIN, DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umax, MAX, DATA_TYPE, old)
+
+GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umin_fetch, MIN, DATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
+
+#undef GEN_ATOMIC_HELPER_FN
#endif /* DATA SIZE >= 16 */
#undef END
@@ -233,6 +270,39 @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
ldo = ldn;
}
}
+
+/* These helpers are, as a whole, full barriers. Within the helper,
+ * the leading barrier is explicit and the trailing barrier is within
+ * cmpxchg primitive.
+ */
+#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
+ ABI_TYPE xval EXTRA_ARGS) \
+{ \
+ ATOMIC_MMU_DECLS; \
+ XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
+ XDATA_TYPE ldo, ldn, old, new, val = xval; \
+ smp_mb(); \
+ ldn = atomic_read__nocheck(haddr); \
+ do { \
+ ldo = ldn; old = BSWAP(ldo); new = FN(old, val); \
+ ldn = atomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new)); \
+ } while (ldo != ldn); \
+ ATOMIC_MMU_CLEANUP; \
+ return RET; \
+}
+
+GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umin, MIN, DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umax, MAX, DATA_TYPE, old)
+
+GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umin_fetch, MIN, DATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
+
+#undef GEN_ATOMIC_HELPER_FN
#endif /* DATA_SIZE >= 16 */
#undef END
@@ -241,5 +311,6 @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
#undef BSWAP
#undef ABI_TYPE
#undef DATA_TYPE
+#undef SDATA_TYPE
#undef SUFFIX
#undef DATA_SIZE
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 2536959a18..1bd39d136d 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -125,11 +125,19 @@ GEN_ATOMIC_HELPERS(fetch_add)
GEN_ATOMIC_HELPERS(fetch_and)
GEN_ATOMIC_HELPERS(fetch_or)
GEN_ATOMIC_HELPERS(fetch_xor)
+GEN_ATOMIC_HELPERS(fetch_smin)
+GEN_ATOMIC_HELPERS(fetch_umin)
+GEN_ATOMIC_HELPERS(fetch_smax)
+GEN_ATOMIC_HELPERS(fetch_umax)
GEN_ATOMIC_HELPERS(add_fetch)
GEN_ATOMIC_HELPERS(and_fetch)
GEN_ATOMIC_HELPERS(or_fetch)
GEN_ATOMIC_HELPERS(xor_fetch)
+GEN_ATOMIC_HELPERS(smin_fetch)
+GEN_ATOMIC_HELPERS(umin_fetch)
+GEN_ATOMIC_HELPERS(smax_fetch)
+GEN_ATOMIC_HELPERS(umax_fetch)
GEN_ATOMIC_HELPERS(xchg)
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 0451e2752e..04eb3e9e17 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -898,6 +898,7 @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+
void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
@@ -906,6 +907,15 @@ void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_smin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_smin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_umin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_umin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_smax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_smax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_umax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_umax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+
void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
@@ -914,6 +924,14 @@ void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_smin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_smin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_umin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_umin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_smax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_smax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_umax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
@@ -1043,10 +1061,18 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i64
#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i64
#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i64
+#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i64
+#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i64
+#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i64
+#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i64
#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i64
#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64
#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64
#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64
+#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i64
+#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i64
+#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i64
+#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i64
#define tcg_gen_dup_tl_vec tcg_gen_dup_i64_vec
#else
#define tcg_gen_movi_tl tcg_gen_movi_i32
@@ -1145,10 +1171,18 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i32
#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i32
#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i32
+#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i32
+#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i32
+#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i32
+#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i32
#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i32
#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32
#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32
#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32
+#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i32
+#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i32
+#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i32
+#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i32
#define tcg_gen_dup_tl_vec tcg_gen_dup_i32_vec
#endif
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 75fbad128b..1ca985479b 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -1415,12 +1415,20 @@ GEN_ATOMIC_HELPER_ALL(fetch_sub)
GEN_ATOMIC_HELPER_ALL(fetch_and)
GEN_ATOMIC_HELPER_ALL(fetch_or)
GEN_ATOMIC_HELPER_ALL(fetch_xor)
+GEN_ATOMIC_HELPER_ALL(fetch_smin)
+GEN_ATOMIC_HELPER_ALL(fetch_umin)
+GEN_ATOMIC_HELPER_ALL(fetch_smax)
+GEN_ATOMIC_HELPER_ALL(fetch_umax)
GEN_ATOMIC_HELPER_ALL(add_fetch)
GEN_ATOMIC_HELPER_ALL(sub_fetch)
GEN_ATOMIC_HELPER_ALL(and_fetch)
GEN_ATOMIC_HELPER_ALL(or_fetch)
GEN_ATOMIC_HELPER_ALL(xor_fetch)
+GEN_ATOMIC_HELPER_ALL(smin_fetch)
+GEN_ATOMIC_HELPER_ALL(umin_fetch)
+GEN_ATOMIC_HELPER_ALL(smax_fetch)
+GEN_ATOMIC_HELPER_ALL(umax_fetch)
GEN_ATOMIC_HELPER_ALL(xchg)
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 5b82c3be8d..6a914654f5 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -3051,11 +3051,19 @@ GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
+GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
+GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
+GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
+GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
+GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
+GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
+GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
+GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
{
--
2.17.0
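A standalone model of one generated helper, for readers who don't want to
expand the macro by hand (a sketch using the GCC __atomic builtins; the
model_* name is invented, and the real macro additionally issues an
explicit leading smp_mb()): with X = fetch_smin, FN = MIN,
XDATA_TYPE = int32_t and RET = old, GEN_ATOMIC_HELPER_FN produces the
equivalent of:

    #include <stdint.h>
    #include <stdbool.h>

    static int32_t model_fetch_smin(int32_t *haddr, int32_t val)
    {
        int32_t cmp = __atomic_load_n(haddr, __ATOMIC_SEQ_CST);
        int32_t old, new;
        do {
            old = cmp;                    /* expected value */
            new = old < val ? old : val;  /* FN = MIN, signed compare */
            /* On failure, cmp is updated to the value now in memory,
             * so the next iteration recomputes from fresh data. */
        } while (!__atomic_compare_exchange_n(haddr, &cmp, new, false,
                                              __ATOMIC_SEQ_CST,
                                              __ATOMIC_SEQ_CST));
        return old;                       /* RET = old for fetch_smin */
    }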
* Re: [Qemu-devel] [PATCH v3 04/10] tcg: Introduce atomic helpers for integer min/max
From: Peter Maydell @ 2018-05-08 17:37 UTC
To: Richard Henderson; +Cc: QEMU Developers
On 8 May 2018 at 16:14, Richard Henderson <richard.henderson@linaro.org> wrote:
> Given that this atomic operation will be used by both risc-v
> and aarch64, let's not duplicate code across the two targets.
>
> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> accel/tcg/atomic_template.h | 71 +++++++++++++++++++++++++++++++++++++
> accel/tcg/tcg-runtime.h | 8 +++++
> tcg/tcg-op.h | 34 ++++++++++++++++++
> tcg/tcg.h | 8 +++++
> tcg/tcg-op.c | 8 +++++
> 5 files changed, 129 insertions(+)
>
> diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
> index e022df4571..2489dd3ec1 100644
> --- a/accel/tcg/atomic_template.h
> +++ b/accel/tcg/atomic_template.h
> @@ -25,18 +25,22 @@
> #elif DATA_SIZE == 8
> # define SUFFIX q
> # define DATA_TYPE uint64_t
> +# define SDATA_TYPE int64_t
> # define BSWAP bswap64
> #elif DATA_SIZE == 4
> # define SUFFIX l
> # define DATA_TYPE uint32_t
> +# define SDATA_TYPE int32_t
> # define BSWAP bswap32
> #elif DATA_SIZE == 2
> # define SUFFIX w
> # define DATA_TYPE uint16_t
> +# define SDATA_TYPE int16_t
> # define BSWAP bswap16
> #elif DATA_SIZE == 1
> # define SUFFIX b
> # define DATA_TYPE uint8_t
> +# define SDATA_TYPE int8_t
> # define BSWAP
> #else
> # error unsupported data size
> @@ -118,6 +122,39 @@ GEN_ATOMIC_HELPER(or_fetch)
> GEN_ATOMIC_HELPER(xor_fetch)
>
> #undef GEN_ATOMIC_HELPER
> +
> +/* These helpers are, as a whole, full barriers. Within the helper,
> + * the leading barrier is explicit and the trailing barrier is within
> + * cmpxchg primitive.
> + */
> +#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \
> +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
> + ABI_TYPE xval EXTRA_ARGS) \
> +{ \
> + ATOMIC_MMU_DECLS; \
> + XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
> + XDATA_TYPE cmp, old, new, val = xval; \
> + smp_mb(); \
> + cmp = atomic_read__nocheck(haddr); \
> + do { \
> + old = cmp; new = FN(old, val); \
> + cmp = atomic_cmpxchg__nocheck(haddr, old, new); \
> + } while (cmp != old); \
> + ATOMIC_MMU_CLEANUP; \
> + return RET; \
> +}
> +
> +GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
> +GEN_ATOMIC_HELPER_FN(fetch_umin, MIN, DATA_TYPE, old)
> +GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
> +GEN_ATOMIC_HELPER_FN(fetch_umax, MAX, DATA_TYPE, old)
> +
> +GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
> +GEN_ATOMIC_HELPER_FN(umin_fetch, MIN, DATA_TYPE, new)
> +GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
> +GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
This fails to compile for me:
In file included from
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/cputlb.c:1062:0:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:
In function ‘helper_atomic_fetch_sminb_mmu’:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:147:1:
error: value computed is not used [-Werror=unused-value]
GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
^
In file included from
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/cputlb.c:1062:0:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:
In function ‘helper_atomic_fetch_smaxb_mmu’:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:149:1:
error: value computed is not used [-Werror=unused-value]
GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
^
In file included from
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/cputlb.c:1062:0:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:
In function ‘helper_atomic_smin_fetchb_mmu’:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:152:1:
error: value computed is not used [-Werror=unused-value]
GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
^
In file included from
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/cputlb.c:1062:0:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:
In function ‘helper_atomic_smax_fetchb_mmu’:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:154:1:
error: value computed is not used [-Werror=unused-value]
GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
^
In file included from
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/cputlb.c:1090:0:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:
In function ‘helper_atomic_fetch_sminb’:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:147:1:
error: value computed is not used [-Werror=unused-value]
GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
^
In file included from
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/cputlb.c:1090:0:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:
In function ‘helper_atomic_fetch_smaxb’:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:149:1:
error: value computed is not used [-Werror=unused-value]
GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
^
In file included from
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/cputlb.c:1090:0:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:
In function ‘helper_atomic_smin_fetchb’:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:152:1:
error: value computed is not used [-Werror=unused-value]
GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
^
In file included from
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/cputlb.c:1090:0:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:
In function ‘helper_atomic_smax_fetchb’:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:154:1:
error: value computed is not used [-Werror=unused-value]
GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
^
gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
configure arguments
exec '../../configure'
'--target-list=arm-softmmu,aarch64-softmmu,arm-linux-user,aarch64-linux-user'
'--enable-debug' '--cc=ccache gcc' '--audio-drv-list=pa'
'--with-pkgversion=pm215' "$@"
For some reason, only the signed versions get warnings...
thanks
-- PMM
* Re: [Qemu-devel] [PATCH v3 04/10] tcg: Introduce atomic helpers for integer min/max
From: Peter Maydell @ 2018-05-08 17:49 UTC
To: Richard Henderson; +Cc: QEMU Developers
On 8 May 2018 at 18:37, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 8 May 2018 at 16:14, Richard Henderson <richard.henderson@linaro.org> wrote:
>> Given that this atomic operation will be used by both risc-v
>> and aarch64, let's not duplicate code across the two targets.
>>
>> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>> accel/tcg/atomic_template.h | 71 +++++++++++++++++++++++++++++++++++++
>> accel/tcg/tcg-runtime.h | 8 +++++
>> tcg/tcg-op.h | 34 ++++++++++++++++++
>> tcg/tcg.h | 8 +++++
>> tcg/tcg-op.c | 8 +++++
>> 5 files changed, 129 insertions(+)
>>
>> diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
>> index e022df4571..2489dd3ec1 100644
>> --- a/accel/tcg/atomic_template.h
>> +++ b/accel/tcg/atomic_template.h
>> @@ -25,18 +25,22 @@
>> #elif DATA_SIZE == 8
>> # define SUFFIX q
>> # define DATA_TYPE uint64_t
>> +# define SDATA_TYPE int64_t
>> # define BSWAP bswap64
>> #elif DATA_SIZE == 4
>> # define SUFFIX l
>> # define DATA_TYPE uint32_t
>> +# define SDATA_TYPE int32_t
>> # define BSWAP bswap32
>> #elif DATA_SIZE == 2
>> # define SUFFIX w
>> # define DATA_TYPE uint16_t
>> +# define SDATA_TYPE int16_t
>> # define BSWAP bswap16
>> #elif DATA_SIZE == 1
>> # define SUFFIX b
>> # define DATA_TYPE uint8_t
>> +# define SDATA_TYPE int8_t
>> # define BSWAP
>> #else
>> # error unsupported data size
>> @@ -118,6 +122,39 @@ GEN_ATOMIC_HELPER(or_fetch)
>> GEN_ATOMIC_HELPER(xor_fetch)
>>
>> #undef GEN_ATOMIC_HELPER
>> +
>> +/* These helpers are, as a whole, full barriers. Within the helper,
>> + * the leading barrier is explicit and the trailing barrier is within
>> + * cmpxchg primitive.
>> + */
>> +#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \
>> +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
>> + ABI_TYPE xval EXTRA_ARGS) \
>> +{ \
>> + ATOMIC_MMU_DECLS; \
>> + XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
>> + XDATA_TYPE cmp, old, new, val = xval; \
>> + smp_mb(); \
>> + cmp = atomic_read__nocheck(haddr); \
>> + do { \
>> + old = cmp; new = FN(old, val); \
>> + cmp = atomic_cmpxchg__nocheck(haddr, old, new); \
>> + } while (cmp != old); \
>> + ATOMIC_MMU_CLEANUP; \
>> + return RET; \
>> +}
>> +
>> +GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
>> +GEN_ATOMIC_HELPER_FN(fetch_umin, MIN, DATA_TYPE, old)
>> +GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
>> +GEN_ATOMIC_HELPER_FN(fetch_umax, MAX, DATA_TYPE, old)
>> +
>> +GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
>> +GEN_ATOMIC_HELPER_FN(umin_fetch, MIN, DATA_TYPE, new)
>> +GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
>> +GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
>
> This fails to compile for me:
Running it not via ccache gave me some more detail (??):
In file included from
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/cputlb.c:22:0:
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:
In function ‘helper_atomic_fetch_sminb_mmu’:
/home/petmay01/linaro/qemu-from-laptop/qemu/target/arm/cpu.h:36:29:
error: value computed is not used [-Werror=unused-value]
#define CPUArchState struct CPUARMState
^
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:131:25:
note: in expansion of macro ‘CPUArchState’
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
^
/home/petmay01/linaro/qemu-from-laptop/qemu/accel/tcg/atomic_template.h:147:1:
note: in expansion of macro ‘GEN_ATOMIC_HELPER_FN’
GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
^
though that doesn't make much sense either. I'll see if
I can get gcc to show me the macro-expanded code.
thanks
-- PMM
* Re: [Qemu-devel] [PATCH v3 04/10] tcg: Introduce atomic helpers for integer min/max
From: Peter Maydell @ 2018-05-08 18:27 UTC
To: Richard Henderson; +Cc: QEMU Developers
On 8 May 2018 at 18:49, Peter Maydell <peter.maydell@linaro.org> wrote:
> [weird compiler errors]
This fixes them:
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -187,7 +187,7 @@
/* Returns the eventual value, failed or not */
#define atomic_cmpxchg__nocheck(ptr, old, new) ({ \
typeof_strip_qual(*ptr) _old = (old); \
- __atomic_compare_exchange_n(ptr, &_old, new, false, \
+ (void)__atomic_compare_exchange_n(ptr, &_old, new, false, \
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
_old; \
})
Seems pretty clearly a compiler bug -- rth says newer gcc don't
do this, so presumably fixed upstream somewhere between gcc 5 and 6.
thanks
-- PMM
* Re: [Qemu-devel] [PATCH v3 04/10] tcg: Introduce atomic helpers for integer min/max
From: Peter Maydell @ 2018-05-10 14:25 UTC
To: Richard Henderson; +Cc: QEMU Developers
On 8 May 2018 at 19:27, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 8 May 2018 at 18:49, Peter Maydell <peter.maydell@linaro.org> wrote:
>> [weird compiler errors]
>
> This fixes them:
>
> --- a/include/qemu/atomic.h
> +++ b/include/qemu/atomic.h
> @@ -187,7 +187,7 @@
> /* Returns the eventual value, failed or not */
> #define atomic_cmpxchg__nocheck(ptr, old, new) ({ \
> typeof_strip_qual(*ptr) _old = (old); \
> - __atomic_compare_exchange_n(ptr, &_old, new, false, \
> + (void)__atomic_compare_exchange_n(ptr, &_old, new, false, \
> __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
> _old; \
> })
>
> Seems pretty clearly a compiler bug -- rth says newer gcc don't
> do this, so presumably fixed upstream somewhere between gcc 5 and 6.
Standalone testcase, fwiw:
===begin===
/*
* Weirdly, this compiler will complain about FOO(int8_t) but not
* the others:
* $ gcc -g -Wall -Wunused-value -o zz9.o -c zz9.c
* zz9.c: In function ‘foo_int8_t’:
* zz9.c:12:5: warning: value computed is not used [-Wunused-value]
* __atomic_compare_exchange_n(p, exp, des, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
* ^
* /tmp/zz9.c:17:1: note: in expansion of macro ‘FOO’
* FOO(int8_t)
* ^
*
* This is gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
*/
typedef unsigned char uint8_t;
typedef signed char int8_t;
#define FOO(TYPE) \
void foo_##TYPE(TYPE *p, TYPE *exp, TYPE *des) { \
__atomic_compare_exchange_n(p, exp, des, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
}
FOO(int)
FOO(uint8_t)
FOO(int8_t)
===endit===
thanks
-- PMM
* [Qemu-devel] [PATCH v3 05/10] tcg: Use GEN_ATOMIC_HELPER_FN for opposite endian atomic add
From: Richard Henderson @ 2018-05-08 15:14 UTC
To: qemu-devel; +Cc: peter.maydell
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/atomic_template.h | 49 ++++++-------------------------------
1 file changed, 7 insertions(+), 42 deletions(-)
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index 2489dd3ec1..3f41ef2782 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -229,48 +229,6 @@ GEN_ATOMIC_HELPER(xor_fetch)
#undef GEN_ATOMIC_HELPER
-/* Note that for addition, we need to use a separate cmpxchg loop instead
- of bswaps for the reverse-host-endian helpers. */
-ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr,
- ABI_TYPE val EXTRA_ARGS)
-{
- ATOMIC_MMU_DECLS;
- DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
- DATA_TYPE ldo, ldn, ret, sto;
-
- ldo = atomic_read__nocheck(haddr);
- while (1) {
- ret = BSWAP(ldo);
- sto = BSWAP(ret + val);
- ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
- if (ldn == ldo) {
- ATOMIC_MMU_CLEANUP;
- return ret;
- }
- ldo = ldn;
- }
-}
-
-ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
- ABI_TYPE val EXTRA_ARGS)
-{
- ATOMIC_MMU_DECLS;
- DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
- DATA_TYPE ldo, ldn, ret, sto;
-
- ldo = atomic_read__nocheck(haddr);
- while (1) {
- ret = BSWAP(ldo) + val;
- sto = BSWAP(ret);
- ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
- if (ldn == ldo) {
- ATOMIC_MMU_CLEANUP;
- return ret;
- }
- ldo = ldn;
- }
-}
-
/* These helpers are, as a whole, full barriers. Within the helper,
* the leading barrier is explicit and the trailing barrier is within
* cmpxchg primitive.
@@ -302,6 +260,13 @@ GEN_ATOMIC_HELPER_FN(umin_fetch, MIN, DATA_TYPE, new)
GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
+/* Note that for addition, we need to use a separate cmpxchg loop instead
+ of bswaps for the reverse-host-endian helpers. */
+#define ADD(X, Y) (X + Y)
+GEN_ATOMIC_HELPER_FN(fetch_add, ADD, DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new)
+#undef ADD
+
#undef GEN_ATOMIC_HELPER_FN
#endif /* DATA_SIZE >= 16 */
--
2.17.0
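A model of what the macro now generates for the reverse-host-endian add
(a sketch with GCC builtins; names are invented): memory holds a
byte-swapped value, so each retry must re-swap before and after the
arithmetic, which is why a plain atomic add cannot be used on this path:

    #include <stdint.h>
    #include <stdbool.h>

    static uint32_t model_add_fetch_bswapped(uint32_t *haddr, uint32_t val)
    {
        uint32_t ldn = __atomic_load_n(haddr, __ATOMIC_SEQ_CST);
        for (;;) {
            uint32_t ldo = ldn;                    /* raw (guest-endian) */
            uint32_t old = __builtin_bswap32(ldo); /* host-endian operand */
            uint32_t new = old + val;              /* FN = ADD */
            ldn = ldo;
            if (__atomic_compare_exchange_n(haddr, &ldn,
                                            __builtin_bswap32(new), false,
                                            __ATOMIC_SEQ_CST,
                                            __ATOMIC_SEQ_CST)) {
                return new;                        /* RET = new (add_fetch) */
            }
            /* ldn was updated to the fresh memory value; retry. */
        }
    }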
* [Qemu-devel] [PATCH v3 06/10] target/riscv: Use new atomic min/max expanders
From: Richard Henderson @ 2018-05-08 15:14 UTC
To: qemu-devel; +Cc: peter.maydell
Reviewed-by: Michael Clark <mjc@sifive.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/riscv/translate.c | 72 +++++++++++-----------------------------
1 file changed, 20 insertions(+), 52 deletions(-)
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 808eab7f50..9cab717088 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -725,7 +725,6 @@ static void gen_atomic(DisasContext *ctx, uint32_t opc,
TCGv src1, src2, dat;
TCGLabel *l1, *l2;
TCGMemOp mop;
- TCGCond cond;
bool aq, rl;
/* Extract the size of the atomic operation. */
@@ -823,60 +822,29 @@ static void gen_atomic(DisasContext *ctx, uint32_t opc,
tcg_gen_atomic_fetch_or_tl(src2, src1, src2, ctx->mem_idx, mop);
gen_set_gpr(rd, src2);
break;
-
case OPC_RISC_AMOMIN:
- cond = TCG_COND_LT;
- goto do_minmax;
- case OPC_RISC_AMOMAX:
- cond = TCG_COND_GT;
- goto do_minmax;
- case OPC_RISC_AMOMINU:
- cond = TCG_COND_LTU;
- goto do_minmax;
- case OPC_RISC_AMOMAXU:
- cond = TCG_COND_GTU;
- goto do_minmax;
- do_minmax:
- /* Handle the RL barrier. The AQ barrier is handled along the
- parallel path by the SC atomic cmpxchg. On the serial path,
- of course, barriers do not matter. */
- if (rl) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
- }
- if (tb_cflags(ctx->tb) & CF_PARALLEL) {
- l1 = gen_new_label();
- gen_set_label(l1);
- } else {
- l1 = NULL;
- }
-
gen_get_gpr(src1, rs1);
gen_get_gpr(src2, rs2);
- if ((mop & MO_SSIZE) == MO_SL) {
- /* Sign-extend the register comparison input. */
- tcg_gen_ext32s_tl(src2, src2);
- }
- dat = tcg_temp_local_new();
- tcg_gen_qemu_ld_tl(dat, src1, ctx->mem_idx, mop);
- tcg_gen_movcond_tl(cond, src2, dat, src2, dat, src2);
-
- if (tb_cflags(ctx->tb) & CF_PARALLEL) {
- /* Parallel context. Make this operation atomic by verifying
- that the memory didn't change while we computed the result. */
- tcg_gen_atomic_cmpxchg_tl(src2, src1, dat, src2, ctx->mem_idx, mop);
-
- /* If the cmpxchg failed, retry. */
- /* ??? There is an assumption here that this will eventually
- succeed, such that we don't live-lock. This is not unlike
- a similar loop that the compiler would generate for e.g.
- __atomic_fetch_and_xor, so don't worry about it. */
- tcg_gen_brcond_tl(TCG_COND_NE, dat, src2, l1);
- } else {
- /* Serial context. Directly store the result. */
- tcg_gen_qemu_st_tl(src2, src1, ctx->mem_idx, mop);
- }
- gen_set_gpr(rd, dat);
- tcg_temp_free(dat);
+ tcg_gen_atomic_fetch_smin_tl(src2, src1, src2, ctx->mem_idx, mop);
+ gen_set_gpr(rd, src2);
+ break;
+ case OPC_RISC_AMOMAX:
+ gen_get_gpr(src1, rs1);
+ gen_get_gpr(src2, rs2);
+ tcg_gen_atomic_fetch_smax_tl(src2, src1, src2, ctx->mem_idx, mop);
+ gen_set_gpr(rd, src2);
+ break;
+ case OPC_RISC_AMOMINU:
+ gen_get_gpr(src1, rs1);
+ gen_get_gpr(src2, rs2);
+ tcg_gen_atomic_fetch_umin_tl(src2, src1, src2, ctx->mem_idx, mop);
+ gen_set_gpr(rd, src2);
+ break;
+ case OPC_RISC_AMOMAXU:
+ gen_get_gpr(src1, rs1);
+ gen_get_gpr(src2, rs2);
+ tcg_gen_atomic_fetch_umax_tl(src2, src1, src2, ctx->mem_idx, mop);
+ gen_set_gpr(rd, src2);
break;
default:
--
2.17.0
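For reference, the semantics of the expander calls introduced above
(a sketch in comment form; each call is a full barrier as a whole):

    /* tcg_gen_atomic_fetch_smin_tl(dst, addr, src, idx, mop) behaves as
     *     atomically { tmp = load(addr, mop); store(addr, smin(tmp, src)); }
     *     dst = tmp;            // fetch_* variants return the old value
     * with operand size and sign taken from mop, so it subsumes the
     * hand-rolled movcond + cmpxchg retry loop deleted above. */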
* [Qemu-devel] [PATCH v3 07/10] target/arm: Introduce ARM_FEATURE_V8_ATOMICS and initial decode
From: Richard Henderson @ 2018-05-08 15:14 UTC
To: qemu-devel; +Cc: peter.maydell
The insns in the ARMv8.1-Atomics extension are added to the existing
load/store exclusive and load/store reg opcode spaces.
Rearrange the top-level decoders for these to accommodate.
The Atomics insns themselves still generate Unallocated.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 2 +
linux-user/elfload.c | 1 +
target/arm/translate-a64.c | 182 +++++++++++++++++++++++++++----------
3 files changed, 139 insertions(+), 46 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 44e6b77151..f71a78d908 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1449,6 +1449,8 @@ enum arm_features {
ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
+ ARM_FEATURE_V8_1,
+ ARM_FEATURE_V8_ATOMICS = ARM_FEATURE_V8_1, /* mandatory extension */
ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */
ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 36d52194bc..13bc78d0c8 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -581,6 +581,7 @@ static uint32_t get_elf_hwcap(void)
GET_FEATURE(ARM_FEATURE_V8_SHA512, ARM_HWCAP_A64_SHA512);
GET_FEATURE(ARM_FEATURE_V8_FP16,
ARM_HWCAP_A64_FPHP | ARM_HWCAP_A64_ASIMDHP);
+ GET_FEATURE(ARM_FEATURE_V8_ATOMICS, ARM_HWCAP_A64_ATOMICS);
GET_FEATURE(ARM_FEATURE_V8_RDM, ARM_HWCAP_A64_ASIMDRDM);
GET_FEATURE(ARM_FEATURE_V8_FCMA, ARM_HWCAP_A64_FCMA);
#undef GET_FEATURE
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index d916fea3a3..6acad791e6 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -2147,62 +2147,98 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
int rt = extract32(insn, 0, 5);
int rn = extract32(insn, 5, 5);
int rt2 = extract32(insn, 10, 5);
- int is_lasr = extract32(insn, 15, 1);
int rs = extract32(insn, 16, 5);
- int is_pair = extract32(insn, 21, 1);
- int is_store = !extract32(insn, 22, 1);
- int is_excl = !extract32(insn, 23, 1);
+ int is_lasr = extract32(insn, 15, 1);
+ int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
int size = extract32(insn, 30, 2);
TCGv_i64 tcg_addr;
- if ((!is_excl && !is_pair && !is_lasr) ||
- (!is_excl && is_pair) ||
- (is_pair && size < 2)) {
- unallocated_encoding(s);
+ switch (o2_L_o1_o0) {
+ case 0x0: /* STXR */
+ case 0x1: /* STLXR */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ if (is_lasr) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ }
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, false);
return;
- }
- if (rn == 31) {
- gen_check_sp_alignment(s);
- }
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
-
- /* Note that since TCG is single threaded load-acquire/store-release
- * semantics require no extra if (is_lasr) { ... } handling.
- */
-
- if (is_excl) {
- if (!is_store) {
- s->is_ldex = true;
- gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
- if (is_lasr) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
- }
- } else {
- if (is_lasr) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
- }
- gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
+ case 0x4: /* LDXR */
+ case 0x5: /* LDAXR */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
}
- } else {
- TCGv_i64 tcg_rt = cpu_reg(s, rt);
- bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ s->is_ldex = true;
+ gen_load_exclusive(s, rt, rt2, tcg_addr, size, false);
+ if (is_lasr) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ }
+ return;
+ case 0x9: /* STLR */
/* Generate ISS for non-exclusive accesses including LASR. */
- if (is_store) {
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ do_gpr_st(s, cpu_reg(s, rt), tcg_addr, size, true, rt,
+ disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
+ return;
+
+ case 0xd: /* LDAR */
+ /* Generate ISS for non-exclusive accesses including LASR. */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ do_gpr_ld(s, cpu_reg(s, rt), tcg_addr, size, false, false, true, rt,
+ disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ return;
+
+ case 0x2: case 0x3: /* CASP / STXP */
+ if (size & 2) { /* STXP / STLXP */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
}
- do_gpr_st(s, tcg_rt, tcg_addr, size,
- true, rt, iss_sf, is_lasr);
- } else {
- do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
- true, rt, iss_sf, is_lasr);
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
+ return;
+ }
+ /* CASP / CASPL */
+ break;
+
+ case 0x6: case 0x7: /* CASP / LDXP */
+ if (size & 2) { /* LDXP / LDAXP */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ s->is_ldex = true;
+ gen_load_exclusive(s, rt, rt2, tcg_addr, size, true);
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
}
+ return;
}
+ /* CASPA / CASPAL */
+ break;
+
+ case 0xa: /* CAS */
+ case 0xb: /* CASL */
+ case 0xe: /* CASA */
+ case 0xf: /* CASAL */
+ break;
}
+ unallocated_encoding(s);
}
/*
@@ -2715,6 +2751,55 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
}
}
+/* Atomic memory operations
+ *
+ * 31 30 27 26 24 22 21 16 15 12 10 5 0
+ * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
+ * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt |
+ * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
+ *
+ * Rt: the result register
+ * Rn: base address or SP
+ * Rs: the source register for the operation
+ * V: vector flag (always 0 as of v8.3)
+ * A: acquire flag
+ * R: release flag
+ */
+static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
+ int size, int rt, bool is_vector)
+{
+ int rs = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int o3_opc = extract32(insn, 12, 4);
+ int feature = ARM_FEATURE_V8_ATOMICS;
+
+ if (is_vector) {
+ unallocated_encoding(s);
+ return;
+ }
+ switch (o3_opc) {
+ case 000: /* LDADD */
+ case 001: /* LDCLR */
+ case 002: /* LDEOR */
+ case 003: /* LDSET */
+ case 004: /* LDSMAX */
+ case 005: /* LDSMIN */
+ case 006: /* LDUMAX */
+ case 007: /* LDUMIN */
+ case 010: /* SWP */
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ if (!arm_dc_feature(s, feature)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ (void)rs;
+ (void)rn;
+}
+
/* Load/store register (all forms) */
static void disas_ldst_reg(DisasContext *s, uint32_t insn)
{
@@ -2725,23 +2810,28 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn)
switch (extract32(insn, 24, 2)) {
case 0:
- if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
- disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
- } else {
+ if (extract32(insn, 21, 1) == 0) {
/* Load/store register (unscaled immediate)
* Load/store immediate pre/post-indexed
* Load/store register unprivileged
*/
disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
+ return;
+ }
+ switch (extract32(insn, 10, 2)) {
+ case 0:
+ disas_ldst_atomic(s, insn, size, rt, is_vector);
+ return;
+ case 2:
+ disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
+ return;
}
break;
case 1:
disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
- break;
- default:
- unallocated_encoding(s);
- break;
+ return;
}
+ unallocated_encoding(s);
}
/* AdvSIMD load/store multiple structures
--
2.17.0
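Two decode details worth spelling out (a sketch of the bit arithmetic,
following the comments in the patch itself):

    /* o2_L_o1_o0: extract32(insn, 21, 3) reads bits [23:21] = o2:L:o1;
     * the "* 2 | is_lasr" step appends o0 (bit 15), giving the 4-bit
     * o2:L:o1:o0 switch index.  E.g. LDAXR has o2=0 L=1 o1=0 o0=1
     * -> 0b0101 = 0x5.
     *
     * o3_opc: the case labels 000...010 in disas_ldst_atomic are octal,
     * mirroring the o3:opc split (o3 is bit 15, opc is bits [14:12]),
     * so 010 is o3=1/opc=0, i.e. SWP. */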
* Re: [Qemu-devel] [PATCH v3 07/10] target/arm: Introduce ARM_FEATURE_V8_ATOMICS and initial decode
From: Peter Maydell @ 2018-05-08 16:48 UTC
To: Richard Henderson; +Cc: QEMU Developers
On 8 May 2018 at 16:14, Richard Henderson <richard.henderson@linaro.org> wrote:
> The insns in the ARMv8.1-Atomics extension are added to the existing
> load/store exclusive and load/store reg opcode spaces.
> Rearrange the top-level decoders for these to accommodate.
> The Atomics insns themselves still generate Unallocated.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/arm/cpu.h | 2 +
> linux-user/elfload.c | 1 +
> target/arm/translate-a64.c | 182 +++++++++++++++++++++++++++----------
> 3 files changed, 139 insertions(+), 46 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 44e6b77151..f71a78d908 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -1449,6 +1449,8 @@ enum arm_features {
> ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
> ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */
> ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
> + ARM_FEATURE_V8_1,
> + ARM_FEATURE_V8_ATOMICS = ARM_FEATURE_V8_1, /* mandatory extension */
This isn't the way we handle "this bit of the spec is mandatory
for architecture version X" for existing features. We have
a feature bit for the arch version X, and a feature bit for
the portion of the spec, and arm_cpu_realizefn() sets the
spec-portion feature bit if the version-X bit is set.
Doing it the way you have here means we can't model a
v8.0 CPU that also has the atomics extension but not the rest
of v8.1. (A v8.x implementation can include any arbitrary
subset of the v8.(x+1) features, so this is a legal combination.)
In fact we already have a mandatory-in-v8.1 feature:
ARM_FEATURE_V8_RDM, which we've just given its own feature bit.
I can apply this to target-arm.next with this change squashed in:
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1449,8 +1449,7 @@ enum arm_features {
ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
- ARM_FEATURE_V8_1,
- ARM_FEATURE_V8_ATOMICS = ARM_FEATURE_V8_1, /* mandatory extension */
+ ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */
ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */
ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */
Is that OK?
We can have a separate patch which adds ARM_FEATURE_V8_1 and the
code in realizefn to do
if (arm_feature(env, ARM_FEATURE_V8_1)) {
set_feature(env, ARM_FEATURE_V8);
set_feature(env, ARM_FEATURE_V8_ATOMICS);
set_feature(env, ARM_FEATURE_V8_RDM);
}
thanks
-- PMM
* Re: [Qemu-devel] [PATCH v3 07/10] target/arm: Introduce ARM_FEATURE_V8_ATOMICS and initial decode
2018-05-08 16:48 ` Peter Maydell
@ 2018-05-08 17:31 ` Richard Henderson
0 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2018-05-08 17:31 UTC (permalink / raw)
To: Peter Maydell; +Cc: QEMU Developers
On 05/08/2018 09:48 AM, Peter Maydell wrote:
> (A v8.x implementation can include any arbitrary
> subset of the v8.(x+1) features, so this is a legal combination.)
I didn't realize this was a possibility.
> In fact we already have a mandatory-in-v8.1 feature:
> ARM_FEATURE_V8_RDM, which we've just given its own feature bit.
Yes, I'd been planning to send follow-up patches to "tidy" this.
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -1449,8 +1449,7 @@ enum arm_features {
> ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
> ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */
> ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
> - ARM_FEATURE_V8_1,
> - ARM_FEATURE_V8_ATOMICS = ARM_FEATURE_V8_1, /* mandatory extension */
> + ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */
> ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */
> ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
> ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */
>
> Is that OK?
Yes, that's fine. Thanks.
> We can have a separate patch which adds ARM_FEATURE_V8_1 and the
> code in realizefn to do
> if (arm_feature(env, ARM_FEATURE_V8_1)) {
> set_feature(env, ARM_FEATURE_V8);
> set_feature(env, ARM_FEATURE_V8_ATOMICS);
> set_feature(env, ARM_FEATURE_V8_RDM);
> }
Yep.
r~
* [Qemu-devel] [PATCH v3 08/10] target/arm: Fill in disas_ldst_atomic
2018-05-08 15:14 [Qemu-devel] [PATCH v3 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (6 preceding siblings ...)
2018-05-08 15:14 ` [Qemu-devel] [PATCH v3 07/10] target/arm: Introduce ARM_FEATURE_V8_ATOMICS and initial decode Richard Henderson
@ 2018-05-08 15:14 ` Richard Henderson
2018-05-08 15:14 ` [Qemu-devel] [PATCH v3 09/10] target/arm: Implement CAS and CASP Richard Henderson
` (3 subsequent siblings)
11 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2018-05-08 15:14 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
This implements all of the v8.1-Atomics instructions except
for compare-and-swap, which is decoded elsewhere.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-a64.c | 38 ++++++++++++++++++++++++++++++++++++--
1 file changed, 36 insertions(+), 2 deletions(-)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 6acad791e6..c13858a690 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -84,6 +84,7 @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
/* Note that the gvec expanders operate on offsets + sizes. */
typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
@@ -2772,6 +2773,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
int rn = extract32(insn, 5, 5);
int o3_opc = extract32(insn, 12, 4);
int feature = ARM_FEATURE_V8_ATOMICS;
+ TCGv_i64 tcg_rn, tcg_rs;
+ AtomicThreeOpFn *fn;
if (is_vector) {
unallocated_encoding(s);
@@ -2779,14 +2782,32 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
}
switch (o3_opc) {
case 000: /* LDADD */
+ fn = tcg_gen_atomic_fetch_add_i64;
+ break;
case 001: /* LDCLR */
+ fn = tcg_gen_atomic_fetch_and_i64;
+ break;
case 002: /* LDEOR */
+ fn = tcg_gen_atomic_fetch_xor_i64;
+ break;
case 003: /* LDSET */
+ fn = tcg_gen_atomic_fetch_or_i64;
+ break;
case 004: /* LDSMAX */
+ fn = tcg_gen_atomic_fetch_smax_i64;
+ break;
case 005: /* LDSMIN */
+ fn = tcg_gen_atomic_fetch_smin_i64;
+ break;
case 006: /* LDUMAX */
+ fn = tcg_gen_atomic_fetch_umax_i64;
+ break;
case 007: /* LDUMIN */
+ fn = tcg_gen_atomic_fetch_umin_i64;
+ break;
case 010: /* SWP */
+ fn = tcg_gen_atomic_xchg_i64;
+ break;
default:
unallocated_encoding(s);
return;
@@ -2796,8 +2817,21 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
return;
}
- (void)rs;
- (void)rn;
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_rn = cpu_reg_sp(s, rn);
+ tcg_rs = read_cpu_reg(s, rs, true);
+
+ if (o3_opc == 1) { /* LDCLR */
+ tcg_gen_not_i64(tcg_rs, tcg_rs);
+ }
+
+ /* The tcg atomic primitives are all full barriers. Therefore we
+ * can ignore the Acquire and Release bits of this instruction.
+ */
+ fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
+ s->be_data | size | MO_ALIGN);
}
/* Load/store register (all forms) */
--
2.17.0
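A side note on the LDCLR case above: the instruction is an atomic bit-clear, mem <- mem AND NOT Rs, with the old value returned in Rt, which is why the patch inverts Rs and reuses the plain fetch-and expander. A hedged C11 sketch of that equivalence (standalone illustration, not a QEMU API):

    #include <stdatomic.h>
    #include <stdint.h>

    /* LDCLR expressed with fetch-and: clear in memory every bit that
     * is set in rs, returning the old memory value for Rt. */
    static uint64_t ldclr64(_Atomic uint64_t *mem, uint64_t rs)
    {
        return atomic_fetch_and(mem, ~rs);
    }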
* [Qemu-devel] [PATCH v3 09/10] target/arm: Implement CAS and CASP
2018-05-08 15:14 [Qemu-devel] [PATCH v3 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (7 preceding siblings ...)
2018-05-08 15:14 ` [Qemu-devel] [PATCH v3 08/10] target/arm: Fill in disas_ldst_atomic Richard Henderson
@ 2018-05-08 15:14 ` Richard Henderson
2018-05-08 15:14 ` [Qemu-devel] [PATCH v3 10/10] target/arm: Enable ARM_FEATURE_V8_ATOMICS for user-only Richard Henderson
` (2 subsequent siblings)
11 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2018-05-08 15:14 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper-a64.h | 2 +
target/arm/helper-a64.c | 43 ++++++++++++++
target/arm/translate-a64.c | 119 ++++++++++++++++++++++++++++++++++++-
3 files changed, 161 insertions(+), 3 deletions(-)
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index ef4ddfe9d8..b8028ac98c 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -51,6 +51,8 @@ DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG,
DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG,
i64, env, i64, i64, i64)
+DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64)
+DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64)
DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index afb25ad20c..549ed3513e 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -636,6 +636,49 @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
}
+/* Writes back the old data into Rs. */
+void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
+ uint64_t new_lo, uint64_t new_hi)
+{
+ uintptr_t ra = GETPC();
+#ifndef CONFIG_ATOMIC128
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#else
+ Int128 oldv, cmpv, newv;
+
+ cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
+ newv = int128_make128(new_lo, new_hi);
+
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
+ oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
+
+ env->xregs[rs] = int128_getlo(oldv);
+ env->xregs[rs + 1] = int128_gethi(oldv);
+#endif
+}
+
+void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
+ uint64_t new_hi, uint64_t new_lo)
+{
+ uintptr_t ra = GETPC();
+#ifndef CONFIG_ATOMIC128
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#else
+ Int128 oldv, cmpv, newv;
+
+ cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
+ newv = int128_make128(new_lo, new_hi);
+
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
+ oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+
+ env->xregs[rs + 1] = int128_getlo(oldv);
+ env->xregs[rs] = int128_gethi(oldv);
+#endif
+}
+
/*
* AdvSIMD half-precision
*/
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index c13858a690..e6cd88303f 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -2114,6 +2114,103 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
+static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
+ int rn, int size)
+{
+ TCGv_i64 tcg_rs = cpu_reg(s, rs);
+ TCGv_i64 tcg_rt = cpu_reg(s, rt);
+ int memidx = get_mem_index(s);
+ TCGv_i64 addr = cpu_reg_sp(s, rn);
+
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_gen_atomic_cmpxchg_i64(tcg_rs, addr, tcg_rs, tcg_rt, memidx,
+ size | MO_ALIGN | s->be_data);
+}
+
+static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
+ int rn, int size)
+{
+ TCGv_i64 s1 = cpu_reg(s, rs);
+ TCGv_i64 s2 = cpu_reg(s, rs + 1);
+ TCGv_i64 t1 = cpu_reg(s, rt);
+ TCGv_i64 t2 = cpu_reg(s, rt + 1);
+ TCGv_i64 addr = cpu_reg_sp(s, rn);
+ int memidx = get_mem_index(s);
+
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ if (size == 2) {
+ TCGv_i64 cmp = tcg_temp_new_i64();
+ TCGv_i64 val = tcg_temp_new_i64();
+
+ if (s->be_data == MO_LE) {
+ tcg_gen_concat32_i64(val, t1, t2);
+ tcg_gen_concat32_i64(cmp, s1, s2);
+ } else {
+ tcg_gen_concat32_i64(val, t2, t1);
+ tcg_gen_concat32_i64(cmp, s2, s1);
+ }
+
+ tcg_gen_atomic_cmpxchg_i64(cmp, addr, cmp, val, memidx,
+ MO_64 | MO_ALIGN | s->be_data);
+ tcg_temp_free_i64(val);
+
+ if (s->be_data == MO_LE) {
+ tcg_gen_extr32_i64(s1, s2, cmp);
+ } else {
+ tcg_gen_extr32_i64(s2, s1, cmp);
+ }
+ tcg_temp_free_i64(cmp);
+ } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+ TCGv_i32 tcg_rs = tcg_const_i32(rs);
+
+ if (s->be_data == MO_LE) {
+ gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+ } else {
+ gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+ }
+ tcg_temp_free_i32(tcg_rs);
+ } else {
+ TCGv_i64 d1 = tcg_temp_new_i64();
+ TCGv_i64 d2 = tcg_temp_new_i64();
+ TCGv_i64 a2 = tcg_temp_new_i64();
+ TCGv_i64 c1 = tcg_temp_new_i64();
+ TCGv_i64 c2 = tcg_temp_new_i64();
+ TCGv_i64 zero = tcg_const_i64(0);
+
+ /* Load the two words, in memory order. */
+ tcg_gen_qemu_ld_i64(d1, addr, memidx,
+ MO_64 | MO_ALIGN_16 | s->be_data);
+ tcg_gen_addi_i64(a2, addr, 8);
+ tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
+
+ /* Compare the two words, also in memory order. */
+ tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
+ tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
+ tcg_gen_and_i64(c2, c2, c1);
+
+ /* If compare equal, write back new data, else write back old data. */
+ tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
+ tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
+ tcg_gen_qemu_st_i64(c1, addr, memidx, MO_64 | s->be_data);
+ tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
+ tcg_temp_free_i64(a2);
+ tcg_temp_free_i64(c1);
+ tcg_temp_free_i64(c2);
+ tcg_temp_free_i64(zero);
+
+ /* Write back the data from memory to Rs. */
+ tcg_gen_mov_i64(s1, d1);
+ tcg_gen_mov_i64(s2, d2);
+ tcg_temp_free_i64(d1);
+ tcg_temp_free_i64(d2);
+ }
+}
+
/* Update the Sixty-Four bit (SF) registersize. This logic is derived
* from the ARMv8 specs for LDR (Shared decode for all encodings).
*/
@@ -2214,10 +2311,16 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
return;
}
- /* CASP / CASPL */
+ if (rt2 == 31
+ && ((rt | rs) & 1) == 0
+ && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+ /* CASP / CASPL */
+ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
+ return;
+ }
break;
- case 0x6: case 0x7: /* CASP / LDXP */
+ case 0x6: case 0x7: /* CASPA / LDXP */
if (size & 2) { /* LDXP / LDAXP */
if (rn == 31) {
gen_check_sp_alignment(s);
@@ -2230,13 +2333,23 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
}
return;
}
- /* CASPA / CASPAL */
+ if (rt2 == 31
+ && ((rt | rs) & 1) == 0
+ && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+ /* CASPA / CASPAL */
+ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
+ return;
+ }
break;
case 0xa: /* CAS */
case 0xb: /* CASL */
case 0xe: /* CASA */
case 0xf: /* CASAL */
+ if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+ gen_compare_and_swap(s, rs, rt, rn, size);
+ return;
+ }
break;
}
unallocated_encoding(s);
--
2.17.0
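For readers following the non-parallel CASP fallback above, a hedged pseudo-C restatement of its data flow (plain loads and stores stand in for the tcg ops; alignment checks and endianness are ignored, and all names are illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    /* Both words must compare equal for the new pair to be stored;
     * otherwise the old data is written back unchanged. Rs:Rs+1
     * always receive the data found in memory. */
    static void casp_sketch(uint64_t *mem, uint64_t *s1, uint64_t *s2,
                            uint64_t t1, uint64_t t2)
    {
        uint64_t d1 = mem[0], d2 = mem[1];  /* load, in memory order */
        bool eq = (d1 == *s1) && (d2 == *s2);

        mem[0] = eq ? t1 : d1;              /* the movcond + store pair */
        mem[1] = eq ? t2 : d2;

        *s1 = d1;                           /* old data back to Rs:Rs+1 */
        *s2 = d2;
    }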
* [Qemu-devel] [PATCH v3 10/10] target/arm: Enable ARM_FEATURE_V8_ATOMICS for user-only
2018-05-08 15:14 [Qemu-devel] [PATCH v3 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (8 preceding siblings ...)
2018-05-08 15:14 ` [Qemu-devel] [PATCH v3 09/10] target/arm: Implement CAS and CASP Richard Henderson
@ 2018-05-08 15:14 ` Richard Henderson
2018-05-08 15:54 ` [Qemu-devel] [PATCH v3 00/10] target/arm: Implement v8.1-Atomics no-reply
2018-05-10 15:03 ` Peter Maydell
11 siblings, 0 replies; 19+ messages in thread
From: Richard Henderson @ 2018-05-08 15:14 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu64.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 991d764674..c50dcd4077 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -248,6 +248,7 @@ static void aarch64_max_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_V8_SM4);
set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
set_feature(&cpu->env, ARM_FEATURE_CRC);
+ set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS);
set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
set_feature(&cpu->env, ARM_FEATURE_V8_FP16);
set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
--
2.17.0
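With the feature set on the "max" CPU, user-only guests can exercise the new insns directly; for example (invocation illustrative, binary name hypothetical):

    qemu-aarch64 -cpu max ./atomics-test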
* Re: [Qemu-devel] [PATCH v3 00/10] target/arm: Implement v8.1-Atomics
2018-05-08 15:14 [Qemu-devel] [PATCH v3 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (9 preceding siblings ...)
2018-05-08 15:14 ` [Qemu-devel] [PATCH v3 10/10] target/arm: Enable ARM_FEATURE_V8_ATOMICS for user-only Richard Henderson
@ 2018-05-08 15:54 ` no-reply
2018-05-10 15:03 ` Peter Maydell
11 siblings, 0 replies; 19+ messages in thread
From: no-reply @ 2018-05-08 15:54 UTC (permalink / raw)
To: richard.henderson; +Cc: famz, qemu-devel, peter.maydell
Hi,
This series seems to have some coding style problems. See output below for
more information:
Type: series
Message-id: 20180508151437.4232-1-richard.henderson@linaro.org
Subject: [Qemu-devel] [PATCH v3 00/10] target/arm: Implement v8.1-Atomics
=== TEST SCRIPT BEGIN ===
#!/bin/bash
BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done
exit $failed
=== TEST SCRIPT END ===
Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
t [tag update] patchew/20180503225648.13153-1-jsnow@redhat.com -> patchew/20180503225648.13153-1-jsnow@redhat.com
Switched to a new branch 'test'
03c4e7c45f target/arm: Enable ARM_FEATURE_V8_ATOMICS for user-only
bfd47ba4bc target/arm: Implement CAS and CASP
92484463d0 target/arm: Fill in disas_ldst_atomic
13b7933509 target/arm: Introduce ARM_FEATURE_V8_ATOMICS and initial decode
97dea408aa target/riscv: Use new atomic min/max expanders
72116c58cb tcg: Use GEN_ATOMIC_HELPER_FN for opposite endian atomic add
cb24fc8fed tcg: Introduce atomic helpers for integer min/max
255505b55f target/xtensa: Use new min/max expanders
f82f53c47a target/arm: Use new min/max expanders
5d8965c70b tcg: Introduce helpers for integer min/max
=== OUTPUT BEGIN ===
Checking PATCH 1/10: tcg: Introduce helpers for integer min/max...
Checking PATCH 2/10: target/arm: Use new min/max expanders...
Checking PATCH 3/10: target/xtensa: Use new min/max expanders...
Checking PATCH 4/10: tcg: Introduce atomic helpers for integer min/max...
ERROR: memory barrier without comment
#56: FILE: accel/tcg/atomic_template.h:137:
+ smp_mb(); \
ERROR: memory barrier without comment
#96: FILE: accel/tcg/atomic_template.h:285:
+ smp_mb(); \
total: 2 errors, 0 warnings, 236 lines checked
Your patch has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
Checking PATCH 5/10: tcg: Use GEN_ATOMIC_HELPER_FN for opposite endian atomic add...
Checking PATCH 6/10: target/riscv: Use new atomic min/max expanders...
Checking PATCH 7/10: target/arm: Introduce ARM_FEATURE_V8_ATOMICS and initial decode...
Checking PATCH 8/10: target/arm: Fill in disas_ldst_atomic...
Checking PATCH 9/10: target/arm: Implement CAS and CASP...
Checking PATCH 10/10: target/arm: Enable ARM_FEATURE_V8_ATOMICS for user-only...
=== OUTPUT END ===
Test command exited with code: 1
---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-devel@redhat.com
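For reference, the two errors flagged above come from checkpatch's rule that every explicit memory barrier carry a justifying comment; the eventual fix is an annotation of roughly this shape (comment text illustrative):

    smp_mb(); /* Order the RMW above against the following loads. */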
* Re: [Qemu-devel] [PATCH v3 00/10] target/arm: Implement v8.1-Atomics
2018-05-08 15:14 [Qemu-devel] [PATCH v3 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (10 preceding siblings ...)
2018-05-08 15:54 ` [Qemu-devel] [PATCH v3 00/10] target/arm: Implement v8.1-Atomics no-reply
@ 2018-05-10 15:03 ` Peter Maydell
11 siblings, 0 replies; 19+ messages in thread
From: Peter Maydell @ 2018-05-10 15:03 UTC (permalink / raw)
To: Richard Henderson; +Cc: QEMU Developers
On 8 May 2018 at 16:14, Richard Henderson <richard.henderson@linaro.org> wrote:
> This implements the Atomics extension, which is mandatory for v8.1.
> While testing the v8.2-SVE extension, I've run into issues with the
> GCC testsuite expecting this to exist.
>
> Missing is the wiring up of the system registers to indicate that
> the extension exists, but we have no system CPU model that would
> exercise such a setting.
>
> Changes since v2:
> * Patch 8: Do not zero-extend X[s] via the third parameter
> to read_cpu_reg.
>
> Changes since v1:
> * New patch to use a helper macro for opposite-endian
> atomic_fetch_add and atomic_add_fetch, as suggested
> by pm215.
> * Introduce ARM_FEATURE_V8_1 and define ARM_FEATURE_V8_ATOMICS
> in terms of that, reinforcing the mandatory nature of
> the extension.
> * Typo fix in patch 8
Applied to target-arm.next (with the atomics.h workaround first
and the removal of ARM_FEATURE_V8_1), thanks.
-- PMM