* [Qemu-devel] [PATCH v2 01/10] tcg: Introduce helpers for integer min/max
2018-05-04 18:30 [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
@ 2018-05-04 18:30 ` Richard Henderson
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 02/10] target/arm: Use new min/max expanders Richard Henderson
` (10 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-05-04 18:30 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, peter.maydell
These operations are re-invented by several targets so far.
Several supported hosts have insns for these, so place the
expanders out-of-line for a future introduction of tcg opcodes.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg-op.h | 16 ++++++++++++++++
tcg/tcg-op.c | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 56 insertions(+)
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 5d2c91a1b6..0451e2752e 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -324,6 +324,10 @@ void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_umin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_umax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
static inline void tcg_gen_discard_i32(TCGv_i32 arg)
{
@@ -517,6 +521,10 @@ void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg);
void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg);
void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg);
void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_umin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_umax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
#if TCG_TARGET_REG_BITS == 64
static inline void tcg_gen_discard_i64(TCGv_i64 arg)
@@ -1025,6 +1033,10 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
#define tcg_gen_mulu2_tl tcg_gen_mulu2_i64
#define tcg_gen_muls2_tl tcg_gen_muls2_i64
#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i64
+#define tcg_gen_smin_tl tcg_gen_smin_i64
+#define tcg_gen_umin_tl tcg_gen_umin_i64
+#define tcg_gen_smax_tl tcg_gen_smax_i64
+#define tcg_gen_umax_tl tcg_gen_umax_i64
#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i64
#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i64
#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i64
@@ -1123,6 +1135,10 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
#define tcg_gen_mulu2_tl tcg_gen_mulu2_i32
#define tcg_gen_muls2_tl tcg_gen_muls2_i32
#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i32
+#define tcg_gen_smin_tl tcg_gen_smin_i32
+#define tcg_gen_umin_tl tcg_gen_umin_i32
+#define tcg_gen_smax_tl tcg_gen_smax_i32
+#define tcg_gen_umax_tl tcg_gen_umax_i32
#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i32
#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i32
#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i32
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 34b96d68f3..5b82c3be8d 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -1033,6 +1033,26 @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
}
}
+void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b);
+}
+
+void tcg_gen_umin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, a, b);
+}
+
+void tcg_gen_smax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, b, a);
+}
+
+void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a);
+}
+
/* 64-bit ops */
#if TCG_TARGET_REG_BITS == 32
@@ -2438,6 +2458,26 @@ void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
tcg_temp_free_i64(t2);
}
+void tcg_gen_smin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, a, b);
+}
+
+void tcg_gen_umin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, a, b);
+}
+
+void tcg_gen_smax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, b, a);
+}
+
+void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a);
+}
+
/* Size changing operations. */
void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
--
2.14.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH v2 02/10] target/arm: Use new min/max expanders
2018-05-04 18:30 [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 01/10] tcg: Introduce helpers for integer min/max Richard Henderson
@ 2018-05-04 18:30 ` Richard Henderson
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 03/10] target/xtensa: " Richard Henderson
` (9 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-05-04 18:30 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, peter.maydell
The generic expanders replace nearly identical code in the translator.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-a64.c | 46 ++++++++++++++--------------------------------
1 file changed, 14 insertions(+), 32 deletions(-)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index bff4e13bf6..d916fea3a3 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -6021,15 +6021,18 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
break;
case 0x0a: /* SMAXV / UMAXV */
- tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
- tcg_res,
- tcg_res, tcg_elt, tcg_res, tcg_elt);
+ if (is_u) {
+ tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
+ } else {
+ tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
+ }
break;
case 0x1a: /* SMINV / UMINV */
- tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
- tcg_res,
- tcg_res, tcg_elt, tcg_res, tcg_elt);
- break;
+ if (is_u) {
+ tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
+ } else {
+ tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
+ }
break;
default:
g_assert_not_reached();
@@ -9931,27 +9934,6 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
}
}
-/* Helper functions for 32 bit comparisons */
-static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
- tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
-}
-
-static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
- tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
-}
-
-static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
- tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
-}
-
-static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
- tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
-}
-
/* Pairwise op subgroup of C3.6.16.
*
* This is called directly or via the handle_3same_float for float pairwise
@@ -10051,7 +10033,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
{ gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
- { gen_max_s32, gen_max_u32 },
+ { tcg_gen_smax_i32, tcg_gen_umax_i32 },
};
genfn = fns[size][u];
break;
@@ -10061,7 +10043,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
{ gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
- { gen_min_s32, gen_min_u32 },
+ { tcg_gen_smin_i32, tcg_gen_umin_i32 },
};
genfn = fns[size][u];
break;
@@ -10516,7 +10498,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
{ gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
- { gen_max_s32, gen_max_u32 },
+ { tcg_gen_smax_i32, tcg_gen_umax_i32 },
};
genfn = fns[size][u];
break;
@@ -10527,7 +10509,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
{ gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
- { gen_min_s32, gen_min_u32 },
+ { tcg_gen_smin_i32, tcg_gen_umin_i32 },
};
genfn = fns[size][u];
break;
--
2.14.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH v2 03/10] target/xtensa: Use new min/max expanders
2018-05-04 18:30 [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 01/10] tcg: Introduce helpers for integer min/max Richard Henderson
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 02/10] target/arm: Use new min/max expanders Richard Henderson
@ 2018-05-04 18:30 ` Richard Henderson
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 04/10] tcg: Introduce atomic helpers for integer min/max Richard Henderson
` (8 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-05-04 18:30 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, peter.maydell
The generic expanders replace nearly identical code in the translator.
Acked-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/xtensa/translate.c | 50 +++++++++++++++++++++++++++++++----------------
1 file changed, 33 insertions(+), 17 deletions(-)
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index 4f6d03059f..bad5cdb009 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -1527,10 +1527,8 @@ static void translate_clamps(DisasContext *dc, const uint32_t arg[],
TCGv_i32 tmp1 = tcg_const_i32(-1u << arg[2]);
TCGv_i32 tmp2 = tcg_const_i32((1 << arg[2]) - 1);
- tcg_gen_movcond_i32(TCG_COND_GT, tmp1,
- cpu_R[arg[1]], tmp1, cpu_R[arg[1]], tmp1);
- tcg_gen_movcond_i32(TCG_COND_LT, cpu_R[arg[0]],
- tmp1, tmp2, tmp1, tmp2);
+ tcg_gen_smax_i32(tmp1, tmp1, cpu_R[arg[1]]);
+ tcg_gen_smin_i32(cpu_R[arg[0]], tmp1, tmp2);
tcg_temp_free(tmp1);
tcg_temp_free(tmp2);
}
@@ -1855,13 +1853,35 @@ static void translate_memw(DisasContext *dc, const uint32_t arg[],
tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
}
-static void translate_minmax(DisasContext *dc, const uint32_t arg[],
- const uint32_t par[])
+static void translate_smin(DisasContext *dc, const uint32_t arg[],
+ const uint32_t par[])
{
if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
- tcg_gen_movcond_i32(par[0], cpu_R[arg[0]],
- cpu_R[arg[1]], cpu_R[arg[2]],
- cpu_R[arg[1]], cpu_R[arg[2]]);
+ tcg_gen_smin_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
+ }
+}
+
+static void translate_umin(DisasContext *dc, const uint32_t arg[],
+ const uint32_t par[])
+{
+ if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
+ tcg_gen_umin_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
+ }
+}
+
+static void translate_smax(DisasContext *dc, const uint32_t arg[],
+ const uint32_t par[])
+{
+ if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
+ tcg_gen_smax_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
+ }
+}
+
+static void translate_umax(DisasContext *dc, const uint32_t arg[],
+ const uint32_t par[])
+{
+ if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
+ tcg_gen_umax_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
}
}
@@ -2984,23 +3004,19 @@ static const XtensaOpcodeOps core_ops[] = {
.par = (const uint32_t[]){TCG_COND_NE},
}, {
.name = "max",
- .translate = translate_minmax,
- .par = (const uint32_t[]){TCG_COND_GE},
+ .translate = translate_smax,
}, {
.name = "maxu",
- .translate = translate_minmax,
- .par = (const uint32_t[]){TCG_COND_GEU},
+ .translate = translate_umax,
}, {
.name = "memw",
.translate = translate_memw,
}, {
.name = "min",
- .translate = translate_minmax,
- .par = (const uint32_t[]){TCG_COND_LT},
+ .translate = translate_smin,
}, {
.name = "minu",
- .translate = translate_minmax,
- .par = (const uint32_t[]){TCG_COND_LTU},
+ .translate = translate_umin,
}, {
.name = "mov",
.translate = translate_mov,
--
2.14.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH v2 04/10] tcg: Introduce atomic helpers for integer min/max
2018-05-04 18:30 [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (2 preceding siblings ...)
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 03/10] target/xtensa: " Richard Henderson
@ 2018-05-04 18:30 ` Richard Henderson
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 05/10] tcg: Use GEN_ATOMIC_HELPER_FN for opposite endian atomic add Richard Henderson
` (7 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-05-04 18:30 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, peter.maydell
Given that this atomic operation will be used by both risc-v
and aarch64, let's not duplicate code across the two targets.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/atomic_template.h | 71 +++++++++++++++++++++++++++++++++++++++++++++
accel/tcg/tcg-runtime.h | 8 +++++
tcg/tcg-op.h | 34 ++++++++++++++++++++++
tcg/tcg.h | 8 +++++
tcg/tcg-op.c | 8 +++++
5 files changed, 129 insertions(+)
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index e022df4571..2489dd3ec1 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -25,18 +25,22 @@
#elif DATA_SIZE == 8
# define SUFFIX q
# define DATA_TYPE uint64_t
+# define SDATA_TYPE int64_t
# define BSWAP bswap64
#elif DATA_SIZE == 4
# define SUFFIX l
# define DATA_TYPE uint32_t
+# define SDATA_TYPE int32_t
# define BSWAP bswap32
#elif DATA_SIZE == 2
# define SUFFIX w
# define DATA_TYPE uint16_t
+# define SDATA_TYPE int16_t
# define BSWAP bswap16
#elif DATA_SIZE == 1
# define SUFFIX b
# define DATA_TYPE uint8_t
+# define SDATA_TYPE int8_t
# define BSWAP
#else
# error unsupported data size
@@ -118,6 +122,39 @@ GEN_ATOMIC_HELPER(or_fetch)
GEN_ATOMIC_HELPER(xor_fetch)
#undef GEN_ATOMIC_HELPER
+
+/* These helpers are, as a whole, full barriers. Within the helper,
+ * the leading barrier is explicit and the trailing barrier is within
+ * cmpxchg primitive.
+ */
+#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
+ ABI_TYPE xval EXTRA_ARGS) \
+{ \
+ ATOMIC_MMU_DECLS; \
+ XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
+ XDATA_TYPE cmp, old, new, val = xval; \
+ smp_mb(); \
+ cmp = atomic_read__nocheck(haddr); \
+ do { \
+ old = cmp; new = FN(old, val); \
+ cmp = atomic_cmpxchg__nocheck(haddr, old, new); \
+ } while (cmp != old); \
+ ATOMIC_MMU_CLEANUP; \
+ return RET; \
+}
+
+GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umin, MIN, DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umax, MAX, DATA_TYPE, old)
+
+GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umin_fetch, MIN, DATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
+
+#undef GEN_ATOMIC_HELPER_FN
#endif /* DATA SIZE >= 16 */
#undef END
@@ -233,6 +270,39 @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
ldo = ldn;
}
}
+
+/* These helpers are, as a whole, full barriers. Within the helper,
+ * the leading barrier is explicit and the trailing barrier is within
+ * cmpxchg primitive.
+ */
+#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
+ ABI_TYPE xval EXTRA_ARGS) \
+{ \
+ ATOMIC_MMU_DECLS; \
+ XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
+ XDATA_TYPE ldo, ldn, old, new, val = xval; \
+ smp_mb(); \
+ ldn = atomic_read__nocheck(haddr); \
+ do { \
+ ldo = ldn; old = BSWAP(ldo); new = FN(old, val); \
+ ldn = atomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new)); \
+ } while (ldo != ldn); \
+ ATOMIC_MMU_CLEANUP; \
+ return RET; \
+}
+
+GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umin, MIN, DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umax, MAX, DATA_TYPE, old)
+
+GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umin_fetch, MIN, DATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
+
+#undef GEN_ATOMIC_HELPER_FN
#endif /* DATA_SIZE >= 16 */
#undef END
@@ -241,5 +311,6 @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
#undef BSWAP
#undef ABI_TYPE
#undef DATA_TYPE
+#undef SDATA_TYPE
#undef SUFFIX
#undef DATA_SIZE
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 2536959a18..1bd39d136d 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -125,11 +125,19 @@ GEN_ATOMIC_HELPERS(fetch_add)
GEN_ATOMIC_HELPERS(fetch_and)
GEN_ATOMIC_HELPERS(fetch_or)
GEN_ATOMIC_HELPERS(fetch_xor)
+GEN_ATOMIC_HELPERS(fetch_smin)
+GEN_ATOMIC_HELPERS(fetch_umin)
+GEN_ATOMIC_HELPERS(fetch_smax)
+GEN_ATOMIC_HELPERS(fetch_umax)
GEN_ATOMIC_HELPERS(add_fetch)
GEN_ATOMIC_HELPERS(and_fetch)
GEN_ATOMIC_HELPERS(or_fetch)
GEN_ATOMIC_HELPERS(xor_fetch)
+GEN_ATOMIC_HELPERS(smin_fetch)
+GEN_ATOMIC_HELPERS(umin_fetch)
+GEN_ATOMIC_HELPERS(smax_fetch)
+GEN_ATOMIC_HELPERS(umax_fetch)
GEN_ATOMIC_HELPERS(xchg)
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 0451e2752e..04eb3e9e17 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -898,6 +898,7 @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+
void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
@@ -906,6 +907,15 @@ void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_smin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_smin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_umin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_umin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_smax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_smax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_umax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_fetch_umax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+
void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
@@ -914,6 +924,14 @@ void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_smin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_smin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_umin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_umin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_smax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_smax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_atomic_umax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
+void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
@@ -1043,10 +1061,18 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i64
#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i64
#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i64
+#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i64
+#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i64
+#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i64
+#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i64
#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i64
#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64
#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64
#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64
+#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i64
+#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i64
+#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i64
+#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i64
#define tcg_gen_dup_tl_vec tcg_gen_dup_i64_vec
#else
#define tcg_gen_movi_tl tcg_gen_movi_i32
@@ -1145,10 +1171,18 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i32
#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i32
#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i32
+#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i32
+#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i32
+#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i32
+#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i32
#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i32
#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32
#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32
#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32
+#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i32
+#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i32
+#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i32
+#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i32
#define tcg_gen_dup_tl_vec tcg_gen_dup_i32_vec
#endif
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 75fbad128b..1ca985479b 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -1415,12 +1415,20 @@ GEN_ATOMIC_HELPER_ALL(fetch_sub)
GEN_ATOMIC_HELPER_ALL(fetch_and)
GEN_ATOMIC_HELPER_ALL(fetch_or)
GEN_ATOMIC_HELPER_ALL(fetch_xor)
+GEN_ATOMIC_HELPER_ALL(fetch_smin)
+GEN_ATOMIC_HELPER_ALL(fetch_umin)
+GEN_ATOMIC_HELPER_ALL(fetch_smax)
+GEN_ATOMIC_HELPER_ALL(fetch_umax)
GEN_ATOMIC_HELPER_ALL(add_fetch)
GEN_ATOMIC_HELPER_ALL(sub_fetch)
GEN_ATOMIC_HELPER_ALL(and_fetch)
GEN_ATOMIC_HELPER_ALL(or_fetch)
GEN_ATOMIC_HELPER_ALL(xor_fetch)
+GEN_ATOMIC_HELPER_ALL(smin_fetch)
+GEN_ATOMIC_HELPER_ALL(umin_fetch)
+GEN_ATOMIC_HELPER_ALL(smax_fetch)
+GEN_ATOMIC_HELPER_ALL(umax_fetch)
GEN_ATOMIC_HELPER_ALL(xchg)
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 5b82c3be8d..6a914654f5 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -3051,11 +3051,19 @@ GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
+GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
+GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
+GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
+GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
+GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
+GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
+GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
+GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
{
--
2.14.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH v2 05/10] tcg: Use GEN_ATOMIC_HELPER_FN for opposite endian atomic add
2018-05-04 18:30 [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (3 preceding siblings ...)
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 04/10] tcg: Introduce atomic helpers for integer min/max Richard Henderson
@ 2018-05-04 18:30 ` Richard Henderson
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 06/10] target/riscv: Use new atomic min/max expanders Richard Henderson
` (6 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-05-04 18:30 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, peter.maydell
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/atomic_template.h | 49 +++++++--------------------------------------
1 file changed, 7 insertions(+), 42 deletions(-)
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index 2489dd3ec1..3f41ef2782 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -229,48 +229,6 @@ GEN_ATOMIC_HELPER(xor_fetch)
#undef GEN_ATOMIC_HELPER
-/* Note that for addition, we need to use a separate cmpxchg loop instead
- of bswaps for the reverse-host-endian helpers. */
-ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr,
- ABI_TYPE val EXTRA_ARGS)
-{
- ATOMIC_MMU_DECLS;
- DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
- DATA_TYPE ldo, ldn, ret, sto;
-
- ldo = atomic_read__nocheck(haddr);
- while (1) {
- ret = BSWAP(ldo);
- sto = BSWAP(ret + val);
- ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
- if (ldn == ldo) {
- ATOMIC_MMU_CLEANUP;
- return ret;
- }
- ldo = ldn;
- }
-}
-
-ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
- ABI_TYPE val EXTRA_ARGS)
-{
- ATOMIC_MMU_DECLS;
- DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
- DATA_TYPE ldo, ldn, ret, sto;
-
- ldo = atomic_read__nocheck(haddr);
- while (1) {
- ret = BSWAP(ldo) + val;
- sto = BSWAP(ret);
- ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
- if (ldn == ldo) {
- ATOMIC_MMU_CLEANUP;
- return ret;
- }
- ldo = ldn;
- }
-}
-
/* These helpers are, as a whole, full barriers. Within the helper,
* the leading barrier is explicit and the trailing barrier is within
* cmpxchg primitive.
@@ -302,6 +260,13 @@ GEN_ATOMIC_HELPER_FN(umin_fetch, MIN, DATA_TYPE, new)
GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
+/* Note that for addition, we need to use a separate cmpxchg loop instead
+ of bswaps for the reverse-host-endian helpers. */
+#define ADD(X, Y) (X + Y)
+GEN_ATOMIC_HELPER_FN(fetch_add, ADD, DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new)
+#undef ADD
+
#undef GEN_ATOMIC_HELPER_FN
#endif /* DATA_SIZE >= 16 */
--
2.14.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH v2 06/10] target/riscv: Use new atomic min/max expanders
2018-05-04 18:30 [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (4 preceding siblings ...)
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 05/10] tcg: Use GEN_ATOMIC_HELPER_FN for opposite endian atomic add Richard Henderson
@ 2018-05-04 18:30 ` Richard Henderson
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 07/10] target/arm: Introduce ARM_FEATURE_V8_ATOMICS and initial decode Richard Henderson
` (5 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-05-04 18:30 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, peter.maydell
Reviewed-by: Michael Clark <mjc@sifive.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/riscv/translate.c | 72 ++++++++++++++----------------------------------
1 file changed, 20 insertions(+), 52 deletions(-)
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 808eab7f50..9cab717088 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -725,7 +725,6 @@ static void gen_atomic(DisasContext *ctx, uint32_t opc,
TCGv src1, src2, dat;
TCGLabel *l1, *l2;
TCGMemOp mop;
- TCGCond cond;
bool aq, rl;
/* Extract the size of the atomic operation. */
@@ -823,60 +822,29 @@ static void gen_atomic(DisasContext *ctx, uint32_t opc,
tcg_gen_atomic_fetch_or_tl(src2, src1, src2, ctx->mem_idx, mop);
gen_set_gpr(rd, src2);
break;
-
case OPC_RISC_AMOMIN:
- cond = TCG_COND_LT;
- goto do_minmax;
- case OPC_RISC_AMOMAX:
- cond = TCG_COND_GT;
- goto do_minmax;
- case OPC_RISC_AMOMINU:
- cond = TCG_COND_LTU;
- goto do_minmax;
- case OPC_RISC_AMOMAXU:
- cond = TCG_COND_GTU;
- goto do_minmax;
- do_minmax:
- /* Handle the RL barrier. The AQ barrier is handled along the
- parallel path by the SC atomic cmpxchg. On the serial path,
- of course, barriers do not matter. */
- if (rl) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
- }
- if (tb_cflags(ctx->tb) & CF_PARALLEL) {
- l1 = gen_new_label();
- gen_set_label(l1);
- } else {
- l1 = NULL;
- }
-
gen_get_gpr(src1, rs1);
gen_get_gpr(src2, rs2);
- if ((mop & MO_SSIZE) == MO_SL) {
- /* Sign-extend the register comparison input. */
- tcg_gen_ext32s_tl(src2, src2);
- }
- dat = tcg_temp_local_new();
- tcg_gen_qemu_ld_tl(dat, src1, ctx->mem_idx, mop);
- tcg_gen_movcond_tl(cond, src2, dat, src2, dat, src2);
-
- if (tb_cflags(ctx->tb) & CF_PARALLEL) {
- /* Parallel context. Make this operation atomic by verifying
- that the memory didn't change while we computed the result. */
- tcg_gen_atomic_cmpxchg_tl(src2, src1, dat, src2, ctx->mem_idx, mop);
-
- /* If the cmpxchg failed, retry. */
- /* ??? There is an assumption here that this will eventually
- succeed, such that we don't live-lock. This is not unlike
- a similar loop that the compiler would generate for e.g.
- __atomic_fetch_and_xor, so don't worry about it. */
- tcg_gen_brcond_tl(TCG_COND_NE, dat, src2, l1);
- } else {
- /* Serial context. Directly store the result. */
- tcg_gen_qemu_st_tl(src2, src1, ctx->mem_idx, mop);
- }
- gen_set_gpr(rd, dat);
- tcg_temp_free(dat);
+ tcg_gen_atomic_fetch_smin_tl(src2, src1, src2, ctx->mem_idx, mop);
+ gen_set_gpr(rd, src2);
+ break;
+ case OPC_RISC_AMOMAX:
+ gen_get_gpr(src1, rs1);
+ gen_get_gpr(src2, rs2);
+ tcg_gen_atomic_fetch_smax_tl(src2, src1, src2, ctx->mem_idx, mop);
+ gen_set_gpr(rd, src2);
+ break;
+ case OPC_RISC_AMOMINU:
+ gen_get_gpr(src1, rs1);
+ gen_get_gpr(src2, rs2);
+ tcg_gen_atomic_fetch_umin_tl(src2, src1, src2, ctx->mem_idx, mop);
+ gen_set_gpr(rd, src2);
+ break;
+ case OPC_RISC_AMOMAXU:
+ gen_get_gpr(src1, rs1);
+ gen_get_gpr(src2, rs2);
+ tcg_gen_atomic_fetch_umax_tl(src2, src1, src2, ctx->mem_idx, mop);
+ gen_set_gpr(rd, src2);
break;
default:
--
2.14.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH v2 07/10] target/arm: Introduce ARM_FEATURE_V8_ATOMICS and initial decode
2018-05-04 18:30 [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (5 preceding siblings ...)
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 06/10] target/riscv: Use new atomic min/max expanders Richard Henderson
@ 2018-05-04 18:30 ` Richard Henderson
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 08/10] target/arm: Fill in disas_ldst_atomic Richard Henderson
` (4 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-05-04 18:30 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, peter.maydell
The insns in the ARMv8.1-Atomics are added to the existing
load/store exclusive and load/store reg opcode spaces.
Rearrange the top-level decoders for these to accomodate.
The Atomics insns themselves still generate Unallocated.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 2 +
linux-user/elfload.c | 1 +
target/arm/translate-a64.c | 182 +++++++++++++++++++++++++++++++++------------
3 files changed, 139 insertions(+), 46 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 44e6b77151..f71a78d908 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1449,6 +1449,8 @@ enum arm_features {
ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
+ ARM_FEATURE_V8_1,
+ ARM_FEATURE_V8_ATOMICS = ARM_FEATURE_V8_1, /* mandatory extension */
ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */
ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 36d52194bc..13bc78d0c8 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -581,6 +581,7 @@ static uint32_t get_elf_hwcap(void)
GET_FEATURE(ARM_FEATURE_V8_SHA512, ARM_HWCAP_A64_SHA512);
GET_FEATURE(ARM_FEATURE_V8_FP16,
ARM_HWCAP_A64_FPHP | ARM_HWCAP_A64_ASIMDHP);
+ GET_FEATURE(ARM_FEATURE_V8_ATOMICS, ARM_HWCAP_A64_ATOMICS);
GET_FEATURE(ARM_FEATURE_V8_RDM, ARM_HWCAP_A64_ASIMDRDM);
GET_FEATURE(ARM_FEATURE_V8_FCMA, ARM_HWCAP_A64_FCMA);
#undef GET_FEATURE
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index d916fea3a3..6acad791e6 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -2147,62 +2147,98 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
int rt = extract32(insn, 0, 5);
int rn = extract32(insn, 5, 5);
int rt2 = extract32(insn, 10, 5);
- int is_lasr = extract32(insn, 15, 1);
int rs = extract32(insn, 16, 5);
- int is_pair = extract32(insn, 21, 1);
- int is_store = !extract32(insn, 22, 1);
- int is_excl = !extract32(insn, 23, 1);
+ int is_lasr = extract32(insn, 15, 1);
+ int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
int size = extract32(insn, 30, 2);
TCGv_i64 tcg_addr;
- if ((!is_excl && !is_pair && !is_lasr) ||
- (!is_excl && is_pair) ||
- (is_pair && size < 2)) {
- unallocated_encoding(s);
+ switch (o2_L_o1_o0) {
+ case 0x0: /* STXR */
+ case 0x1: /* STLXR */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ if (is_lasr) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ }
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, false);
return;
- }
- if (rn == 31) {
- gen_check_sp_alignment(s);
- }
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
-
- /* Note that since TCG is single threaded load-acquire/store-release
- * semantics require no extra if (is_lasr) { ... } handling.
- */
-
- if (is_excl) {
- if (!is_store) {
- s->is_ldex = true;
- gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
- if (is_lasr) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
- }
- } else {
- if (is_lasr) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
- }
- gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
+ case 0x4: /* LDXR */
+ case 0x5: /* LDAXR */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
}
- } else {
- TCGv_i64 tcg_rt = cpu_reg(s, rt);
- bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ s->is_ldex = true;
+ gen_load_exclusive(s, rt, rt2, tcg_addr, size, false);
+ if (is_lasr) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ }
+ return;
+ case 0x9: /* STLR */
/* Generate ISS for non-exclusive accesses including LASR. */
- if (is_store) {
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ do_gpr_st(s, cpu_reg(s, rt), tcg_addr, size, true, rt,
+ disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
+ return;
+
+ case 0xd: /* LDAR */
+ /* Generate ISS for non-exclusive accesses including LASR. */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ do_gpr_ld(s, cpu_reg(s, rt), tcg_addr, size, false, false, true, rt,
+ disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ return;
+
+ case 0x2: case 0x3: /* CASP / STXP */
+ if (size & 2) { /* STXP / STLXP */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
}
- do_gpr_st(s, tcg_rt, tcg_addr, size,
- true, rt, iss_sf, is_lasr);
- } else {
- do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
- true, rt, iss_sf, is_lasr);
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
+ return;
+ }
+ /* CASP / CASPL */
+ break;
+
+ case 0x6: case 0x7: /* CASP / LDXP */
+ if (size & 2) { /* LDXP / LDAXP */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ s->is_ldex = true;
+ gen_load_exclusive(s, rt, rt2, tcg_addr, size, true);
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
}
+ return;
}
+ /* CASPA / CASPAL */
+ break;
+
+ case 0xa: /* CAS */
+ case 0xb: /* CASL */
+ case 0xe: /* CASA */
+ case 0xf: /* CASAL */
+ break;
}
+ unallocated_encoding(s);
}
/*
@@ -2715,6 +2751,55 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
}
}
+/* Atomic memory operations
+ *
+ * 31 30 27 26 24 22 21 16 15 12 10 5 0
+ * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
+ * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt |
+ * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+
+ *
+ * Rt: the result register
+ * Rn: base address or SP
+ * Rs: the source register for the operation
+ * V: vector flag (always 0 as of v8.3)
+ * A: acquire flag
+ * R: release flag
+ */
+static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
+ int size, int rt, bool is_vector)
+{
+ int rs = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int o3_opc = extract32(insn, 12, 4);
+ int feature = ARM_FEATURE_V8_ATOMICS;
+
+ if (is_vector) {
+ unallocated_encoding(s);
+ return;
+ }
+ switch (o3_opc) {
+ case 000: /* LDADD */
+ case 001: /* LDCLR */
+ case 002: /* LDEOR */
+ case 003: /* LDSET */
+ case 004: /* LDSMAX */
+ case 005: /* LDSMIN */
+ case 006: /* LDUMAX */
+ case 007: /* LDUMIN */
+ case 010: /* SWP */
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ if (!arm_dc_feature(s, feature)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ (void)rs;
+ (void)rn;
+}
+
/* Load/store register (all forms) */
static void disas_ldst_reg(DisasContext *s, uint32_t insn)
{
@@ -2725,23 +2810,28 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn)
switch (extract32(insn, 24, 2)) {
case 0:
- if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
- disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
- } else {
+ if (extract32(insn, 21, 1) == 0) {
/* Load/store register (unscaled immediate)
* Load/store immediate pre/post-indexed
* Load/store register unprivileged
*/
disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
+ return;
+ }
+ switch (extract32(insn, 10, 2)) {
+ case 0:
+ disas_ldst_atomic(s, insn, size, rt, is_vector);
+ return;
+ case 2:
+ disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
+ return;
}
break;
case 1:
disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
- break;
- default:
- unallocated_encoding(s);
- break;
+ return;
}
+ unallocated_encoding(s);
}
/* AdvSIMD load/store multiple structures
--
2.14.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH v2 08/10] target/arm: Fill in disas_ldst_atomic
2018-05-04 18:30 [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (6 preceding siblings ...)
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 07/10] target/arm: Introduce ARM_FEATURE_V8_ATOMICS and initial decode Richard Henderson
@ 2018-05-04 18:30 ` Richard Henderson
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 09/10] target/arm: Implement CAS and CASP Richard Henderson
` (3 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-05-04 18:30 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, peter.maydell
This implements all of the v8.1-Atomics instructions except
for compare-and-swap, which is decoded elsewhere.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-a64.c | 38 ++++++++++++++++++++++++++++++++++++--
1 file changed, 36 insertions(+), 2 deletions(-)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 6acad791e6..5af6028089 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -84,6 +84,7 @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
/* Note that the gvec expanders operate on offsets + sizes. */
typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
@@ -2772,6 +2773,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
int rn = extract32(insn, 5, 5);
int o3_opc = extract32(insn, 12, 4);
int feature = ARM_FEATURE_V8_ATOMICS;
+ TCGv_i64 tcg_rn, tcg_rs;
+ AtomicThreeOpFn *fn;
if (is_vector) {
unallocated_encoding(s);
@@ -2779,14 +2782,32 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
}
switch (o3_opc) {
case 000: /* LDADD */
+ fn = tcg_gen_atomic_fetch_add_i64;
+ break;
case 001: /* LDCLR */
+ fn = tcg_gen_atomic_fetch_and_i64;
+ break;
case 002: /* LDEOR */
+ fn = tcg_gen_atomic_fetch_xor_i64;
+ break;
case 003: /* LDSET */
+ fn = tcg_gen_atomic_fetch_or_i64;
+ break;
case 004: /* LDSMAX */
+ fn = tcg_gen_atomic_fetch_smax_i64;
+ break;
case 005: /* LDSMIN */
+ fn = tcg_gen_atomic_fetch_smin_i64;
+ break;
case 006: /* LDUMAX */
+ fn = tcg_gen_atomic_fetch_umax_i64;
+ break;
case 007: /* LDUMIN */
+ fn = tcg_gen_atomic_fetch_umin_i64;
+ break;
case 010: /* SWP */
+ fn = tcg_gen_atomic_xchg_i64;
+ break;
default:
unallocated_encoding(s);
return;
@@ -2796,8 +2817,21 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
return;
}
- (void)rs;
- (void)rn;
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_rn = cpu_reg_sp(s, rn);
+ tcg_rs = read_cpu_reg(s, rs, false);
+
+ if (o3_opc == 1) { /* LDCLR */
+ tcg_gen_not_i64(tcg_rs, tcg_rs);
+ }
+
+ /* The tcg atomic primitives are all full barriers. Therefore we
+ * can ignore the Acquire and Release bits of this instruction.
+ */
+ fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
+ s->be_data | size | MO_ALIGN);
}
/* Load/store register (all forms) */
--
2.14.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH v2 09/10] target/arm: Implement CAS and CASP
2018-05-04 18:30 [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (7 preceding siblings ...)
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 08/10] target/arm: Fill in disas_ldst_atomic Richard Henderson
@ 2018-05-04 18:30 ` Richard Henderson
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 10/10] target/arm: Enable ARM_FEATURE_V8_ATOMICS for user-only Richard Henderson
` (2 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-05-04 18:30 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, peter.maydell
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper-a64.h | 2 +
target/arm/helper-a64.c | 43 ++++++++++++++++
target/arm/translate-a64.c | 119 +++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 161 insertions(+), 3 deletions(-)
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index ef4ddfe9d8..b8028ac98c 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -51,6 +51,8 @@ DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG,
DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG,
i64, env, i64, i64, i64)
+DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64)
+DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64)
DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index afb25ad20c..549ed3513e 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -636,6 +636,49 @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
}
+/* Writes back the old data into Rs. */
+void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
+ uint64_t new_lo, uint64_t new_hi)
+{
+ uintptr_t ra = GETPC();
+#ifndef CONFIG_ATOMIC128
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#else
+ Int128 oldv, cmpv, newv;
+
+ cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
+ newv = int128_make128(new_lo, new_hi);
+
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
+ oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
+
+ env->xregs[rs] = int128_getlo(oldv);
+ env->xregs[rs + 1] = int128_gethi(oldv);
+#endif
+}
+
+void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
+ uint64_t new_hi, uint64_t new_lo)
+{
+ uintptr_t ra = GETPC();
+#ifndef CONFIG_ATOMIC128
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#else
+ Int128 oldv, cmpv, newv;
+
+ cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
+ newv = int128_make128(new_lo, new_hi);
+
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
+ oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+
+ env->xregs[rs + 1] = int128_getlo(oldv);
+ env->xregs[rs] = int128_gethi(oldv);
+#endif
+}
+
/*
* AdvSIMD half-precision
*/
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 5af6028089..08debc7a93 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -2114,6 +2114,103 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
+static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
+ int rn, int size)
+{
+ TCGv_i64 tcg_rs = cpu_reg(s, rs);
+ TCGv_i64 tcg_rt = cpu_reg(s, rt);
+ int memidx = get_mem_index(s);
+ TCGv_i64 addr = cpu_reg_sp(s, rn);
+
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_gen_atomic_cmpxchg_i64(tcg_rs, addr, tcg_rs, tcg_rt, memidx,
+ size | MO_ALIGN | s->be_data);
+}
+
+static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
+ int rn, int size)
+{
+ TCGv_i64 s1 = cpu_reg(s, rs);
+ TCGv_i64 s2 = cpu_reg(s, rs + 1);
+ TCGv_i64 t1 = cpu_reg(s, rt);
+ TCGv_i64 t2 = cpu_reg(s, rt + 1);
+ TCGv_i64 addr = cpu_reg_sp(s, rn);
+ int memidx = get_mem_index(s);
+
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ if (size == 2) {
+ TCGv_i64 cmp = tcg_temp_new_i64();
+ TCGv_i64 val = tcg_temp_new_i64();
+
+ if (s->be_data == MO_LE) {
+ tcg_gen_concat32_i64(val, t1, t2);
+ tcg_gen_concat32_i64(cmp, s1, s2);
+ } else {
+ tcg_gen_concat32_i64(val, t2, t1);
+ tcg_gen_concat32_i64(cmp, s2, s1);
+ }
+
+ tcg_gen_atomic_cmpxchg_i64(cmp, addr, cmp, val, memidx,
+ MO_64 | MO_ALIGN | s->be_data);
+ tcg_temp_free_i64(val);
+
+ if (s->be_data == MO_LE) {
+ tcg_gen_extr32_i64(s1, s2, cmp);
+ } else {
+ tcg_gen_extr32_i64(s2, s1, cmp);
+ }
+ tcg_temp_free_i64(cmp);
+ } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+ TCGv_i32 tcg_rs = tcg_const_i32(rs);
+
+ if (s->be_data == MO_LE) {
+ gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+ } else {
+ gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+ }
+ tcg_temp_free_i32(tcg_rs);
+ } else {
+ TCGv_i64 d1 = tcg_temp_new_i64();
+ TCGv_i64 d2 = tcg_temp_new_i64();
+ TCGv_i64 a2 = tcg_temp_new_i64();
+ TCGv_i64 c1 = tcg_temp_new_i64();
+ TCGv_i64 c2 = tcg_temp_new_i64();
+ TCGv_i64 zero = tcg_const_i64(0);
+
+ /* Load the two words, in memory order. */
+ tcg_gen_qemu_ld_i64(d1, addr, memidx,
+ MO_64 | MO_ALIGN_16 | s->be_data);
+ tcg_gen_addi_i64(a2, addr, 8);
+ tcg_gen_qemu_ld_i64(d2, addr, memidx, MO_64 | s->be_data);
+
+ /* Compare the two words, also in memory order. */
+ tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
+ tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
+ tcg_gen_and_i64(c2, c2, c1);
+
+ /* If compare equal, write back new data, else write back old data. */
+ tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
+ tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
+ tcg_gen_qemu_st_i64(c1, addr, memidx, MO_64 | s->be_data);
+ tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
+ tcg_temp_free_i64(a2);
+ tcg_temp_free_i64(c1);
+ tcg_temp_free_i64(c2);
+ tcg_temp_free_i64(zero);
+
+ /* Write back the data from memory to Rs. */
+ tcg_gen_mov_i64(s1, d1);
+ tcg_gen_mov_i64(s2, d2);
+ tcg_temp_free_i64(d1);
+ tcg_temp_free_i64(d2);
+ }
+}
+
/* Update the Sixty-Four bit (SF) registersize. This logic is derived
* from the ARMv8 specs for LDR (Shared decode for all encodings).
*/
@@ -2214,10 +2311,16 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
return;
}
- /* CASP / CASPL */
+ if (rt2 == 31
+ && ((rt | rs) & 1) == 0
+ && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+ /* CASP / CASPL */
+ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
+ return;
+ }
break;
- case 0x6: case 0x7: /* CASP / LDXP */
+ case 0x6: case 0x7: /* CASPA / LDXP */
if (size & 2) { /* LDXP / LDAXP */
if (rn == 31) {
gen_check_sp_alignment(s);
@@ -2230,13 +2333,23 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
}
return;
}
- /* CASPA / CASPAL */
+ if (rt2 == 31
+ && ((rt | rs) & 1) == 0
+ && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+ /* CASPA / CASPAL */
+ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
+ return;
+ }
break;
case 0xa: /* CAS */
case 0xb: /* CASL */
case 0xe: /* CASA */
case 0xf: /* CASAL */
+ if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+ gen_compare_and_swap(s, rs, rt, rn, size);
+ return;
+ }
break;
}
unallocated_encoding(s);
--
2.14.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH v2 10/10] target/arm: Enable ARM_FEATURE_V8_ATOMICS for user-only
2018-05-04 18:30 [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (8 preceding siblings ...)
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 09/10] target/arm: Implement CAS and CASP Richard Henderson
@ 2018-05-04 18:30 ` Richard Henderson
2018-05-04 18:43 ` [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics no-reply
2018-05-04 22:45 ` [Qemu-devel] [PATCH v3 08/10] target/arm: Fill in disas_ldst_atomic Richard Henderson
11 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2018-05-04 18:30 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, peter.maydell
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu64.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 991d764674..c50dcd4077 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -248,6 +248,7 @@ static void aarch64_max_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_V8_SM4);
set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
set_feature(&cpu->env, ARM_FEATURE_CRC);
+ set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS);
set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
set_feature(&cpu->env, ARM_FEATURE_V8_FP16);
set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
--
2.14.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics
2018-05-04 18:30 [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (9 preceding siblings ...)
2018-05-04 18:30 ` [Qemu-devel] [PATCH v2 10/10] target/arm: Enable ARM_FEATURE_V8_ATOMICS for user-only Richard Henderson
@ 2018-05-04 18:43 ` no-reply
2018-05-04 22:45 ` [Qemu-devel] [PATCH v3 08/10] target/arm: Fill in disas_ldst_atomic Richard Henderson
11 siblings, 0 replies; 14+ messages in thread
From: no-reply @ 2018-05-04 18:43 UTC (permalink / raw)
To: richard.henderson; +Cc: famz, qemu-devel, peter.maydell, qemu-arm
Hi,
This series seems to have some coding style problems. See output below for
more information:
Type: series
Message-id: 20180504183021.19318-1-richard.henderson@linaro.org
Subject: [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics
=== TEST SCRIPT BEGIN ===
#!/bin/bash
BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done
exit $failed
=== TEST SCRIPT END ===
Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
t [tag update] patchew/20180504171540.25813-1-peter.maydell@linaro.org -> patchew/20180504171540.25813-1-peter.maydell@linaro.org
* [new tag] patchew/20180504183021.19318-1-richard.henderson@linaro.org -> patchew/20180504183021.19318-1-richard.henderson@linaro.org
Switched to a new branch 'test'
85278ba514 target/arm: Enable ARM_FEATURE_V8_ATOMICS for user-only
94bdcba55b target/arm: Implement CAS and CASP
62f42319cf target/arm: Fill in disas_ldst_atomic
8d29049006 target/arm: Introduce ARM_FEATURE_V8_ATOMICS and initial decode
faec1dbe5a target/riscv: Use new atomic min/max expanders
1fa0baa271 tcg: Use GEN_ATOMIC_HELPER_FN for opposite endian atomic add
bb9e6b5cdb tcg: Introduce atomic helpers for integer min/max
b0c37750f7 target/xtensa: Use new min/max expanders
3373196e98 target/arm: Use new min/max expanders
38706ce634 tcg: Introduce helpers for integer min/max
=== OUTPUT BEGIN ===
Checking PATCH 1/10: tcg: Introduce helpers for integer min/max...
Checking PATCH 2/10: target/arm: Use new min/max expanders...
Checking PATCH 3/10: target/xtensa: Use new min/max expanders...
Checking PATCH 4/10: tcg: Introduce atomic helpers for integer min/max...
ERROR: memory barrier without comment
#56: FILE: accel/tcg/atomic_template.h:137:
+ smp_mb(); \
ERROR: memory barrier without comment
#96: FILE: accel/tcg/atomic_template.h:285:
+ smp_mb(); \
total: 2 errors, 0 warnings, 236 lines checked
Your patch has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
Checking PATCH 5/10: tcg: Use GEN_ATOMIC_HELPER_FN for opposite endian atomic add...
Checking PATCH 6/10: target/riscv: Use new atomic min/max expanders...
Checking PATCH 7/10: target/arm: Introduce ARM_FEATURE_V8_ATOMICS and initial decode...
Checking PATCH 8/10: target/arm: Fill in disas_ldst_atomic...
Checking PATCH 9/10: target/arm: Implement CAS and CASP...
Checking PATCH 10/10: target/arm: Enable ARM_FEATURE_V8_ATOMICS for user-only...
=== OUTPUT END ===
Test command exited with code: 1
---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-devel@redhat.com
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH v3 08/10] target/arm: Fill in disas_ldst_atomic
2018-05-04 18:30 [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics Richard Henderson
` (10 preceding siblings ...)
2018-05-04 18:43 ` [Qemu-devel] [PATCH v2 00/10] target/arm: Implement v8.1-Atomics no-reply
@ 2018-05-04 22:45 ` Richard Henderson
2018-05-08 14:09 ` Peter Maydell
11 siblings, 1 reply; 14+ messages in thread
From: Richard Henderson @ 2018-05-04 22:45 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, qemu-arm
This implements all of the v8.1-Atomics instructions except
for compare-and-swap, which is decoded elsewhere.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v3: Do not zero-extend X[s] via the third parameter to read_cpu_reg.
---
target/arm/translate-a64.c | 38 ++++++++++++++++++++++++++++++++++++--
1 file changed, 36 insertions(+), 2 deletions(-)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 6acad791e6..c13858a690 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -84,6 +84,7 @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
/* Note that the gvec expanders operate on offsets + sizes. */
typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
@@ -2772,6 +2773,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
int rn = extract32(insn, 5, 5);
int o3_opc = extract32(insn, 12, 4);
int feature = ARM_FEATURE_V8_ATOMICS;
+ TCGv_i64 tcg_rn, tcg_rs;
+ AtomicThreeOpFn *fn;
if (is_vector) {
unallocated_encoding(s);
@@ -2779,14 +2782,32 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
}
switch (o3_opc) {
case 000: /* LDADD */
+ fn = tcg_gen_atomic_fetch_add_i64;
+ break;
case 001: /* LDCLR */
+ fn = tcg_gen_atomic_fetch_and_i64;
+ break;
case 002: /* LDEOR */
+ fn = tcg_gen_atomic_fetch_xor_i64;
+ break;
case 003: /* LDSET */
+ fn = tcg_gen_atomic_fetch_or_i64;
+ break;
case 004: /* LDSMAX */
+ fn = tcg_gen_atomic_fetch_smax_i64;
+ break;
case 005: /* LDSMIN */
+ fn = tcg_gen_atomic_fetch_smin_i64;
+ break;
case 006: /* LDUMAX */
+ fn = tcg_gen_atomic_fetch_umax_i64;
+ break;
case 007: /* LDUMIN */
+ fn = tcg_gen_atomic_fetch_umin_i64;
+ break;
case 010: /* SWP */
+ fn = tcg_gen_atomic_xchg_i64;
+ break;
default:
unallocated_encoding(s);
return;
@@ -2796,8 +2817,21 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
return;
}
- (void)rs;
- (void)rn;
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_rn = cpu_reg_sp(s, rn);
+ tcg_rs = read_cpu_reg(s, rs, true);
+
+ if (o3_opc == 1) { /* LDCLR */
+ tcg_gen_not_i64(tcg_rs, tcg_rs);
+ }
+
+ /* The tcg atomic primitives are all full barriers. Therefore we
+ * can ignore the Acquire and Release bits of this instruction.
+ */
+ fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
+ s->be_data | size | MO_ALIGN);
}
/* Load/store register (all forms) */
--
2.14.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH v3 08/10] target/arm: Fill in disas_ldst_atomic
2018-05-04 22:45 ` [Qemu-devel] [PATCH v3 08/10] target/arm: Fill in disas_ldst_atomic Richard Henderson
@ 2018-05-08 14:09 ` Peter Maydell
0 siblings, 0 replies; 14+ messages in thread
From: Peter Maydell @ 2018-05-08 14:09 UTC (permalink / raw)
To: Richard Henderson; +Cc: QEMU Developers, qemu-arm
On 4 May 2018 at 23:45, Richard Henderson <richard.henderson@linaro.org> wrote:
> This implements all of the v8.1-Atomics instructions except
> for compare-and-swap, which is decoded elsewhere.
>
> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> v3: Do not zero-extend X[s] via the third parameter to read_cpu_reg.
Can you just resend the full series, please? Sending a single
patch rather than the whole thing means that 'patches' complains
about "series is either incomplete or improperly threaded",
and my eyeball for updated versions to add to my to-review
queue generally skips over patches which aren't cover letters
or single-patches...
thanks
-- PMM
^ permalink raw reply [flat|nested] 14+ messages in thread