* [PATCH v2 1/7] target/loongarch: Require atomics to be aligned
2025-11-19 12:24 [PATCH v2 0/7] Add LoongArch v1.1 instructions Jiajie Chen
@ 2025-11-19 12:24 ` Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 2/7] target/loongarch: Add am{swap/add}[_db].{b/h} Jiajie Chen
` (2 subsequent siblings)
3 siblings, 0 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:24 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
Currently, all atomic instructions in LoongArch require the address to
be aligned. Enforce this by adding MO_ALIGN to the memory operations
generated in gen_ll, gen_sc and gen_am.
Signed-off-by: Jiajie Chen <c@jia.je>
---
target/loongarch/tcg/insn_trans/trans_atomic.c.inc | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
index 77eeedbc42..5622202a67 100644
--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
@@ -9,7 +9,7 @@ static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop)
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
TCGv t0 = make_address_i(ctx, src1, a->imm);
- tcg_gen_qemu_ld_i64(t1, t0, ctx->mem_idx, mop);
+ tcg_gen_qemu_ld_i64(t1, t0, ctx->mem_idx, mop | MO_ALIGN);
tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr));
tcg_gen_st_tl(t1, tcg_env, offsetof(CPULoongArchState, llval));
gen_set_gpr(a->rd, t1, EXT_NONE);
@@ -37,7 +37,7 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop)
tcg_gen_mov_tl(val, src2);
/* generate cmpxchg */
tcg_gen_atomic_cmpxchg_tl(t0, cpu_lladdr, cpu_llval,
- val, ctx->mem_idx, mop);
+ val, ctx->mem_idx, mop | MO_ALIGN);
tcg_gen_setcond_tl(TCG_COND_EQ, dest, t0, cpu_llval);
gen_set_label(done);
gen_set_gpr(a->rd, dest, EXT_NONE);
@@ -63,7 +63,7 @@ static bool gen_am(DisasContext *ctx, arg_rrr *a,
addr = make_address_i(ctx, addr, 0);
- func(dest, addr, val, ctx->mem_idx, mop);
+ func(dest, addr, val, ctx->mem_idx, mop | MO_ALIGN);
gen_set_gpr(a->rd, dest, EXT_NONE);
return true;
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH v2 2/7] target/loongarch: Add am{swap/add}[_db].{b/h}
2025-11-19 12:24 [PATCH v2 0/7] Add LoongArch v1.1 instructions Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 1/7] target/loongarch: Require atomics to be aligned Jiajie Chen
@ 2025-11-19 12:24 ` Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 3/7] target/loongarch: Add amcas[_db].{b/h/w/d} Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 4/7] target/loongarch: Add estimated reciprocal instructions Jiajie Chen
3 siblings, 0 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:24 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
The new instructions are introduced in LoongArch v1.1:
- amswap.b
- amswap.h
- amadd.b
- amadd.h
- amswap_db.b
- amswap_db.h
- amadd_db.b
- amadd_db.h
The instructions are gated by CPUCFG2.LAM_BH.
Signed-off-by: Jiajie Chen <c@jia.je>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/cpu.h | 1 +
target/loongarch/disas.c | 8 ++++++++
target/loongarch/insns.decode | 8 ++++++++
.../tcg/insn_trans/trans_atomic.c.inc | 8 ++++++++
target/loongarch/translate.h | 19 ++++++++++---------
5 files changed, 35 insertions(+), 9 deletions(-)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 1a14469b3b..aa3d976875 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -138,6 +138,7 @@ FIELD(CPUCFG2, LBT_ALL, 18, 3)
FIELD(CPUCFG2, LSPW, 21, 1)
FIELD(CPUCFG2, LAM, 22, 1)
FIELD(CPUCFG2, HPTW, 24, 1)
+FIELD(CPUCFG2, LAM_BH, 27, 1)
/* cpucfg[3] bits */
FIELD(CPUCFG3, CCDMA, 0, 1)
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 63989a6282..1a0f527cb1 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -580,6 +580,14 @@ INSN(fldx_s, frr)
INSN(fldx_d, frr)
INSN(fstx_s, frr)
INSN(fstx_d, frr)
+INSN(amswap_b, rrr)
+INSN(amswap_h, rrr)
+INSN(amadd_b, rrr)
+INSN(amadd_h, rrr)
+INSN(amswap_db_b, rrr)
+INSN(amswap_db_h, rrr)
+INSN(amadd_db_b, rrr)
+INSN(amadd_db_h, rrr)
INSN(amswap_w, rrr)
INSN(amswap_d, rrr)
INSN(amadd_w, rrr)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 62f58cc541..678ce42038 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -261,6 +261,14 @@ ll_w 0010 0000 .............. ..... ..... @rr_i14s2
sc_w 0010 0001 .............. ..... ..... @rr_i14s2
ll_d 0010 0010 .............. ..... ..... @rr_i14s2
sc_d 0010 0011 .............. ..... ..... @rr_i14s2
+amswap_b 0011 10000101 11000 ..... ..... ..... @rrr
+amswap_h 0011 10000101 11001 ..... ..... ..... @rrr
+amadd_b 0011 10000101 11010 ..... ..... ..... @rrr
+amadd_h 0011 10000101 11011 ..... ..... ..... @rrr
+amswap_db_b 0011 10000101 11100 ..... ..... ..... @rrr
+amswap_db_h 0011 10000101 11101 ..... ..... ..... @rrr
+amadd_db_b 0011 10000101 11110 ..... ..... ..... @rrr
+amadd_db_h 0011 10000101 11111 ..... ..... ..... @rrr
amswap_w 0011 10000110 00000 ..... ..... ..... @rrr
amswap_d 0011 10000110 00001 ..... ..... ..... @rrr
amadd_w 0011 10000110 00010 ..... ..... ..... @rrr
diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
index 5622202a67..0d837d08b6 100644
--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
@@ -73,6 +73,14 @@ TRANS(ll_w, ALL, gen_ll, MO_TESL)
TRANS(sc_w, ALL, gen_sc, MO_TESL)
TRANS(ll_d, 64, gen_ll, MO_TEUQ)
TRANS(sc_d, 64, gen_sc, MO_TEUQ)
+TRANS(amswap_b, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_SB)
+TRANS(amswap_h, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_TESW)
+TRANS(amadd_b, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_SB)
+TRANS(amadd_h, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESW)
+TRANS(amswap_db_b, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_SB)
+TRANS(amswap_db_h, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_TESW)
+TRANS(amadd_db_b, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_SB)
+TRANS(amadd_db_h, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESW)
TRANS(amswap_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TESL)
TRANS64(amswap_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ)
TRANS(amadd_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL)
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index bbe015ba57..eb424bb0da 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -21,15 +21,16 @@
#define avail_ALL(C) true
#define avail_64(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, ARCH) == \
CPUCFG1_ARCH_LA64)
-#define avail_FP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP))
-#define avail_FP_SP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP_SP))
-#define avail_FP_DP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP_DP))
-#define avail_LSPW(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSPW))
-#define avail_LAM(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM))
-#define avail_LSX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX))
-#define avail_LASX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX))
-#define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
-#define avail_CRC(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, CRC))
+#define avail_FP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP))
+#define avail_FP_SP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP_SP))
+#define avail_FP_DP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP_DP))
+#define avail_LSPW(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSPW))
+#define avail_LAM(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM))
+#define avail_LAM_BH(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM_BH))
+#define avail_LSX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX))
+#define avail_LASX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX))
+#define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
+#define avail_CRC(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, CRC))
/*
* If an operation is being performed on less than TARGET_LONG_BITS,
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH v2 3/7] target/loongarch: Add amcas[_db].{b/h/w/d}
2025-11-19 12:24 [PATCH v2 0/7] Add LoongArch v1.1 instructions Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 1/7] target/loongarch: Require atomics to be aligned Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 2/7] target/loongarch: Add am{swap/add}[_db].{b/h} Jiajie Chen
@ 2025-11-19 12:24 ` Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 4/7] target/loongarch: Add estimated reciprocal instructions Jiajie Chen
3 siblings, 0 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:24 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
The new instructions are introduced in LoongArch v1.1:
- amcas.b
- amcas.h
- amcas.w
- amcas.d
- amcas_db.b
- amcas_db.h
- amcas_db.w
- amcas_db.d
The new instructions are gated by CPUCFG2.LAMCAS.
Signed-off-by: Jiajie Chen <c@jia.je>
---
target/loongarch/cpu.h | 1 +
target/loongarch/disas.c | 8 ++++++
target/loongarch/insns.decode | 8 ++++++
.../tcg/insn_trans/trans_atomic.c.inc | 25 +++++++++++++++++++
target/loongarch/translate.h | 1 +
5 files changed, 43 insertions(+)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index aa3d976875..9ca7af9b4a 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -139,6 +139,7 @@ FIELD(CPUCFG2, LSPW, 21, 1)
FIELD(CPUCFG2, LAM, 22, 1)
FIELD(CPUCFG2, HPTW, 24, 1)
FIELD(CPUCFG2, LAM_BH, 27, 1)
+FIELD(CPUCFG2, LAMCAS, 28, 1)
/* cpucfg[3] bits */
FIELD(CPUCFG3, CCDMA, 0, 1)
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 1a0f527cb1..66c0cae5a9 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -580,6 +580,14 @@ INSN(fldx_s, frr)
INSN(fldx_d, frr)
INSN(fstx_s, frr)
INSN(fstx_d, frr)
+INSN(amcas_b, rrr)
+INSN(amcas_h, rrr)
+INSN(amcas_w, rrr)
+INSN(amcas_d, rrr)
+INSN(amcas_db_b, rrr)
+INSN(amcas_db_h, rrr)
+INSN(amcas_db_w, rrr)
+INSN(amcas_db_d, rrr)
INSN(amswap_b, rrr)
INSN(amswap_h, rrr)
INSN(amadd_b, rrr)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 678ce42038..cf4123cd46 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -261,6 +261,14 @@ ll_w 0010 0000 .............. ..... ..... @rr_i14s2
sc_w 0010 0001 .............. ..... ..... @rr_i14s2
ll_d 0010 0010 .............. ..... ..... @rr_i14s2
sc_d 0010 0011 .............. ..... ..... @rr_i14s2
+amcas_b 0011 10000101 10000 ..... ..... ..... @rrr
+amcas_h 0011 10000101 10001 ..... ..... ..... @rrr
+amcas_w 0011 10000101 10010 ..... ..... ..... @rrr
+amcas_d 0011 10000101 10011 ..... ..... ..... @rrr
+amcas_db_b 0011 10000101 10100 ..... ..... ..... @rrr
+amcas_db_h 0011 10000101 10101 ..... ..... ..... @rrr
+amcas_db_w 0011 10000101 10110 ..... ..... ..... @rrr
+amcas_db_d 0011 10000101 10111 ..... ..... ..... @rrr
amswap_b 0011 10000101 11000 ..... ..... ..... @rrr
amswap_h 0011 10000101 11001 ..... ..... ..... @rrr
amadd_b 0011 10000101 11010 ..... ..... ..... @rrr
diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
index 0d837d08b6..1b2673b82d 100644
--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
@@ -45,6 +45,23 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop)
return true;
}
+static bool gen_cas(DisasContext *ctx, arg_rrr *a,
+ void (*func)(TCGv, TCGv, TCGv, TCGv, TCGArg, MemOp),
+ MemOp mop)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv val = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv old = gpr_src(ctx, a->rd, EXT_NONE);
+
+ addr = make_address_i(ctx, addr, 0);
+
+ func(dest, addr, old, val, ctx->mem_idx, mop | MO_ALIGN);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
static bool gen_am(DisasContext *ctx, arg_rrr *a,
void (*func)(TCGv, TCGv, TCGv, TCGArg, MemOp),
MemOp mop)
@@ -73,6 +90,14 @@ TRANS(ll_w, ALL, gen_ll, MO_TESL)
TRANS(sc_w, ALL, gen_sc, MO_TESL)
TRANS(ll_d, 64, gen_ll, MO_TEUQ)
TRANS(sc_d, 64, gen_sc, MO_TEUQ)
+TRANS(amcas_b, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_SB)
+TRANS(amcas_h, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TESW)
+TRANS(amcas_w, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TESL)
+TRANS(amcas_d, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TEUQ)
+TRANS(amcas_db_b, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_SB)
+TRANS(amcas_db_h, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TESW)
+TRANS(amcas_db_w, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TESL)
+TRANS(amcas_db_d, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TEUQ)
TRANS(amswap_b, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_SB)
TRANS(amswap_h, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_TESW)
TRANS(amadd_b, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_SB)
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index eb424bb0da..9ba3b425c1 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -27,6 +27,7 @@
#define avail_LSPW(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSPW))
#define avail_LAM(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM))
#define avail_LAM_BH(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM_BH))
+#define avail_LAMCAS(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAMCAS))
#define avail_LSX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX))
#define avail_LASX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX))
#define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH v2 4/7] target/loongarch: Add estimated reciprocal instructions
2025-11-19 12:24 [PATCH v2 0/7] Add LoongArch v1.1 instructions Jiajie Chen
` (2 preceding siblings ...)
2025-11-19 12:24 ` [PATCH v2 3/7] target/loongarch: Add amcas[_db].{b/h/w/d} Jiajie Chen
@ 2025-11-19 12:30 ` Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 5/7] target/loongarch: Add llacq/screl instructions Jiajie Chen
` (2 more replies)
3 siblings, 3 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:30 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
Add the following new instructions in LoongArch v1.1:
- frecipe.s
- frecipe.d
- frsqrte.s
- frsqrte.d
- vfrecipe.s
- vfrecipe.d
- vfrsqrte.s
- vfrsqrte.d
- xvfrecipe.s
- xvfrecipe.d
- xvfrsqrte.s
- xvfrsqrte.d
They are guarded by CPUCFG2.FRECIPE. Although the instructions allow
implementations to improve performance by reducing precision, we use the
existing softfloat implementation.
Signed-off-by: Jiajie Chen <c@jia.je>
Acked-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/cpu.h | 1 +
target/loongarch/disas.c | 12 ++++++++++++
target/loongarch/insns.decode | 12 ++++++++++++
target/loongarch/tcg/insn_trans/trans_farith.c.inc | 4 ++++
target/loongarch/tcg/insn_trans/trans_vec.c.inc | 8 ++++++++
target/loongarch/translate.h | 6 ++++++
6 files changed, 43 insertions(+)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 9ca7af9b4a..740e474d79 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -138,6 +138,7 @@ FIELD(CPUCFG2, LBT_ALL, 18, 3)
FIELD(CPUCFG2, LSPW, 21, 1)
FIELD(CPUCFG2, LAM, 22, 1)
FIELD(CPUCFG2, HPTW, 24, 1)
+FIELD(CPUCFG2, FRECIPE, 25, 1)
FIELD(CPUCFG2, LAM_BH, 27, 1)
FIELD(CPUCFG2, LAMCAS, 28, 1)
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 66c0cae5a9..e5e1b37ce0 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -478,6 +478,10 @@ INSN(frecip_s, ff)
INSN(frecip_d, ff)
INSN(frsqrt_s, ff)
INSN(frsqrt_d, ff)
+INSN(frecipe_s, ff)
+INSN(frecipe_d, ff)
+INSN(frsqrte_s, ff)
+INSN(frsqrte_d, ff)
INSN(fmov_s, ff)
INSN(fmov_d, ff)
INSN(movgr2fr_w, fr)
@@ -1429,6 +1433,10 @@ INSN_LSX(vfrecip_s, vv)
INSN_LSX(vfrecip_d, vv)
INSN_LSX(vfrsqrt_s, vv)
INSN_LSX(vfrsqrt_d, vv)
+INSN_LSX(vfrecipe_s, vv)
+INSN_LSX(vfrecipe_d, vv)
+INSN_LSX(vfrsqrte_s, vv)
+INSN_LSX(vfrsqrte_d, vv)
INSN_LSX(vfcvtl_s_h, vv)
INSN_LSX(vfcvth_s_h, vv)
@@ -2343,6 +2351,10 @@ INSN_LASX(xvfrecip_s, vv)
INSN_LASX(xvfrecip_d, vv)
INSN_LASX(xvfrsqrt_s, vv)
INSN_LASX(xvfrsqrt_d, vv)
+INSN_LASX(xvfrecipe_s, vv)
+INSN_LASX(xvfrecipe_d, vv)
+INSN_LASX(xvfrsqrte_s, vv)
+INSN_LASX(xvfrsqrte_d, vv)
INSN_LASX(xvfcvtl_s_h, vv)
INSN_LASX(xvfcvth_s_h, vv)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index cf4123cd46..92078f0f9f 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -371,6 +371,10 @@ frecip_s 0000 00010001 01000 10101 ..... ..... @ff
frecip_d 0000 00010001 01000 10110 ..... ..... @ff
frsqrt_s 0000 00010001 01000 11001 ..... ..... @ff
frsqrt_d 0000 00010001 01000 11010 ..... ..... @ff
+frecipe_s 0000 00010001 01000 11101 ..... ..... @ff
+frecipe_d 0000 00010001 01000 11110 ..... ..... @ff
+frsqrte_s 0000 00010001 01001 00001 ..... ..... @ff
+frsqrte_d 0000 00010001 01001 00010 ..... ..... @ff
fscaleb_s 0000 00010001 00001 ..... ..... ..... @fff
fscaleb_d 0000 00010001 00010 ..... ..... ..... @fff
flogb_s 0000 00010001 01000 01001 ..... ..... @ff
@@ -1115,6 +1119,10 @@ vfrecip_s 0111 00101001 11001 11101 ..... ..... @vv
vfrecip_d 0111 00101001 11001 11110 ..... ..... @vv
vfrsqrt_s 0111 00101001 11010 00001 ..... ..... @vv
vfrsqrt_d 0111 00101001 11010 00010 ..... ..... @vv
+vfrecipe_s 0111 00101001 11010 00101 ..... ..... @vv
+vfrecipe_d 0111 00101001 11010 00110 ..... ..... @vv
+vfrsqrte_s 0111 00101001 11010 01001 ..... ..... @vv
+vfrsqrte_d 0111 00101001 11010 01010 ..... ..... @vv
vfcvtl_s_h 0111 00101001 11011 11010 ..... ..... @vv
vfcvth_s_h 0111 00101001 11011 11011 ..... ..... @vv
@@ -1879,6 +1887,10 @@ xvfrecip_s 0111 01101001 11001 11101 ..... ..... @vv
xvfrecip_d 0111 01101001 11001 11110 ..... ..... @vv
xvfrsqrt_s 0111 01101001 11010 00001 ..... ..... @vv
xvfrsqrt_d 0111 01101001 11010 00010 ..... ..... @vv
+xvfrecipe_s 0111 01101001 11010 00101 ..... ..... @vv
+xvfrecipe_d 0111 01101001 11010 00110 ..... ..... @vv
+xvfrsqrte_s 0111 01101001 11010 01001 ..... ..... @vv
+xvfrsqrte_d 0111 01101001 11010 01010 ..... ..... @vv
xvfcvtl_s_h 0111 01101001 11011 11010 ..... ..... @vv
xvfcvth_s_h 0111 01101001 11011 11011 ..... ..... @vv
diff --git a/target/loongarch/tcg/insn_trans/trans_farith.c.inc b/target/loongarch/tcg/insn_trans/trans_farith.c.inc
index ff6cf3448e..eed6ab7312 100644
--- a/target/loongarch/tcg/insn_trans/trans_farith.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_farith.c.inc
@@ -191,6 +191,10 @@ TRANS(frecip_s, FP_SP, gen_ff, gen_helper_frecip_s)
TRANS(frecip_d, FP_DP, gen_ff, gen_helper_frecip_d)
TRANS(frsqrt_s, FP_SP, gen_ff, gen_helper_frsqrt_s)
TRANS(frsqrt_d, FP_DP, gen_ff, gen_helper_frsqrt_d)
+TRANS(frecipe_s, FRECIPE_FP_SP, gen_ff, gen_helper_frecip_s)
+TRANS(frecipe_d, FRECIPE_FP_DP, gen_ff, gen_helper_frecip_d)
+TRANS(frsqrte_s, FRECIPE_FP_SP, gen_ff, gen_helper_frsqrt_s)
+TRANS(frsqrte_d, FRECIPE_FP_DP, gen_ff, gen_helper_frsqrt_d)
TRANS64(flogb_s, FP_SP, gen_ff, gen_helper_flogb_s)
TRANS64(flogb_d, FP_DP, gen_ff, gen_helper_flogb_d)
TRANS(fclass_s, FP_SP, gen_ff, gen_helper_fclass_s)
diff --git a/target/loongarch/tcg/insn_trans/trans_vec.c.inc b/target/loongarch/tcg/insn_trans/trans_vec.c.inc
index 38bccf2838..ef57abe408 100644
--- a/target/loongarch/tcg/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_vec.c.inc
@@ -4407,12 +4407,20 @@ TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
+TRANS(vfrecipe_s, FRECIPE_LSX, gen_vv_ptr, gen_helper_vfrecip_s)
+TRANS(vfrecipe_d, FRECIPE_LSX, gen_vv_ptr, gen_helper_vfrecip_d)
+TRANS(vfrsqrte_s, FRECIPE_LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
+TRANS(vfrsqrte_d, FRECIPE_LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
TRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s)
TRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d)
TRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s)
TRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d)
TRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
TRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)
+TRANS(xvfrecipe_s, FRECIPE_LASX, gen_xx_ptr, gen_helper_vfrecip_s)
+TRANS(xvfrecipe_d, FRECIPE_LASX, gen_xx_ptr, gen_helper_vfrecip_d)
+TRANS(xvfrsqrte_s, FRECIPE_LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
+TRANS(xvfrsqrte_d, FRECIPE_LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)
TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index 9ba3b425c1..331f79c8f2 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -33,6 +33,12 @@
#define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
#define avail_CRC(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, CRC))
+#define avail_FRECIPE(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FRECIPE))
+#define avail_FRECIPE_FP_SP(C) (avail_FRECIPE(C) && avail_FP_SP(C))
+#define avail_FRECIPE_FP_DP(C) (avail_FRECIPE(C) && avail_FP_DP(C))
+#define avail_FRECIPE_LSX(C) (avail_FRECIPE(C) && avail_LSX(C))
+#define avail_FRECIPE_LASX(C) (avail_FRECIPE(C) && avail_LASX(C))
+
/*
* If an operation is being performed on less than TARGET_LONG_BITS,
* it may require the inputs to be sign- or zero-extended; which will
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH v2 5/7] target/loongarch: Add llacq/screl instructions
2025-11-19 12:30 ` [PATCH v2 4/7] target/loongarch: Add estimated reciprocal instructions Jiajie Chen
@ 2025-11-19 12:30 ` Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 6/7] target/loongarch: Add sc.q instructions Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 7/7] target/loongarch: Add LA v1.1 instructions to max cpu Jiajie Chen
2 siblings, 0 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:30 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
Add the following instructions in LoongArch v1.1:
- llacq.w
- screl.w
- llacq.d
- screl.d
They are guarded by CPUCFG2.LLACQ_SCREL.
Signed-off-by: Jiajie Chen <c@jia.je>
Co-developed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/cpu.h | 1 +
target/loongarch/disas.c | 4 ++++
target/loongarch/insns.decode | 5 ++++
.../tcg/insn_trans/trans_atomic.c.inc | 24 ++++++++++++++-----
target/loongarch/translate.h | 3 +++
5 files changed, 31 insertions(+), 6 deletions(-)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 740e474d79..5cab02ad6f 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -141,6 +141,7 @@ FIELD(CPUCFG2, HPTW, 24, 1)
FIELD(CPUCFG2, FRECIPE, 25, 1)
FIELD(CPUCFG2, LAM_BH, 27, 1)
FIELD(CPUCFG2, LAMCAS, 28, 1)
+FIELD(CPUCFG2, LLACQ_SCREL, 29, 1)
/* cpucfg[3] bits */
FIELD(CPUCFG3, CCDMA, 0, 1)
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index e5e1b37ce0..3164fade9b 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -584,6 +584,10 @@ INSN(fldx_s, frr)
INSN(fldx_d, frr)
INSN(fstx_s, frr)
INSN(fstx_d, frr)
+INSN(llacq_w, rr_i)
+INSN(screl_w, rr_i)
+INSN(llacq_d, rr_i)
+INSN(screl_d, rr_i)
INSN(amcas_b, rrr)
INSN(amcas_h, rrr)
INSN(amcas_w, rrr)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 92078f0f9f..7898f5f719 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -69,6 +69,7 @@
@rr_i14s2 .... .... .............. rj:5 rd:5 &rr_i imm=%i14s2
@rr_i16 .... .. imm:s16 rj:5 rd:5 &rr_i
@rr_i16s2 .... .. ................ rj:5 rd:5 &rr_i imm=%offs16
+@rr_i0 .... .. ................ rj:5 rd:5 &rr_i imm=0
@hint_r_i12 .... ...... imm:s12 rj:5 hint:5 &hint_r_i
@hint_rr .... ........ ..... rk:5 rj:5 hint:5 &hint_rr
@rrr_sa2p1 .... ........ ... .. rk:5 rj:5 rd:5 &rrr_sa sa=%sa2p1
@@ -261,6 +262,10 @@ ll_w 0010 0000 .............. ..... ..... @rr_i14s2
sc_w 0010 0001 .............. ..... ..... @rr_i14s2
ll_d 0010 0010 .............. ..... ..... @rr_i14s2
sc_d 0010 0011 .............. ..... ..... @rr_i14s2
+llacq_w 0011 10000101 01111 00000 ..... ..... @rr_i0
+screl_w 0011 10000101 01111 00001 ..... ..... @rr_i0
+llacq_d 0011 10000101 01111 00010 ..... ..... @rr_i0
+screl_d 0011 10000101 01111 00011 ..... ..... @rr_i0
amcas_b 0011 10000101 10000 ..... ..... ..... @rrr
amcas_h 0011 10000101 10001 ..... ..... ..... @rrr
amcas_w 0011 10000101 10010 ..... ..... ..... @rrr
diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
index 1b2673b82d..c9a6dcfdeb 100644
--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
@@ -3,7 +3,7 @@
* Copyright (c) 2021 Loongson Technology Corporation Limited
*/
-static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop)
+static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool acq)
{
TCGv t1 = tcg_temp_new();
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
@@ -14,10 +14,14 @@ static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop)
tcg_gen_st_tl(t1, tcg_env, offsetof(CPULoongArchState, llval));
gen_set_gpr(a->rd, t1, EXT_NONE);
+ if (acq) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ }
+
return true;
}
-static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop)
+static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool rel)
{
TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
@@ -29,6 +33,10 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop)
TCGLabel *done = gen_new_label();
tcg_gen_addi_tl(t0, src1, a->imm);
+
+ if (rel) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ }
tcg_gen_brcond_tl(TCG_COND_EQ, t0, cpu_lladdr, l1);
tcg_gen_movi_tl(dest, 0);
tcg_gen_br(done);
@@ -86,10 +94,14 @@ static bool gen_am(DisasContext *ctx, arg_rrr *a,
return true;
}
-TRANS(ll_w, ALL, gen_ll, MO_TESL)
-TRANS(sc_w, ALL, gen_sc, MO_TESL)
-TRANS(ll_d, 64, gen_ll, MO_TEUQ)
-TRANS(sc_d, 64, gen_sc, MO_TEUQ)
+TRANS(ll_w, ALL, gen_ll, MO_TESL, false)
+TRANS(sc_w, ALL, gen_sc, MO_TESL, false)
+TRANS(ll_d, 64, gen_ll, MO_TEUQ, false)
+TRANS(sc_d, 64, gen_sc, MO_TEUQ, false)
+TRANS(llacq_w, LLACQ_SCREL, gen_ll, MO_TESL, true)
+TRANS(screl_w, LLACQ_SCREL, gen_sc, MO_TESL, true)
+TRANS(llacq_d, LLACQ_SCREL_64, gen_ll, MO_TEUQ, true)
+TRANS(screl_d, LLACQ_SCREL_64, gen_sc, MO_TEUQ, true)
TRANS(amcas_b, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_SB)
TRANS(amcas_h, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TESW)
TRANS(amcas_w, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TESL)
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index 331f79c8f2..76bceedf98 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -39,6 +39,9 @@
#define avail_FRECIPE_LSX(C) (avail_FRECIPE(C) && avail_LSX(C))
#define avail_FRECIPE_LASX(C) (avail_FRECIPE(C) && avail_LASX(C))
+#define avail_LLACQ_SCREL(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LLACQ_SCREL))
+#define avail_LLACQ_SCREL_64(C) (avail_64(C) && avail_LLACQ_SCREL(C))
+
/*
* If an operation is being performed on less than TARGET_LONG_BITS,
* it may require the inputs to be sign- or zero-extended; which will
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH v2 6/7] target/loongarch: Add sc.q instructions
2025-11-19 12:30 ` [PATCH v2 4/7] target/loongarch: Add estimated reciprocal instructions Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 5/7] target/loongarch: Add llacq/screl instructions Jiajie Chen
@ 2025-11-19 12:30 ` Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 7/7] target/loongarch: Add LA v1.1 instructions to max cpu Jiajie Chen
2 siblings, 0 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:30 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
Add the sc.q instruction in LoongArch v1.1, guarded by CPUCFG2.SCQ. It
is implemented by reading 128-bit data (llval + llval_high) in ll.d when
the address is aligned to a 16B boundary, and performing a 128-bit
cmpxchg in sc.q. If a following ld.d matches the higher part of the
128-bit data, its value is taken from llval_high.
Expected assembly sequence:
ll.d lo, base, 0
ld.d hi, base, 8
sc.q lo, hi, base
Signed-off-by: Jiajie Chen <c@jia.je>
---
target/loongarch/cpu.h | 3 +
target/loongarch/disas.c | 1 +
target/loongarch/insns.decode | 1 +
.../tcg/insn_trans/trans_atomic.c.inc | 82 +++++++++++++++++++
.../tcg/insn_trans/trans_memory.c.inc | 22 +++++
target/loongarch/tcg/translate.c | 6 +-
target/loongarch/translate.h | 1 +
7 files changed, 115 insertions(+), 1 deletion(-)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 5cab02ad6f..0a89c06b01 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -142,6 +142,7 @@ FIELD(CPUCFG2, FRECIPE, 25, 1)
FIELD(CPUCFG2, LAM_BH, 27, 1)
FIELD(CPUCFG2, LAMCAS, 28, 1)
FIELD(CPUCFG2, LLACQ_SCREL, 29, 1)
+FIELD(CPUCFG2, SCQ, 30, 1)
/* cpucfg[3] bits */
FIELD(CPUCFG3, CCDMA, 0, 1)
@@ -377,6 +378,8 @@ typedef struct CPUArchState {
uint32_t fcsr0_mask;
uint64_t lladdr; /* LL virtual address compared against SC */
uint64_t llval;
+ uint64_t llval_high; /* For 128-bit atomic SC.Q */
+ uint64_t llbit_scq; /* Potential LL.D+LD.D+SC.Q sequence in effect */
#endif
#ifndef CONFIG_USER_ONLY
#ifdef CONFIG_TCG
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 3164fade9b..3249ab7ac6 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -584,6 +584,7 @@ INSN(fldx_s, frr)
INSN(fldx_d, frr)
INSN(fstx_s, frr)
INSN(fstx_d, frr)
+INSN(sc_q, rrr)
INSN(llacq_w, rr_i)
INSN(screl_w, rr_i)
INSN(llacq_d, rr_i)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 7898f5f719..3089d42044 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -262,6 +262,7 @@ ll_w 0010 0000 .............. ..... ..... @rr_i14s2
sc_w 0010 0001 .............. ..... ..... @rr_i14s2
ll_d 0010 0010 .............. ..... ..... @rr_i14s2
sc_d 0010 0011 .............. ..... ..... @rr_i14s2
+sc_q 0011 10000101 01110 ..... ..... ..... @rrr
llacq_w 0011 10000101 01111 00000 ..... ..... @rr_i0
screl_w 0011 10000101 01111 00001 ..... ..... @rr_i0
llacq_d 0011 10000101 01111 00010 ..... ..... @rr_i0
diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
index c9a6dcfdeb..565daa7219 100644
--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
@@ -6,14 +6,48 @@
static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool acq)
{
TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
TCGv t0 = make_address_i(ctx, src1, a->imm);
+ TCGv_i128 t16 = tcg_temp_new_i128();
+ TCGv mask = tcg_constant_tl(0xf);
+ TCGv one = tcg_constant_tl(1);
+ TCGv zero = tcg_constant_tl(0);
+ TCGLabel *l1 = gen_new_label();
+ TCGLabel *done = gen_new_label();
+
+ if (avail_SCQ(ctx) && mop == MO_TEUQ) {
+ /*
+ * The LL.D+LD.D may be paired with SC.Q,
+ * load 128-bit if aligned: (t0 & 0xf) == 0
+ */
+ tcg_gen_and_tl(t1, t0, mask);
+ tcg_gen_brcond_tl(TCG_COND_EQ, t1, zero, l1);
+ /* fallthrough if not aligned to 16B */
+ }
tcg_gen_qemu_ld_i64(t1, t0, ctx->mem_idx, mop | MO_ALIGN);
tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr));
tcg_gen_st_tl(t1, tcg_env, offsetof(CPULoongArchState, llval));
gen_set_gpr(a->rd, t1, EXT_NONE);
+ if (avail_SCQ(ctx) && mop == MO_TEUQ) {
+ tcg_gen_br(done);
+
+ gen_set_label(l1);
+
+ /* Load 16B data and save into llval/llval_high */
+ tcg_gen_qemu_ld_i128(t16, t0, ctx->mem_idx, MO_128 | MO_ALIGN);
+ tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr));
+ tcg_gen_extr_i128_i64(t1, t2, t16);
+ tcg_gen_st_tl(t1, tcg_env, offsetof(CPULoongArchState, llval));
+ tcg_gen_st_tl(t2, tcg_env, offsetof(CPULoongArchState, llval_high));
+ tcg_gen_st_tl(one, tcg_env, offsetof(CPULoongArchState, llbit_scq));
+ gen_set_gpr(a->rd, t1, EXT_NONE);
+
+ gen_set_label(done);
+ }
+
if (acq) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
}
@@ -28,6 +62,7 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool rel)
TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE);
TCGv t0 = tcg_temp_new();
TCGv val = tcg_temp_new();
+ TCGv zero = tcg_constant_tl(0);
TCGLabel *l1 = gen_new_label();
TCGLabel *done = gen_new_label();
@@ -37,6 +72,11 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool rel)
if (rel) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
}
+
+ if (avail_SCQ(ctx)) {
+ tcg_gen_st_tl(zero, tcg_env, offsetof(CPULoongArchState, llbit_scq));
+ }
+
tcg_gen_brcond_tl(TCG_COND_EQ, t0, cpu_lladdr, l1);
tcg_gen_movi_tl(dest, 0);
tcg_gen_br(done);
@@ -53,6 +93,47 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool rel)
return true;
}
+static bool gen_sc_q(DisasContext *ctx, arg_rrr *a, MemOp mop)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE);
+ TCGv src3 = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv_i128 t16 = tcg_temp_new_i128();
+ TCGv_i128 c16 = tcg_temp_new_i128();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv zero = tcg_constant_tl(0);
+
+ TCGLabel *l1 = gen_new_label();
+ TCGLabel *done = gen_new_label();
+
+ tcg_gen_st_tl(zero, tcg_env, offsetof(CPULoongArchState, llbit_scq));
+ tcg_gen_brcond_tl(TCG_COND_EQ, src1, cpu_lladdr, l1);
+ tcg_gen_movi_tl(dest, 0);
+ tcg_gen_br(done);
+
+ gen_set_label(l1);
+ tcg_gen_concat_i64_i128(t16, src2, src3);
+ tcg_gen_concat_i64_i128(c16, cpu_llval,
+ cpu_llval_high);
+
+ /* cmpxchg 16B at lladdr: compare with llval/llval_high, store rd/rk */
+ tcg_gen_atomic_cmpxchg_i128(t16, cpu_lladdr, c16,
+ t16, ctx->mem_idx, mop | MO_ALIGN);
+
+ /* dest = 1 iff the cmpxchg result equals llval/llval_high, else 0 */
+ tcg_gen_extr_i128_i64(t1, t2, t16);
+ tcg_gen_xor_i64(t1, t1, cpu_llval);
+ tcg_gen_xor_i64(t2, t2, cpu_llval_high);
+ tcg_gen_or_i64(t1, t1, t2);
+ tcg_gen_setcondi_i64(TCG_COND_EQ, dest, t1, 0);
+ gen_set_label(done);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
static bool gen_cas(DisasContext *ctx, arg_rrr *a,
void (*func)(TCGv, TCGv, TCGv, TCGv, TCGArg, MemOp),
MemOp mop)
@@ -98,6 +179,7 @@ TRANS(ll_w, ALL, gen_ll, MO_TESL, false)
TRANS(sc_w, ALL, gen_sc, MO_TESL, false)
TRANS(ll_d, 64, gen_ll, MO_TEUQ, false)
TRANS(sc_d, 64, gen_sc, MO_TEUQ, false)
+TRANS(sc_q, 64, gen_sc_q, MO_128)
TRANS(llacq_w, LLACQ_SCREL, gen_ll, MO_TESL, true)
TRANS(screl_w, LLACQ_SCREL, gen_sc, MO_TESL, true)
TRANS(llacq_d, LLACQ_SCREL_64, gen_ll, MO_TEUQ, true)
diff --git a/target/loongarch/tcg/insn_trans/trans_memory.c.inc b/target/loongarch/tcg/insn_trans/trans_memory.c.inc
index 42f4e74012..8b3c1b037c 100644
--- a/target/loongarch/tcg/insn_trans/trans_memory.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_memory.c.inc
@@ -7,11 +7,33 @@ static bool gen_load(DisasContext *ctx, arg_rr_i *a, MemOp mop)
{
TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv t1 = tcg_temp_new();
+ TCGv mask = tcg_constant_tl(0x8);
+ TCGv zero = tcg_constant_tl(0);
+ TCGLabel *done = gen_new_label();
+ TCGLabel *l1 = gen_new_label();
addr = make_address_i(ctx, addr, a->imm);
+ if (avail_SCQ(ctx) && mop == MO_TEUQ) {
+ /*
+ * The LL.D+LD.D may be paired with SC.Q,
+ * use llval_high if llbit_scq && (addr == lladdr ^ 0x8)
+ */
+ tcg_gen_brcond_tl(TCG_COND_EQ, cpu_llbit_scq, zero, l1);
+ tcg_gen_xor_tl(t1, addr, mask);
+ tcg_gen_brcond_tl(TCG_COND_NE, cpu_lladdr, t1, l1);
+ gen_set_gpr(a->rd, cpu_llval_high, EXT_NONE);
+ tcg_gen_br(done);
+ gen_set_label(l1);
+ }
+
tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ if (avail_SCQ(ctx) && mop == MO_TEUQ) {
+ gen_set_label(done);
+ }
return true;
}
diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
index 055f6fb604..2023f892be 100644
--- a/target/loongarch/tcg/translate.c
+++ b/target/loongarch/tcg/translate.c
@@ -24,7 +24,7 @@
/* Global register indices */
TCGv cpu_gpr[32], cpu_pc;
-static TCGv cpu_lladdr, cpu_llval;
+static TCGv cpu_lladdr, cpu_llval, cpu_llval_high, cpu_llbit_scq;
#define HELPER_H "helper.h"
#include "exec/helper-info.c.inc"
@@ -360,6 +360,10 @@ void loongarch_translate_init(void)
offsetof(CPULoongArchState, lladdr), "lladdr");
cpu_llval = tcg_global_mem_new(tcg_env,
offsetof(CPULoongArchState, llval), "llval");
+ cpu_llval_high = tcg_global_mem_new(tcg_env,
+ offsetof(CPULoongArchState, llval_high), "llval_high");
+ cpu_llbit_scq = tcg_global_mem_new(tcg_env,
+ offsetof(CPULoongArchState, llbit_scq), "llbit_scq");
#ifndef CONFIG_USER_ONLY
loongarch_csr_translate_init();
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index 76bceedf98..ba1c89e57b 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -30,6 +30,7 @@
#define avail_LAMCAS(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAMCAS))
#define avail_LSX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX))
#define avail_LASX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX))
+#define avail_SCQ(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, SCQ))
#define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
#define avail_CRC(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, CRC))
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 7/7] target/loongarch: Add LA v1.1 instructions to max cpu
2025-11-19 12:30 ` [PATCH v2 4/7] target/loongarch: Add estimated reciprocal instructions Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 5/7] target/loongarch: Add llacq/screl instructions Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 6/7] target/loongarch: Add sc.q instructions Jiajie Chen
@ 2025-11-19 12:30 ` Jiajie Chen
2 siblings, 0 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:30 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
Add the new LA v1.1 instructions to the max CPU by enabling the
corresponding feature bits in CPUCFG2.
Signed-off-by: Jiajie Chen <c@jia.je>
---
target/loongarch/cpu.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index d74c3c3766..2c357f9342 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -413,7 +413,16 @@ static void loongarch_max_initfn(Object *obj)
if (tcg_enabled()) {
cpu->env.cpucfg[1] = FIELD_DP32(cpu->env.cpucfg[1], CPUCFG1, MSG_INT, 1);
cpu->msgint = ON_OFF_AUTO_AUTO;
- cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, HPTW, 1);
+
+ uint32_t data = cpu->env.cpucfg[2];
+ data = FIELD_DP32(data, CPUCFG2, HPTW, 1);
+ /* Enable LA v1.1 instructions */
+ data = FIELD_DP32(data, CPUCFG2, FRECIPE, 1);
+ data = FIELD_DP32(data, CPUCFG2, LAM_BH, 1);
+ data = FIELD_DP32(data, CPUCFG2, LAMCAS, 1);
+ data = FIELD_DP32(data, CPUCFG2, LLACQ_SCREL, 1);
+ data = FIELD_DP32(data, CPUCFG2, SCQ, 1);
+ cpu->env.cpucfg[2] = data;
}
}
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread