* [PATCH v2 0/7] Add LoongArch v1.1 instructions
@ 2025-11-19 12:24 Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 1/7] target/loongarch: Require atomics to be aligned Jiajie Chen
` (3 more replies)
0 siblings, 4 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:24 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
Latest revision of LoongArch ISA is out at
https://www.loongson.cn/uploads/images/2023102309132647981.%E9%BE%99%E8%8A%AF%E6%9E%B6%E6%9E%84%E5%8F%82%E8%80%83%E6%89%8B%E5%86%8C%E5%8D%B7%E4%B8%80_r1p10.pdf
(Chinese only). The revision includes the following updates:
- estimated fp reciporcal instructions: frecip -> frecipe, frsqrt ->
frsqrte
- 128-bit width store-conditional instruction: sc.q
- ll.w/d with acquire semantic: llacq.w/d, sc.w/d with release semantic:
screl.w/d
- compare and swap instructions: amcas[_db].b/w/h/d
- byte and word-wide amswap/add instructions: am{swap/add}[_db].{b/h}
- new definition for dbar hints
- clarify 32-bit division instruction hebavior
- clarify load ordering when accessing the same address
- introduce message signaled interrupt
- introduce hardware page table walker
The new revision is implemented in the Loongson 3A6000 processor.
This patch series implements all the new instructions. The v1 version
can be found at
https://patchew.org/QEMU/20231023153029.269211-2-c@jia.je/.
A simple testcase to test the new fp and sc.q instructions:
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
void test_fp() {
float a = 3.0;
float b;
asm volatile("frecip.s %0, %1" : "=f"(b) : "f"(a));
printf("frecip: %f\n", b);
asm volatile("frecipe.s %0, %1" : "=f"(b) : "f"(a));
printf("frecipe: %f\n", b);
asm volatile("frsqrt.s %0, %1" : "=f"(b) : "f"(a));
printf("frsqrt: %f\n", b);
asm volatile("frsqrte.s %0, %1" : "=f"(b) : "f"(a));
printf("frsqrte: %f\n", b);
}
uint64_t rand64() { return ((uint64_t)rand() << 32) | rand(); }
void test_sc_q() {
__int128 val = rand64();
val = (val << 64) | rand64();
__int128 *ptr = &val;
uint64_t add_lo = rand64();
uint64_t add_hi = rand64();
__int128 add = add_hi;
add = (add << 64) | add_lo;
__int128 expect = val + add;
int res = 0;
asm volatile("ll.d $t1, %1, 0\nld.d $t2, %1, 8\nadd.d $t1, $t1, %2\nadd.d "
"$t2, $t2, %3\nsc.q $t1, $t2, %1\nmove %0, $t1"
: "=r"(res), "+r"(ptr)
: "r"(add_lo), "r"(add_hi)
: "$t1", "$t2", "memory");
assert(res == 1);
assert(val == expect);
// change memory content to make sc fail
res = 1;
asm volatile("ll.d $t1, %1, 0\nld.d $t2, %1, 8\naddi.d $t1, $t1, 1\nst.d "
"$t1, %1, 0\nsc.q $t1, $t2, %1\nmove %0, $t1"
: "=r"(res), "+r"(ptr)
:
: "$t1", "$t2", "memory");
assert(res == 0);
res = 1;
asm volatile("ll.d $t1, %1, 0\nld.d $t2, %1, 8\naddi.d $t2, $t2, 1\nst.d "
"$t2, %1, 8\nsc.q $t1, $t2, %1\nmove %0, $t1"
: "=r"(res), "+r"(ptr)
:
: "$t1", "$t2", "memory");
assert(res == 0);
printf("SC.Q passed\n");
}
int main(int argc, char *argv[]) {
test_fp();
test_sc_q();
return 0;
}
Compile and test by:
loongarch64-linux-gnu-gcc test.c -o test -static && ./qemu-loongarch64 -cpu max test
Jiajie Chen (7):
target/loongarch: Require atomics to be aligned
target/loongarch: Add am{swap/add}[_db].{b/h}
target/loongarch: Add amcas[_db].{b/h/w/d}
target/loongarch: Add estimated reciprocal instructions
target/loongarch: Add llacq/screl instructions
target/loongarch: Add sc.q instructions
target/loongarch: Add LA v1.1 instructions to max cpu
target/loongarch/cpu.c | 11 +-
target/loongarch/cpu.h | 7 +
target/loongarch/disas.c | 33 ++++
target/loongarch/insns.decode | 34 ++++
.../tcg/insn_trans/trans_atomic.c.inc | 145 ++++++++++++++++--
.../tcg/insn_trans/trans_farith.c.inc | 4 +
.../tcg/insn_trans/trans_memory.c.inc | 22 +++
.../loongarch/tcg/insn_trans/trans_vec.c.inc | 8 +
target/loongarch/tcg/translate.c | 6 +-
target/loongarch/translate.h | 30 ++--
10 files changed, 280 insertions(+), 20 deletions(-)
--
2.51.0
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 1/7] target/loongarch: Require atomics to be aligned
2025-11-19 12:24 [PATCH v2 0/7] Add LoongArch v1.1 instructions Jiajie Chen
@ 2025-11-19 12:24 ` Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 2/7] target/loongarch: Add am{swap/add}[_db].{b/h} Jiajie Chen
` (2 subsequent siblings)
3 siblings, 0 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:24 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
Currently, all atomic instructions in LoongArch require the address to
be aligned.
Signed-off-by: Jiajie Chen <c@jia.je>
---
target/loongarch/tcg/insn_trans/trans_atomic.c.inc | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
index 77eeedbc42..5622202a67 100644
--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
@@ -9,7 +9,7 @@ static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop)
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
TCGv t0 = make_address_i(ctx, src1, a->imm);
- tcg_gen_qemu_ld_i64(t1, t0, ctx->mem_idx, mop);
+ tcg_gen_qemu_ld_i64(t1, t0, ctx->mem_idx, mop | MO_ALIGN);
tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr));
tcg_gen_st_tl(t1, tcg_env, offsetof(CPULoongArchState, llval));
gen_set_gpr(a->rd, t1, EXT_NONE);
@@ -37,7 +37,7 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop)
tcg_gen_mov_tl(val, src2);
/* generate cmpxchg */
tcg_gen_atomic_cmpxchg_tl(t0, cpu_lladdr, cpu_llval,
- val, ctx->mem_idx, mop);
+ val, ctx->mem_idx, mop | MO_ALIGN);
tcg_gen_setcond_tl(TCG_COND_EQ, dest, t0, cpu_llval);
gen_set_label(done);
gen_set_gpr(a->rd, dest, EXT_NONE);
@@ -63,7 +63,7 @@ static bool gen_am(DisasContext *ctx, arg_rrr *a,
addr = make_address_i(ctx, addr, 0);
- func(dest, addr, val, ctx->mem_idx, mop);
+ func(dest, addr, val, ctx->mem_idx, mop | MO_ALIGN);
gen_set_gpr(a->rd, dest, EXT_NONE);
return true;
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 2/7] target/loongarch: Add am{swap/add}[_db].{b/h}
2025-11-19 12:24 [PATCH v2 0/7] Add LoongArch v1.1 instructions Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 1/7] target/loongarch: Require atomics to be aligned Jiajie Chen
@ 2025-11-19 12:24 ` Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 3/7] target/loongarch: Add amcas[_db].{b/h/w/d} Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 4/7] target/loongarch: Add estimated reciprocal instructions Jiajie Chen
3 siblings, 0 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:24 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
The new instructions are introduced in LoongArch v1.1:
- amswap.b
- amswap.h
- amadd.b
- amadd.h
- amswap_db.b
- amswap_db.h
- amadd_db.b
- amadd_db.h
The instructions are gated by CPUCFG2.LAM_BH.
Signed-off-by: Jiajie Chen <c@jia.je>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/cpu.h | 1 +
target/loongarch/disas.c | 8 ++++++++
target/loongarch/insns.decode | 8 ++++++++
.../tcg/insn_trans/trans_atomic.c.inc | 8 ++++++++
target/loongarch/translate.h | 19 ++++++++++---------
5 files changed, 35 insertions(+), 9 deletions(-)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 1a14469b3b..aa3d976875 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -138,6 +138,7 @@ FIELD(CPUCFG2, LBT_ALL, 18, 3)
FIELD(CPUCFG2, LSPW, 21, 1)
FIELD(CPUCFG2, LAM, 22, 1)
FIELD(CPUCFG2, HPTW, 24, 1)
+FIELD(CPUCFG2, LAM_BH, 27, 1)
/* cpucfg[3] bits */
FIELD(CPUCFG3, CCDMA, 0, 1)
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 63989a6282..1a0f527cb1 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -580,6 +580,14 @@ INSN(fldx_s, frr)
INSN(fldx_d, frr)
INSN(fstx_s, frr)
INSN(fstx_d, frr)
+INSN(amswap_b, rrr)
+INSN(amswap_h, rrr)
+INSN(amadd_b, rrr)
+INSN(amadd_h, rrr)
+INSN(amswap_db_b, rrr)
+INSN(amswap_db_h, rrr)
+INSN(amadd_db_b, rrr)
+INSN(amadd_db_h, rrr)
INSN(amswap_w, rrr)
INSN(amswap_d, rrr)
INSN(amadd_w, rrr)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 62f58cc541..678ce42038 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -261,6 +261,14 @@ ll_w 0010 0000 .............. ..... ..... @rr_i14s2
sc_w 0010 0001 .............. ..... ..... @rr_i14s2
ll_d 0010 0010 .............. ..... ..... @rr_i14s2
sc_d 0010 0011 .............. ..... ..... @rr_i14s2
+amswap_b 0011 10000101 11000 ..... ..... ..... @rrr
+amswap_h 0011 10000101 11001 ..... ..... ..... @rrr
+amadd_b 0011 10000101 11010 ..... ..... ..... @rrr
+amadd_h 0011 10000101 11011 ..... ..... ..... @rrr
+amswap_db_b 0011 10000101 11100 ..... ..... ..... @rrr
+amswap_db_h 0011 10000101 11101 ..... ..... ..... @rrr
+amadd_db_b 0011 10000101 11110 ..... ..... ..... @rrr
+amadd_db_h 0011 10000101 11111 ..... ..... ..... @rrr
amswap_w 0011 10000110 00000 ..... ..... ..... @rrr
amswap_d 0011 10000110 00001 ..... ..... ..... @rrr
amadd_w 0011 10000110 00010 ..... ..... ..... @rrr
diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
index 5622202a67..0d837d08b6 100644
--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
@@ -73,6 +73,14 @@ TRANS(ll_w, ALL, gen_ll, MO_TESL)
TRANS(sc_w, ALL, gen_sc, MO_TESL)
TRANS(ll_d, 64, gen_ll, MO_TEUQ)
TRANS(sc_d, 64, gen_sc, MO_TEUQ)
+TRANS(amswap_b, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_SB)
+TRANS(amswap_h, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_TESW)
+TRANS(amadd_b, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_SB)
+TRANS(amadd_h, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESW)
+TRANS(amswap_db_b, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_SB)
+TRANS(amswap_db_h, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_TESW)
+TRANS(amadd_db_b, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_SB)
+TRANS(amadd_db_h, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESW)
TRANS(amswap_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TESL)
TRANS64(amswap_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ)
TRANS(amadd_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL)
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index bbe015ba57..eb424bb0da 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -21,15 +21,16 @@
#define avail_ALL(C) true
#define avail_64(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, ARCH) == \
CPUCFG1_ARCH_LA64)
-#define avail_FP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP))
-#define avail_FP_SP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP_SP))
-#define avail_FP_DP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP_DP))
-#define avail_LSPW(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSPW))
-#define avail_LAM(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM))
-#define avail_LSX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX))
-#define avail_LASX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX))
-#define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
-#define avail_CRC(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, CRC))
+#define avail_FP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP))
+#define avail_FP_SP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP_SP))
+#define avail_FP_DP(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FP_DP))
+#define avail_LSPW(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSPW))
+#define avail_LAM(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM))
+#define avail_LAM_BH(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM_BH))
+#define avail_LSX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX))
+#define avail_LASX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX))
+#define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
+#define avail_CRC(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, CRC))
/*
* If an operation is being performed on less than TARGET_LONG_BITS,
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 3/7] target/loongarch: Add amcas[_db].{b/h/w/d}
2025-11-19 12:24 [PATCH v2 0/7] Add LoongArch v1.1 instructions Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 1/7] target/loongarch: Require atomics to be aligned Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 2/7] target/loongarch: Add am{swap/add}[_db].{b/h} Jiajie Chen
@ 2025-11-19 12:24 ` Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 4/7] target/loongarch: Add estimated reciprocal instructions Jiajie Chen
3 siblings, 0 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:24 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
The new instructions are introduced in LoongArch v1.1:
- amcas.b
- amcas.h
- amcas.w
- amcas.d
- amcas_db.b
- amcas_db.h
- amcas_db.w
- amcas_db.d
The new instructions are gated by CPUCFG2.LAMCAS.
Signed-off-by: Jiajie Chen <c@jia.je>
---
target/loongarch/cpu.h | 1 +
target/loongarch/disas.c | 8 ++++++
target/loongarch/insns.decode | 8 ++++++
.../tcg/insn_trans/trans_atomic.c.inc | 25 +++++++++++++++++++
target/loongarch/translate.h | 1 +
5 files changed, 43 insertions(+)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index aa3d976875..9ca7af9b4a 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -139,6 +139,7 @@ FIELD(CPUCFG2, LSPW, 21, 1)
FIELD(CPUCFG2, LAM, 22, 1)
FIELD(CPUCFG2, HPTW, 24, 1)
FIELD(CPUCFG2, LAM_BH, 27, 1)
+FIELD(CPUCFG2, LAMCAS, 28, 1)
/* cpucfg[3] bits */
FIELD(CPUCFG3, CCDMA, 0, 1)
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 1a0f527cb1..66c0cae5a9 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -580,6 +580,14 @@ INSN(fldx_s, frr)
INSN(fldx_d, frr)
INSN(fstx_s, frr)
INSN(fstx_d, frr)
+INSN(amcas_b, rrr)
+INSN(amcas_h, rrr)
+INSN(amcas_w, rrr)
+INSN(amcas_d, rrr)
+INSN(amcas_db_b, rrr)
+INSN(amcas_db_h, rrr)
+INSN(amcas_db_w, rrr)
+INSN(amcas_db_d, rrr)
INSN(amswap_b, rrr)
INSN(amswap_h, rrr)
INSN(amadd_b, rrr)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 678ce42038..cf4123cd46 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -261,6 +261,14 @@ ll_w 0010 0000 .............. ..... ..... @rr_i14s2
sc_w 0010 0001 .............. ..... ..... @rr_i14s2
ll_d 0010 0010 .............. ..... ..... @rr_i14s2
sc_d 0010 0011 .............. ..... ..... @rr_i14s2
+amcas_b 0011 10000101 10000 ..... ..... ..... @rrr
+amcas_h 0011 10000101 10001 ..... ..... ..... @rrr
+amcas_w 0011 10000101 10010 ..... ..... ..... @rrr
+amcas_d 0011 10000101 10011 ..... ..... ..... @rrr
+amcas_db_b 0011 10000101 10100 ..... ..... ..... @rrr
+amcas_db_h 0011 10000101 10101 ..... ..... ..... @rrr
+amcas_db_w 0011 10000101 10110 ..... ..... ..... @rrr
+amcas_db_d 0011 10000101 10111 ..... ..... ..... @rrr
amswap_b 0011 10000101 11000 ..... ..... ..... @rrr
amswap_h 0011 10000101 11001 ..... ..... ..... @rrr
amadd_b 0011 10000101 11010 ..... ..... ..... @rrr
diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
index 0d837d08b6..1b2673b82d 100644
--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
@@ -45,6 +45,23 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop)
return true;
}
+static bool gen_cas(DisasContext *ctx, arg_rrr *a,
+ void (*func)(TCGv, TCGv, TCGv, TCGv, TCGArg, MemOp),
+ MemOp mop)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv val = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv old = gpr_src(ctx, a->rd, EXT_NONE);
+
+ addr = make_address_i(ctx, addr, 0);
+
+ func(dest, addr, old, val, ctx->mem_idx, mop | MO_ALIGN);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
static bool gen_am(DisasContext *ctx, arg_rrr *a,
void (*func)(TCGv, TCGv, TCGv, TCGArg, MemOp),
MemOp mop)
@@ -73,6 +90,14 @@ TRANS(ll_w, ALL, gen_ll, MO_TESL)
TRANS(sc_w, ALL, gen_sc, MO_TESL)
TRANS(ll_d, 64, gen_ll, MO_TEUQ)
TRANS(sc_d, 64, gen_sc, MO_TEUQ)
+TRANS(amcas_b, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_SB)
+TRANS(amcas_h, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TESW)
+TRANS(amcas_w, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TESL)
+TRANS(amcas_d, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TEUQ)
+TRANS(amcas_db_b, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_SB)
+TRANS(amcas_db_h, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TESW)
+TRANS(amcas_db_w, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TESL)
+TRANS(amcas_db_d, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TEUQ)
TRANS(amswap_b, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_SB)
TRANS(amswap_h, LAM_BH, gen_am, tcg_gen_atomic_xchg_tl, MO_TESW)
TRANS(amadd_b, LAM_BH, gen_am, tcg_gen_atomic_fetch_add_tl, MO_SB)
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index eb424bb0da..9ba3b425c1 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -27,6 +27,7 @@
#define avail_LSPW(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSPW))
#define avail_LAM(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM))
#define avail_LAM_BH(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM_BH))
+#define avail_LAMCAS(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAMCAS))
#define avail_LSX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX))
#define avail_LASX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX))
#define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 4/7] target/loongarch: Add estimated reciprocal instructions
2025-11-19 12:24 [PATCH v2 0/7] Add LoongArch v1.1 instructions Jiajie Chen
` (2 preceding siblings ...)
2025-11-19 12:24 ` [PATCH v2 3/7] target/loongarch: Add amcas[_db].{b/h/w/d} Jiajie Chen
@ 2025-11-19 12:30 ` Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 5/7] target/loongarch: Add llacq/screl instructions Jiajie Chen
` (2 more replies)
3 siblings, 3 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:30 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
Add the following new instructions in LoongArch v1.1:
- frecipe.s
- frecipe.d
- frsqrte.s
- frsqrte.d
- vfrecipe.s
- vfrecipe.d
- vfrsqrte.s
- vfrsqrte.d
- xvfrecipe.s
- xvfrecipe.d
- xvfrsqrte.s
- xvfrsqrte.d
They are guarded by CPUCFG2.FRECIPE. Altought the instructions allow
implementation to improve performance by reducing precision, we use the
existing softfloat implementation.
Signed-off-by: Jiajie Chen <c@jia.je>
Acked-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/cpu.h | 1 +
target/loongarch/disas.c | 12 ++++++++++++
target/loongarch/insns.decode | 12 ++++++++++++
target/loongarch/tcg/insn_trans/trans_farith.c.inc | 4 ++++
target/loongarch/tcg/insn_trans/trans_vec.c.inc | 8 ++++++++
target/loongarch/translate.h | 6 ++++++
6 files changed, 43 insertions(+)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 9ca7af9b4a..740e474d79 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -138,6 +138,7 @@ FIELD(CPUCFG2, LBT_ALL, 18, 3)
FIELD(CPUCFG2, LSPW, 21, 1)
FIELD(CPUCFG2, LAM, 22, 1)
FIELD(CPUCFG2, HPTW, 24, 1)
+FIELD(CPUCFG2, FRECIPE, 25, 1)
FIELD(CPUCFG2, LAM_BH, 27, 1)
FIELD(CPUCFG2, LAMCAS, 28, 1)
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 66c0cae5a9..e5e1b37ce0 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -478,6 +478,10 @@ INSN(frecip_s, ff)
INSN(frecip_d, ff)
INSN(frsqrt_s, ff)
INSN(frsqrt_d, ff)
+INSN(frecipe_s, ff)
+INSN(frecipe_d, ff)
+INSN(frsqrte_s, ff)
+INSN(frsqrte_d, ff)
INSN(fmov_s, ff)
INSN(fmov_d, ff)
INSN(movgr2fr_w, fr)
@@ -1429,6 +1433,10 @@ INSN_LSX(vfrecip_s, vv)
INSN_LSX(vfrecip_d, vv)
INSN_LSX(vfrsqrt_s, vv)
INSN_LSX(vfrsqrt_d, vv)
+INSN_LSX(vfrecipe_s, vv)
+INSN_LSX(vfrecipe_d, vv)
+INSN_LSX(vfrsqrte_s, vv)
+INSN_LSX(vfrsqrte_d, vv)
INSN_LSX(vfcvtl_s_h, vv)
INSN_LSX(vfcvth_s_h, vv)
@@ -2343,6 +2351,10 @@ INSN_LASX(xvfrecip_s, vv)
INSN_LASX(xvfrecip_d, vv)
INSN_LASX(xvfrsqrt_s, vv)
INSN_LASX(xvfrsqrt_d, vv)
+INSN_LASX(xvfrecipe_s, vv)
+INSN_LASX(xvfrecipe_d, vv)
+INSN_LASX(xvfrsqrte_s, vv)
+INSN_LASX(xvfrsqrte_d, vv)
INSN_LASX(xvfcvtl_s_h, vv)
INSN_LASX(xvfcvth_s_h, vv)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index cf4123cd46..92078f0f9f 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -371,6 +371,10 @@ frecip_s 0000 00010001 01000 10101 ..... ..... @ff
frecip_d 0000 00010001 01000 10110 ..... ..... @ff
frsqrt_s 0000 00010001 01000 11001 ..... ..... @ff
frsqrt_d 0000 00010001 01000 11010 ..... ..... @ff
+frecipe_s 0000 00010001 01000 11101 ..... ..... @ff
+frecipe_d 0000 00010001 01000 11110 ..... ..... @ff
+frsqrte_s 0000 00010001 01001 00001 ..... ..... @ff
+frsqrte_d 0000 00010001 01001 00010 ..... ..... @ff
fscaleb_s 0000 00010001 00001 ..... ..... ..... @fff
fscaleb_d 0000 00010001 00010 ..... ..... ..... @fff
flogb_s 0000 00010001 01000 01001 ..... ..... @ff
@@ -1115,6 +1119,10 @@ vfrecip_s 0111 00101001 11001 11101 ..... ..... @vv
vfrecip_d 0111 00101001 11001 11110 ..... ..... @vv
vfrsqrt_s 0111 00101001 11010 00001 ..... ..... @vv
vfrsqrt_d 0111 00101001 11010 00010 ..... ..... @vv
+vfrecipe_s 0111 00101001 11010 00101 ..... ..... @vv
+vfrecipe_d 0111 00101001 11010 00110 ..... ..... @vv
+vfrsqrte_s 0111 00101001 11010 01001 ..... ..... @vv
+vfrsqrte_d 0111 00101001 11010 01010 ..... ..... @vv
vfcvtl_s_h 0111 00101001 11011 11010 ..... ..... @vv
vfcvth_s_h 0111 00101001 11011 11011 ..... ..... @vv
@@ -1879,6 +1887,10 @@ xvfrecip_s 0111 01101001 11001 11101 ..... ..... @vv
xvfrecip_d 0111 01101001 11001 11110 ..... ..... @vv
xvfrsqrt_s 0111 01101001 11010 00001 ..... ..... @vv
xvfrsqrt_d 0111 01101001 11010 00010 ..... ..... @vv
+xvfrecipe_s 0111 01101001 11010 00101 ..... ..... @vv
+xvfrecipe_d 0111 01101001 11010 00110 ..... ..... @vv
+xvfrsqrte_s 0111 01101001 11010 01001 ..... ..... @vv
+xvfrsqrte_d 0111 01101001 11010 01010 ..... ..... @vv
xvfcvtl_s_h 0111 01101001 11011 11010 ..... ..... @vv
xvfcvth_s_h 0111 01101001 11011 11011 ..... ..... @vv
diff --git a/target/loongarch/tcg/insn_trans/trans_farith.c.inc b/target/loongarch/tcg/insn_trans/trans_farith.c.inc
index ff6cf3448e..eed6ab7312 100644
--- a/target/loongarch/tcg/insn_trans/trans_farith.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_farith.c.inc
@@ -191,6 +191,10 @@ TRANS(frecip_s, FP_SP, gen_ff, gen_helper_frecip_s)
TRANS(frecip_d, FP_DP, gen_ff, gen_helper_frecip_d)
TRANS(frsqrt_s, FP_SP, gen_ff, gen_helper_frsqrt_s)
TRANS(frsqrt_d, FP_DP, gen_ff, gen_helper_frsqrt_d)
+TRANS(frecipe_s, FRECIPE_FP_SP, gen_ff, gen_helper_frecip_s)
+TRANS(frecipe_d, FRECIPE_FP_DP, gen_ff, gen_helper_frecip_d)
+TRANS(frsqrte_s, FRECIPE_FP_SP, gen_ff, gen_helper_frsqrt_s)
+TRANS(frsqrte_d, FRECIPE_FP_DP, gen_ff, gen_helper_frsqrt_d)
TRANS64(flogb_s, FP_SP, gen_ff, gen_helper_flogb_s)
TRANS64(flogb_d, FP_DP, gen_ff, gen_helper_flogb_d)
TRANS(fclass_s, FP_SP, gen_ff, gen_helper_fclass_s)
diff --git a/target/loongarch/tcg/insn_trans/trans_vec.c.inc b/target/loongarch/tcg/insn_trans/trans_vec.c.inc
index 38bccf2838..ef57abe408 100644
--- a/target/loongarch/tcg/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_vec.c.inc
@@ -4407,12 +4407,20 @@ TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
+TRANS(vfrecipe_s, FRECIPE_LSX, gen_vv_ptr, gen_helper_vfrecip_s)
+TRANS(vfrecipe_d, FRECIPE_LSX, gen_vv_ptr, gen_helper_vfrecip_d)
+TRANS(vfrsqrte_s, FRECIPE_LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
+TRANS(vfrsqrte_d, FRECIPE_LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
TRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s)
TRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d)
TRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s)
TRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d)
TRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
TRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)
+TRANS(xvfrecipe_s, FRECIPE_LASX, gen_xx_ptr, gen_helper_vfrecip_s)
+TRANS(xvfrecipe_d, FRECIPE_LASX, gen_xx_ptr, gen_helper_vfrecip_d)
+TRANS(xvfrsqrte_s, FRECIPE_LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
+TRANS(xvfrsqrte_d, FRECIPE_LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)
TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index 9ba3b425c1..331f79c8f2 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -33,6 +33,12 @@
#define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
#define avail_CRC(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, CRC))
+#define avail_FRECIPE(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, FRECIPE))
+#define avail_FRECIPE_FP_SP(C) (avail_FRECIPE(C) && avail_FP_SP(C))
+#define avail_FRECIPE_FP_DP(C) (avail_FRECIPE(C) && avail_FP_DP(C))
+#define avail_FRECIPE_LSX(C) (avail_FRECIPE(C) && avail_LSX(C))
+#define avail_FRECIPE_LASX(C) (avail_FRECIPE(C) && avail_LASX(C))
+
/*
* If an operation is being performed on less than TARGET_LONG_BITS,
* it may require the inputs to be sign- or zero-extended; which will
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 5/7] target/loongarch: Add llacq/screl instructions
2025-11-19 12:30 ` [PATCH v2 4/7] target/loongarch: Add estimated reciprocal instructions Jiajie Chen
@ 2025-11-19 12:30 ` Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 6/7] target/loongarch: Add sc.q instructions Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 7/7] target/loongarch: Add LA v1.1 instructions to max cpu Jiajie Chen
2 siblings, 0 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:30 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
Add the following instructions in LoongArch v1.1:
- llacq.w
- screl.w
- llacq.d
- screl.d
They are guarded by CPUCFG2.LLACQ_SCREL.
Signed-off-by: Jiajie Chen <c@jia.je>
Co-developed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/cpu.h | 1 +
target/loongarch/disas.c | 4 ++++
target/loongarch/insns.decode | 5 ++++
.../tcg/insn_trans/trans_atomic.c.inc | 24 ++++++++++++++-----
target/loongarch/translate.h | 3 +++
5 files changed, 31 insertions(+), 6 deletions(-)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 740e474d79..5cab02ad6f 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -141,6 +141,7 @@ FIELD(CPUCFG2, HPTW, 24, 1)
FIELD(CPUCFG2, FRECIPE, 25, 1)
FIELD(CPUCFG2, LAM_BH, 27, 1)
FIELD(CPUCFG2, LAMCAS, 28, 1)
+FIELD(CPUCFG2, LLACQ_SCREL, 29, 1)
/* cpucfg[3] bits */
FIELD(CPUCFG3, CCDMA, 0, 1)
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index e5e1b37ce0..3164fade9b 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -584,6 +584,10 @@ INSN(fldx_s, frr)
INSN(fldx_d, frr)
INSN(fstx_s, frr)
INSN(fstx_d, frr)
+INSN(llacq_w, rr_i)
+INSN(screl_w, rr_i)
+INSN(llacq_d, rr_i)
+INSN(screl_d, rr_i)
INSN(amcas_b, rrr)
INSN(amcas_h, rrr)
INSN(amcas_w, rrr)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 92078f0f9f..7898f5f719 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -69,6 +69,7 @@
@rr_i14s2 .... .... .............. rj:5 rd:5 &rr_i imm=%i14s2
@rr_i16 .... .. imm:s16 rj:5 rd:5 &rr_i
@rr_i16s2 .... .. ................ rj:5 rd:5 &rr_i imm=%offs16
+@rr_i0 .... .. ................ rj:5 rd:5 &rr_i imm=0
@hint_r_i12 .... ...... imm:s12 rj:5 hint:5 &hint_r_i
@hint_rr .... ........ ..... rk:5 rj:5 hint:5 &hint_rr
@rrr_sa2p1 .... ........ ... .. rk:5 rj:5 rd:5 &rrr_sa sa=%sa2p1
@@ -261,6 +262,10 @@ ll_w 0010 0000 .............. ..... ..... @rr_i14s2
sc_w 0010 0001 .............. ..... ..... @rr_i14s2
ll_d 0010 0010 .............. ..... ..... @rr_i14s2
sc_d 0010 0011 .............. ..... ..... @rr_i14s2
+llacq_w 0011 10000101 01111 00000 ..... ..... @rr_i0
+screl_w 0011 10000101 01111 00001 ..... ..... @rr_i0
+llacq_d 0011 10000101 01111 00010 ..... ..... @rr_i0
+screl_d 0011 10000101 01111 00011 ..... ..... @rr_i0
amcas_b 0011 10000101 10000 ..... ..... ..... @rrr
amcas_h 0011 10000101 10001 ..... ..... ..... @rrr
amcas_w 0011 10000101 10010 ..... ..... ..... @rrr
diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
index 1b2673b82d..c9a6dcfdeb 100644
--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
@@ -3,7 +3,7 @@
* Copyright (c) 2021 Loongson Technology Corporation Limited
*/
-static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop)
+static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool acq)
{
TCGv t1 = tcg_temp_new();
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
@@ -14,10 +14,14 @@ static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop)
tcg_gen_st_tl(t1, tcg_env, offsetof(CPULoongArchState, llval));
gen_set_gpr(a->rd, t1, EXT_NONE);
+ if (acq) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ }
+
return true;
}
-static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop)
+static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool rel)
{
TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
@@ -29,6 +33,10 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop)
TCGLabel *done = gen_new_label();
tcg_gen_addi_tl(t0, src1, a->imm);
+
+ if (rel) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ }
tcg_gen_brcond_tl(TCG_COND_EQ, t0, cpu_lladdr, l1);
tcg_gen_movi_tl(dest, 0);
tcg_gen_br(done);
@@ -86,10 +94,14 @@ static bool gen_am(DisasContext *ctx, arg_rrr *a,
return true;
}
-TRANS(ll_w, ALL, gen_ll, MO_TESL)
-TRANS(sc_w, ALL, gen_sc, MO_TESL)
-TRANS(ll_d, 64, gen_ll, MO_TEUQ)
-TRANS(sc_d, 64, gen_sc, MO_TEUQ)
+TRANS(ll_w, ALL, gen_ll, MO_TESL, false)
+TRANS(sc_w, ALL, gen_sc, MO_TESL, false)
+TRANS(ll_d, 64, gen_ll, MO_TEUQ, false)
+TRANS(sc_d, 64, gen_sc, MO_TEUQ, false)
+TRANS(llacq_w, LLACQ_SCREL, gen_ll, MO_TESL, true)
+TRANS(screl_w, LLACQ_SCREL, gen_sc, MO_TESL, true)
+TRANS(llacq_d, LLACQ_SCREL_64, gen_ll, MO_TEUQ, true)
+TRANS(screl_d, LLACQ_SCREL_64, gen_sc, MO_TEUQ, true)
TRANS(amcas_b, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_SB)
TRANS(amcas_h, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TESW)
TRANS(amcas_w, LAMCAS, gen_cas, tcg_gen_atomic_cmpxchg_tl, MO_TESL)
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index 331f79c8f2..76bceedf98 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -39,6 +39,9 @@
#define avail_FRECIPE_LSX(C) (avail_FRECIPE(C) && avail_LSX(C))
#define avail_FRECIPE_LASX(C) (avail_FRECIPE(C) && avail_LASX(C))
+#define avail_LLACQ_SCREL(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LLACQ_SCREL))
+#define avail_LLACQ_SCREL_64(C) (avail_64(C) && avail_LLACQ_SCREL(C))
+
/*
* If an operation is being performed on less than TARGET_LONG_BITS,
* it may require the inputs to be sign- or zero-extended; which will
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 6/7] target/loongarch: Add sc.q instructions
2025-11-19 12:30 ` [PATCH v2 4/7] target/loongarch: Add estimated reciprocal instructions Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 5/7] target/loongarch: Add llacq/screl instructions Jiajie Chen
@ 2025-11-19 12:30 ` Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 7/7] target/loongarch: Add LA v1.1 instructions to max cpu Jiajie Chen
2 siblings, 0 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:30 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
Add the sc.q instruction in LoongArch v1.1, guarded by CPUCFG2.SCQ. It
is implemented by reading 128bit data (llval + llval_high) in ll.d when
aligned to 16B boundary, and cmpxchg 128bit in sc.q. If ld.d
matches the higher part of the 128bit, its data is taken from
llval_high.
Expected assembly sequence:
ll.d lo, base, 0
ld.d hi, base, 8
sc.q lo, hi, base
Signed-off-by: Jiajie Chen <c@jia.je>
---
target/loongarch/cpu.h | 3 +
target/loongarch/disas.c | 1 +
target/loongarch/insns.decode | 1 +
.../tcg/insn_trans/trans_atomic.c.inc | 82 +++++++++++++++++++
.../tcg/insn_trans/trans_memory.c.inc | 22 +++++
target/loongarch/tcg/translate.c | 6 +-
target/loongarch/translate.h | 1 +
7 files changed, 115 insertions(+), 1 deletion(-)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 5cab02ad6f..0a89c06b01 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -142,6 +142,7 @@ FIELD(CPUCFG2, FRECIPE, 25, 1)
FIELD(CPUCFG2, LAM_BH, 27, 1)
FIELD(CPUCFG2, LAMCAS, 28, 1)
FIELD(CPUCFG2, LLACQ_SCREL, 29, 1)
+FIELD(CPUCFG2, SCQ, 30, 1)
/* cpucfg[3] bits */
FIELD(CPUCFG3, CCDMA, 0, 1)
@@ -377,6 +378,8 @@ typedef struct CPUArchState {
uint32_t fcsr0_mask;
uint64_t lladdr; /* LL virtual address compared against SC */
uint64_t llval;
+ uint64_t llval_high; /* For 128-bit atomic SC.Q */
+ uint64_t llbit_scq; /* Potential LL.D+LD.D+SC.Q sequence in effect */
#endif
#ifndef CONFIG_USER_ONLY
#ifdef CONFIG_TCG
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 3164fade9b..3249ab7ac6 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -584,6 +584,7 @@ INSN(fldx_s, frr)
INSN(fldx_d, frr)
INSN(fstx_s, frr)
INSN(fstx_d, frr)
+INSN(sc_q, rrr)
INSN(llacq_w, rr_i)
INSN(screl_w, rr_i)
INSN(llacq_d, rr_i)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 7898f5f719..3089d42044 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -262,6 +262,7 @@ ll_w 0010 0000 .............. ..... ..... @rr_i14s2
sc_w 0010 0001 .............. ..... ..... @rr_i14s2
ll_d 0010 0010 .............. ..... ..... @rr_i14s2
sc_d 0010 0011 .............. ..... ..... @rr_i14s2
+sc_q 0011 10000101 01110 ..... ..... ..... @rrr
llacq_w 0011 10000101 01111 00000 ..... ..... @rr_i0
screl_w 0011 10000101 01111 00001 ..... ..... @rr_i0
llacq_d 0011 10000101 01111 00010 ..... ..... @rr_i0
diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
index c9a6dcfdeb..565daa7219 100644
--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc
@@ -6,14 +6,48 @@
static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool acq)
{
TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
TCGv t0 = make_address_i(ctx, src1, a->imm);
+ TCGv_i128 t16 = tcg_temp_new_i128();
+ TCGv mask = tcg_constant_tl(0xf);
+ TCGv one = tcg_constant_tl(1);
+ TCGv zero = tcg_constant_tl(0);
+ TCGLabel *l1 = gen_new_label();
+ TCGLabel *done = gen_new_label();
+
+ if (avail_SCQ(ctx) && mop == MO_TEUQ) {
+ /*
+ * The LL.D+LD.D may be paired with SC.Q,
+ * load 128-bit if aligned: (t0 & 0xf) == 0
+ */
+ tcg_gen_and_tl(t1, t0, mask);
+ tcg_gen_brcond_tl(TCG_COND_EQ, t1, zero, l1);
+ /* fallthrough if not aligned to 16B */
+ }
tcg_gen_qemu_ld_i64(t1, t0, ctx->mem_idx, mop | MO_ALIGN);
tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr));
tcg_gen_st_tl(t1, tcg_env, offsetof(CPULoongArchState, llval));
gen_set_gpr(a->rd, t1, EXT_NONE);
+ if (avail_SCQ(ctx) && mop == MO_TEUQ) {
+ tcg_gen_br(done);
+
+ gen_set_label(l1);
+
+ /* Load 16B data and save into llval/llval_high */
+ tcg_gen_qemu_ld_i128(t16, t0, ctx->mem_idx, MO_128 | MO_ALIGN);
+ tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr));
+ tcg_gen_extr_i128_i64(t1, t2, t16);
+ tcg_gen_st_tl(t1, tcg_env, offsetof(CPULoongArchState, llval));
+ tcg_gen_st_tl(t2, tcg_env, offsetof(CPULoongArchState, llval_high));
+ tcg_gen_st_tl(one, tcg_env, offsetof(CPULoongArchState, llbit_scq));
+ gen_set_gpr(a->rd, t1, EXT_NONE);
+
+ gen_set_label(done);
+ }
+
if (acq) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
}
@@ -28,6 +62,7 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool rel)
TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE);
TCGv t0 = tcg_temp_new();
TCGv val = tcg_temp_new();
+ TCGv zero = tcg_constant_tl(0);
TCGLabel *l1 = gen_new_label();
TCGLabel *done = gen_new_label();
@@ -37,6 +72,11 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool rel)
if (rel) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
}
+
+ if (avail_SCQ(ctx)) {
+ tcg_gen_st_tl(zero, tcg_env, offsetof(CPULoongArchState, llbit_scq));
+ }
+
tcg_gen_brcond_tl(TCG_COND_EQ, t0, cpu_lladdr, l1);
tcg_gen_movi_tl(dest, 0);
tcg_gen_br(done);
@@ -53,6 +93,47 @@ static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop, bool rel)
return true;
}
+static bool gen_sc_q(DisasContext *ctx, arg_rrr *a, MemOp mop)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE);
+ TCGv src3 = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv_i128 t16 = tcg_temp_new_i128();
+ TCGv_i128 c16 = tcg_temp_new_i128();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv zero = tcg_constant_tl(0);
+
+ TCGLabel *l1 = gen_new_label();
+ TCGLabel *done = gen_new_label();
+
+ tcg_gen_st_tl(zero, tcg_env, offsetof(CPULoongArchState, llbit_scq));
+ tcg_gen_brcond_tl(TCG_COND_EQ, src1, cpu_lladdr, l1);
+ tcg_gen_movi_tl(dest, 0);
+ tcg_gen_br(done);
+
+ gen_set_label(l1);
+ tcg_gen_concat_i64_i128(t16, src2, src3);
+ tcg_gen_concat_i64_i128(c16, cpu_llval,
+ cpu_llval_high);
+
+ /* generate cmpxchg */
+ tcg_gen_atomic_cmpxchg_i128(t16, cpu_lladdr, c16,
+ t16, ctx->mem_idx, mop | MO_ALIGN);
+
+ /* check if success */
+ tcg_gen_extr_i128_i64(t1, t2, t16);
+ tcg_gen_xor_i64(t1, t1, cpu_llval);
+ tcg_gen_xor_i64(t2, t2, cpu_llval_high);
+ tcg_gen_or_i64(t1, t1, t2);
+ tcg_gen_setcondi_i64(TCG_COND_EQ, dest, t1, 0);
+ gen_set_label(done);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
static bool gen_cas(DisasContext *ctx, arg_rrr *a,
void (*func)(TCGv, TCGv, TCGv, TCGv, TCGArg, MemOp),
MemOp mop)
@@ -98,6 +179,7 @@ TRANS(ll_w, ALL, gen_ll, MO_TESL, false)
TRANS(sc_w, ALL, gen_sc, MO_TESL, false)
TRANS(ll_d, 64, gen_ll, MO_TEUQ, false)
TRANS(sc_d, 64, gen_sc, MO_TEUQ, false)
+TRANS(sc_q, 64, gen_sc_q, MO_128)
TRANS(llacq_w, LLACQ_SCREL, gen_ll, MO_TESL, true)
TRANS(screl_w, LLACQ_SCREL, gen_sc, MO_TESL, true)
TRANS(llacq_d, LLACQ_SCREL_64, gen_ll, MO_TEUQ, true)
diff --git a/target/loongarch/tcg/insn_trans/trans_memory.c.inc b/target/loongarch/tcg/insn_trans/trans_memory.c.inc
index 42f4e74012..8b3c1b037c 100644
--- a/target/loongarch/tcg/insn_trans/trans_memory.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_memory.c.inc
@@ -7,11 +7,33 @@ static bool gen_load(DisasContext *ctx, arg_rr_i *a, MemOp mop)
{
TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv t1 = tcg_temp_new();
+ TCGv mask = tcg_constant_tl(0x8);
+ TCGv zero = tcg_constant_tl(0);
+ TCGLabel *done = gen_new_label();
+ TCGLabel *l1 = gen_new_label();
addr = make_address_i(ctx, addr, a->imm);
+ if (avail_SCQ(ctx) && mop == MO_TEUQ) {
+ /*
+ * The LL.D+LD.D may be paired with SC.Q,
+ * use llval_high if llbit_scq && (addr == lladdr ^ 0x8)
+ */
+ tcg_gen_brcond_tl(TCG_COND_EQ, cpu_llbit_scq, zero, l1);
+ tcg_gen_xor_tl(t1, addr, mask);
+ tcg_gen_brcond_tl(TCG_COND_NE, cpu_lladdr, t1, l1);
+ gen_set_gpr(a->rd, cpu_llval_high, EXT_NONE);
+ tcg_gen_br(done);
+ gen_set_label(l1);
+ }
+
tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ if (avail_SCQ(ctx) && mop == MO_TEUQ) {
+ gen_set_label(done);
+ }
return true;
}
diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
index 055f6fb604..2023f892be 100644
--- a/target/loongarch/tcg/translate.c
+++ b/target/loongarch/tcg/translate.c
@@ -24,7 +24,7 @@
/* Global register indices */
TCGv cpu_gpr[32], cpu_pc;
-static TCGv cpu_lladdr, cpu_llval;
+static TCGv cpu_lladdr, cpu_llval, cpu_llval_high, cpu_llbit_scq;
#define HELPER_H "helper.h"
#include "exec/helper-info.c.inc"
@@ -360,6 +360,10 @@ void loongarch_translate_init(void)
offsetof(CPULoongArchState, lladdr), "lladdr");
cpu_llval = tcg_global_mem_new(tcg_env,
offsetof(CPULoongArchState, llval), "llval");
+ cpu_llval_high = tcg_global_mem_new(tcg_env,
+ offsetof(CPULoongArchState, llval_high), "llval_high");
+ cpu_llbit_scq = tcg_global_mem_new(tcg_env,
+ offsetof(CPULoongArchState, llbit_scq), "llbit_scq");
#ifndef CONFIG_USER_ONLY
loongarch_csr_translate_init();
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index 76bceedf98..ba1c89e57b 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -30,6 +30,7 @@
#define avail_LAMCAS(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAMCAS))
#define avail_LSX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX))
#define avail_LASX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX))
+#define avail_SCQ(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, SCQ))
#define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
#define avail_CRC(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, CRC))
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 7/7] target/loongarch: Add LA v1.1 instructions to max cpu
2025-11-19 12:30 ` [PATCH v2 4/7] target/loongarch: Add estimated reciprocal instructions Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 5/7] target/loongarch: Add llacq/screl instructions Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 6/7] target/loongarch: Add sc.q instructions Jiajie Chen
@ 2025-11-19 12:30 ` Jiajie Chen
2 siblings, 0 replies; 8+ messages in thread
From: Jiajie Chen @ 2025-11-19 12:30 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, gaosong, git, Jiajie Chen
Add LA v1.1 new instructinos to max cpu by enabling new features in
CPUCFG2.
Signed-off-by: Jiajie Chen <c@jia.je>
---
target/loongarch/cpu.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index d74c3c3766..2c357f9342 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -413,7 +413,16 @@ static void loongarch_max_initfn(Object *obj)
if (tcg_enabled()) {
cpu->env.cpucfg[1] = FIELD_DP32(cpu->env.cpucfg[1], CPUCFG1, MSG_INT, 1);
cpu->msgint = ON_OFF_AUTO_AUTO;
- cpu->env.cpucfg[2] = FIELD_DP32(cpu->env.cpucfg[2], CPUCFG2, HPTW, 1);
+
+ uint32_t data = cpu->env.cpucfg[2];
+ data = FIELD_DP32(data, CPUCFG2, HPTW, 1);
+ /* Enable LA v1.1 instructions */
+ data = FIELD_DP32(data, CPUCFG2, FRECIPE, 1);
+ data = FIELD_DP32(data, CPUCFG2, LAM_BH, 1);
+ data = FIELD_DP32(data, CPUCFG2, LAMCAS, 1);
+ data = FIELD_DP32(data, CPUCFG2, LLACQ_SCREL, 1);
+ data = FIELD_DP32(data, CPUCFG2, SCQ, 1);
+ cpu->env.cpucfg[2] = data;
}
}
--
2.51.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2025-11-19 12:33 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-11-19 12:24 [PATCH v2 0/7] Add LoongArch v1.1 instructions Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 1/7] target/loongarch: Require atomics to be aligned Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 2/7] target/loongarch: Add am{swap/add}[_db].{b/h} Jiajie Chen
2025-11-19 12:24 ` [PATCH v2 3/7] target/loongarch: Add amcas[_db].{b/h/w/d} Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 4/7] target/loongarch: Add estimated reciprocal instructions Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 5/7] target/loongarch: Add llacq/screl instructions Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 6/7] target/loongarch: Add sc.q instructions Jiajie Chen
2025-11-19 12:30 ` [PATCH v2 7/7] target/loongarch: Add LA v1.1 instructions to max cpu Jiajie Chen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).