From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: redha.gouicem@gmail.com, qemu-arm@nongnu.org, qemu-ppc@nongnu.org
Subject: [PATCH] tcg: Special case split barriers before/after load
Date: Sat, 30 Apr 2022 16:45:34 -0700 [thread overview]
Message-ID: <20220430234534.446733-1-richard.henderson@linaro.org> (raw)
When st:ld is not required by the guest but ld:st is, we can
put ld:ld+ld:st barriers after loads, and then st:st barriers
before stores to enforce all required barriers.
The st:st barrier is often special cased by hosts, and that
is expected to be more efficient than a full barrier.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
Redha, I expect this to produce exactly the same barriers as you
did with your 'fix guest memory ordering enforcement' patch.
While this compiles, it does not fix the failures that I see
occasionally with our private gitlab runner. The standalone
version of this failure is
export QTEST_QEMU_BINARY=./qemu-system-i386
for i in `seq 1 100`; do
./tests/qtest/ahci-test > /dev/null &
done
wait
About 10 to 15% of the runs will fail with
ERROR:../src/tests/qtest/ahci-test.c:92:verify_state: assertion failed (ahci_fingerprint == ahci->fingerprint): (0xe0000000 == 0x29228086)
Note that this test never seems to fail unless the system is under
load, thus starting 100 tests on my 80 core neoverse-n1 system.
r~
---
tcg/tcg-op.c | 55 +++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 48 insertions(+), 7 deletions(-)
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 5d48537927..4c568a2592 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2834,9 +2834,6 @@ static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
static void tcg_gen_req_mo(TCGBar type)
{
-#ifdef TCG_GUEST_DEFAULT_MO
- type &= TCG_GUEST_DEFAULT_MO;
-#endif
type &= ~TCG_TARGET_DEFAULT_MO;
if (type) {
tcg_gen_mb(type | TCG_BAR_SC);
@@ -2868,12 +2865,49 @@ static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
#endif
}
+typedef enum {
+ BAR_LD_BEFORE,
+ BAR_LD_AFTER,
+ BAR_ST_BEFORE,
+} ChooseBarrier;
+
+static TCGBar choose_barrier(ChooseBarrier which)
+{
+#ifdef TCG_GUEST_DEFAULT_MO
+ const TCGBar guest_mo = TCG_GUEST_DEFAULT_MO;
+#else
+ const TCGBar guest_mo = TCG_MO_ALL;
+#endif
+ TCGBar ret[3];
+
+ if (guest_mo == 0) {
+ return 0;
+ }
+ /*
+ * Special case for i386 and s390x. Because store-load is not
+ * required by the guest, we can split the barriers such that we
+ * wind up with a store-store barrier, which is expected to be
+ * quicker on some hosts.
+ */
+ if (guest_mo == (TCG_MO_ALL & ~TCG_MO_ST_LD)) {
+ ret[BAR_LD_BEFORE] = 0;
+ ret[BAR_LD_AFTER] = TCG_MO_LD_LD | TCG_MO_LD_ST;
+ ret[BAR_ST_BEFORE] = TCG_MO_ST_ST;
+ } else {
+ ret[BAR_LD_BEFORE] = (TCG_MO_LD_LD | TCG_MO_ST_LD) & guest_mo;
+ ret[BAR_ST_BEFORE] = (TCG_MO_LD_ST | TCG_MO_ST_ST) & guest_mo;
+ ret[BAR_LD_AFTER] = 0;
+ }
+ return ret[which];
+}
+
void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
{
MemOp orig_memop;
MemOpIdx oi;
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ tcg_gen_req_mo(choose_barrier(BAR_LD_BEFORE));
+
memop = tcg_canonicalize_memop(memop, 0, 0);
oi = make_memop_idx(memop, idx);
@@ -2904,6 +2938,8 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
g_assert_not_reached();
}
}
+
+ tcg_gen_req_mo(choose_barrier(BAR_LD_AFTER));
}
void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
@@ -2911,7 +2947,8 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
TCGv_i32 swap = NULL;
MemOpIdx oi;
- tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ tcg_gen_req_mo(choose_barrier(BAR_ST_BEFORE));
+
memop = tcg_canonicalize_memop(memop, 0, 1);
oi = make_memop_idx(memop, idx);
@@ -2959,7 +2996,8 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
return;
}
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ tcg_gen_req_mo(choose_barrier(BAR_LD_BEFORE));
+
memop = tcg_canonicalize_memop(memop, 1, 0);
oi = make_memop_idx(memop, idx);
@@ -2994,6 +3032,8 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
g_assert_not_reached();
}
}
+
+ tcg_gen_req_mo(choose_barrier(BAR_LD_AFTER));
}
void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
@@ -3006,7 +3046,8 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
return;
}
- tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ tcg_gen_req_mo(choose_barrier(BAR_ST_BEFORE));
+
memop = tcg_canonicalize_memop(memop, 1, 1);
oi = make_memop_idx(memop, idx);
--
2.25.1
next reply other threads:[~2022-04-30 23:46 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-30 23:45 Richard Henderson [this message]
2022-05-30 15:10 ` [PATCH] tcg: Special case split barriers before/after load Philippe Mathieu-Daudé via
2022-06-07 14:01 ` Redha
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220430234534.446733-1-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=qemu-ppc@nongnu.org \
--cc=redha.gouicem@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).