* [PATCH 1/4] tcg: Add tcg_out_tb_start backend hook
2023-08-16 14:25 [PATCH 0/4] tcg/aarch64: Enable BTI within the JIT Richard Henderson
@ 2023-08-16 14:25 ` Richard Henderson
2023-08-16 21:38 ` Philippe Mathieu-Daudé
2023-08-16 14:25 ` [PATCH 2/4] util/cpuinfo-aarch64: Add CPUINFO_BTI Richard Henderson
` (3 subsequent siblings)
4 siblings, 1 reply; 10+ messages in thread
From: Richard Henderson @ 2023-08-16 14:25 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, Jordan Niethe
This hook may emit code at the beginning of the TB.
Suggested-by: Jordan Niethe <jniethe5@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg.c | 3 +++
tcg/aarch64/tcg-target.c.inc | 5 +++++
tcg/arm/tcg-target.c.inc | 5 +++++
tcg/i386/tcg-target.c.inc | 5 +++++
tcg/loongarch64/tcg-target.c.inc | 5 +++++
tcg/mips/tcg-target.c.inc | 5 +++++
tcg/ppc/tcg-target.c.inc | 5 +++++
tcg/riscv/tcg-target.c.inc | 5 +++++
tcg/s390x/tcg-target.c.inc | 5 +++++
tcg/sparc64/tcg-target.c.inc | 5 +++++
tcg/tci/tcg-target.c.inc | 5 +++++
11 files changed, 53 insertions(+)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index ddfe9a96cb..1e9b7433bb 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -108,6 +108,7 @@ static void tcg_register_jit_int(const void *buf, size_t size,
__attribute__((unused));
/* Forward declarations for functions declared and used in tcg-target.c.inc. */
+static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
@@ -6009,6 +6010,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
s->gen_insn_data =
tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
+ tcg_out_tb_start(s);
+
num_insns = -1;
QTAILQ_FOREACH(op, &s->ops, link) {
TCGOpcode opc = op->opc;
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 35ca80cd56..8d71ac68f6 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -3123,6 +3123,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}
+static void tcg_out_tb_start(TCGContext *s)
+{
+ /* nothing to do */
+}
+
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
int i;
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 83e286088f..7473b23672 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -2951,6 +2951,11 @@ static void tcg_out_epilogue(TCGContext *s)
(1 << TCG_REG_R10) | (1 << TCG_REG_R11) | (1 << TCG_REG_PC));
}
+static void tcg_out_tb_start(TCGContext *s)
+{
+ /* nothing to do */
+}
+
typedef struct {
DebugFrameHeader h;
uint8_t fde_def_cfa[4];
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index a6b2eae995..05581dd89d 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -4114,6 +4114,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_opc(s, OPC_RET, 0, 0, 0);
}
+static void tcg_out_tb_start(TCGContext *s)
+{
+ /* nothing to do */
+}
+
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
memset(p, 0x90, count);
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index baf5fc3819..29281e954b 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1698,6 +1698,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0);
}
+static void tcg_out_tb_start(TCGContext *s)
+{
+ /* nothing to do */
+}
+
static void tcg_target_init(TCGContext *s)
{
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 9faa8bdf0b..b4da737dbb 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -2628,6 +2628,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
}
+static void tcg_out_tb_start(TCGContext *s)
+{
+ /* nothing to do */
+}
+
static void tcg_target_init(TCGContext *s)
{
tcg_target_detect_isa();
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 511e14b180..a3255851fe 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -2482,6 +2482,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out32(s, BCLR | BO_ALWAYS);
}
+static void tcg_out_tb_start(TCGContext *s)
+{
+ /* nothing to do */
+}
+
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
{
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index eeaeb6b6e3..22834d087b 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -2054,6 +2054,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0);
}
+static void tcg_out_tb_start(TCGContext *s)
+{
+ /* nothing to do */
+}
+
static volatile sig_atomic_t got_sigill;
static void sigill_handler(int signo, siginfo_t *si, void *data)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index a94f7908d6..379aaef1e5 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -3457,6 +3457,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
}
+static void tcg_out_tb_start(TCGContext *s)
+{
+ /* nothing to do */
+}
+
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
memset(p, 0x07, count * sizeof(tcg_insn_unit));
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index ffcb879211..6458ca202d 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -955,6 +955,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_movi_s13(s, TCG_REG_O0, 0);
}
+static void tcg_out_tb_start(TCGContext *s)
+{
+ /* nothing to do */
+}
+
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
int i;
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index 253f27f174..95b3352f82 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -955,6 +955,11 @@ static inline void tcg_target_qemu_prologue(TCGContext *s)
{
}
+static void tcg_out_tb_start(TCGContext *s)
+{
+ /* nothing to do */
+}
+
bool tcg_target_has_memory_bswap(MemOp memop)
{
return true;
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 1/4] tcg: Add tcg_out_tb_start backend hook
2023-08-16 14:25 ` [PATCH 1/4] tcg: Add tcg_out_tb_start backend hook Richard Henderson
@ 2023-08-16 21:38 ` Philippe Mathieu-Daudé
0 siblings, 0 replies; 10+ messages in thread
From: Philippe Mathieu-Daudé @ 2023-08-16 21:38 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: qemu-arm, Jordan Niethe
On 16/8/23 16:25, Richard Henderson wrote:
> This hook may emit code at the beginning of the TB.
>
> Suggested-by: Jordan Niethe <jniethe5@gmail.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/tcg.c | 3 +++
> tcg/aarch64/tcg-target.c.inc | 5 +++++
> tcg/arm/tcg-target.c.inc | 5 +++++
> tcg/i386/tcg-target.c.inc | 5 +++++
> tcg/loongarch64/tcg-target.c.inc | 5 +++++
> tcg/mips/tcg-target.c.inc | 5 +++++
> tcg/ppc/tcg-target.c.inc | 5 +++++
> tcg/riscv/tcg-target.c.inc | 5 +++++
> tcg/s390x/tcg-target.c.inc | 5 +++++
> tcg/sparc64/tcg-target.c.inc | 5 +++++
> tcg/tci/tcg-target.c.inc | 5 +++++
> 11 files changed, 53 insertions(+)
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 2/4] util/cpuinfo-aarch64: Add CPUINFO_BTI
2023-08-16 14:25 [PATCH 0/4] tcg/aarch64: Enable BTI within the JIT Richard Henderson
2023-08-16 14:25 ` [PATCH 1/4] tcg: Add tcg_out_tb_start backend hook Richard Henderson
@ 2023-08-16 14:25 ` Richard Henderson
2023-08-16 21:37 ` Philippe Mathieu-Daudé
2023-08-16 14:25 ` [PATCH 3/4] tcg/aarch64: Emit BTI insns at jump landing pads Richard Henderson
` (2 subsequent siblings)
4 siblings, 1 reply; 10+ messages in thread
From: Richard Henderson @ 2023-08-16 14:25 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
host/include/aarch64/host/cpuinfo.h | 1 +
util/cpuinfo-aarch64.c | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/host/include/aarch64/host/cpuinfo.h b/host/include/aarch64/host/cpuinfo.h
index 769626b098..a59c8418d2 100644
--- a/host/include/aarch64/host/cpuinfo.h
+++ b/host/include/aarch64/host/cpuinfo.h
@@ -10,6 +10,7 @@
#define CPUINFO_LSE (1u << 1)
#define CPUINFO_LSE2 (1u << 2)
#define CPUINFO_AES (1u << 3)
+#define CPUINFO_BTI (1u << 4)
/* Initialized with a constructor. */
extern unsigned cpuinfo;
diff --git a/util/cpuinfo-aarch64.c b/util/cpuinfo-aarch64.c
index ababc39550..97f5548a95 100644
--- a/util/cpuinfo-aarch64.c
+++ b/util/cpuinfo-aarch64.c
@@ -57,11 +57,15 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
info |= (hwcap & HWCAP_ATOMICS ? CPUINFO_LSE : 0);
info |= (hwcap & HWCAP_USCAT ? CPUINFO_LSE2 : 0);
info |= (hwcap & HWCAP_AES ? CPUINFO_AES: 0);
+
+ unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
+ info |= (hwcap2 & HWCAP2_BTI ? CPUINFO_BTI : 0);
#endif
#ifdef CONFIG_DARWIN
info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE") * CPUINFO_LSE;
info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE2") * CPUINFO_LSE2;
info |= sysctl_for_bool("hw.optional.arm.FEAT_AES") * CPUINFO_AES;
+ info |= sysctl_for_bool("hw.optional.arm.FEAT_BTI") * CPUINFO_BTI;
#endif
cpuinfo = info;
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 3/4] tcg/aarch64: Emit BTI insns at jump landing pads
2023-08-16 14:25 [PATCH 0/4] tcg/aarch64: Enable BTI within the JIT Richard Henderson
2023-08-16 14:25 ` [PATCH 1/4] tcg: Add tcg_out_tb_start backend hook Richard Henderson
2023-08-16 14:25 ` [PATCH 2/4] util/cpuinfo-aarch64: Add CPUINFO_BTI Richard Henderson
@ 2023-08-16 14:25 ` Richard Henderson
2023-09-12 16:23 ` Peter Maydell
2023-08-16 14:25 ` [PATCH 4/4] tcg: Map code_gen_buffer with PROT_BTI Richard Henderson
2023-09-09 20:50 ` [PATCH 0/4] tcg/aarch64: Enable BTI within the JIT Richard Henderson
4 siblings, 1 reply; 10+ messages in thread
From: Richard Henderson @ 2023-08-16 14:25 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm
The prologue is entered via "call"; the epilogue, each tb,
and each goto_tb continuation point are all reached via "jump".
As tcg_out_goto_long is only used by tcg_out_exit_tb, merge
the two functions. Change the indirect register used to
TCG_REG_TMP1, aka X16, so that the BTI condition created
is "jump" instead of "jump or call".
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/aarch64/tcg-target.c.inc | 49 +++++++++++++++++++++++++-----------
1 file changed, 34 insertions(+), 15 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 8d71ac68f6..fca5baea57 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -602,6 +602,10 @@ typedef enum {
DMB_ISH = 0xd50338bf,
DMB_LD = 0x00000100,
DMB_ST = 0x00000200,
+
+ BTI_C = 0xd503245f,
+ BTI_J = 0xd503249f,
+ BTI_JC = 0xd50324df,
} AArch64Insn;
static inline uint32_t tcg_in32(TCGContext *s)
@@ -843,6 +847,17 @@ static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
| rn << 5 | (rd & 0x1f));
}
+static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
+{
+ /*
+ * While BTI insns are nops on hosts without FEAT_BTI,
+ * there is no point in emitting them in that case either.
+ */
+ if (cpuinfo & CPUINFO_BTI) {
+ tcg_out32(s, insn);
+ }
+}
+
/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
@@ -1351,18 +1366,6 @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
tcg_out_insn(s, 3206, B, offset);
}
-static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
-{
- ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
- if (offset == sextract64(offset, 0, 26)) {
- tcg_out_insn(s, 3206, B, offset);
- } else {
- /* Choose X9 as a call-clobbered non-LR temporary. */
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
- tcg_out_insn(s, 3207, BR, TCG_REG_X9);
- }
-}
-
static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
@@ -1947,12 +1950,23 @@ static const tcg_insn_unit *tb_ret_addr;
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
+ const tcg_insn_unit *target;
+ ptrdiff_t offset;
+
/* Reuse the zeroing that exists for goto_ptr. */
if (a0 == 0) {
- tcg_out_goto_long(s, tcg_code_gen_epilogue);
+ target = tcg_code_gen_epilogue;
} else {
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
- tcg_out_goto_long(s, tb_ret_addr);
+ target = tb_ret_addr;
+ }
+
+ offset = tcg_pcrel_diff(s, target) >> 2;
+ if (offset == sextract64(offset, 0, 26)) {
+ tcg_out_insn(s, 3206, B, offset);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP1, (intptr_t)target);
+ tcg_out_insn(s, 3207, BR, TCG_REG_TMP1);
}
}
@@ -1970,6 +1984,7 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
tcg_out32(s, I3206_B);
tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
set_jmp_reset_offset(s, which);
+ tcg_out_bti(s, BTI_J);
}
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
@@ -3062,6 +3077,8 @@ static void tcg_target_qemu_prologue(TCGContext *s)
{
TCGReg r;
+ tcg_out_bti(s, BTI_C);
+
/* Push (FP, LR) and allocate space for all saved registers. */
tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
TCG_REG_SP, -PUSH_SIZE, 1, 1);
@@ -3102,10 +3119,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
* and fall through to the rest of the epilogue.
*/
tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
+ tcg_out_bti(s, BTI_J);
tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
/* TB epilogue */
tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
+ tcg_out_bti(s, BTI_J);
/* Remove TCG locals stack space. */
tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
@@ -3125,7 +3144,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
static void tcg_out_tb_start(TCGContext *s)
{
- /* nothing to do */
+ tcg_out_bti(s, BTI_J);
}
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 3/4] tcg/aarch64: Emit BTI insns at jump landing pads
2023-08-16 14:25 ` [PATCH 3/4] tcg/aarch64: Emit BTI insns at jump landing pads Richard Henderson
@ 2023-09-12 16:23 ` Peter Maydell
0 siblings, 0 replies; 10+ messages in thread
From: Peter Maydell @ 2023-09-12 16:23 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel, qemu-arm
On Wed, 16 Aug 2023 at 15:27, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> The prologue is entered via "call"; the epilogue, each tb,
> and each goto_tb continuation point are all reached via "jump".
>
> As tcg_out_goto_long is only used by tcg_out_exit_tb, merge
> the two functions. Change the indirect register used to
> TCG_REG_TMP1, aka X16, so that the BTI condition created
> is "jump" instead of "jump or call".
TCG_REG_TMP1 is X17, not X16...
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
> {
> + const tcg_insn_unit *target;
> + ptrdiff_t offset;
> +
> /* Reuse the zeroing that exists for goto_ptr. */
> if (a0 == 0) {
> - tcg_out_goto_long(s, tcg_code_gen_epilogue);
> + target = tcg_code_gen_epilogue;
> } else {
> tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
> - tcg_out_goto_long(s, tb_ret_addr);
> + target = tb_ret_addr;
> + }
> +
> + offset = tcg_pcrel_diff(s, target) >> 2;
> + if (offset == sextract64(offset, 0, 26)) {
> + tcg_out_insn(s, 3206, B, offset);
> + } else {
> + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP1, (intptr_t)target);
> + tcg_out_insn(s, 3207, BR, TCG_REG_TMP1);
Since it's now important that the tempreg we have here is
one of X16/X17 in order to get the right BTI behaviour,
I think a build-time assert of this would be helpful.
That will catch the possibility that we forget about this
and decide to rearrange which registers we use as
tempregs later.
> }
> }
>
> @@ -1970,6 +1984,7 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
> tcg_out32(s, I3206_B);
> tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
> set_jmp_reset_offset(s, which);
> + tcg_out_bti(s, BTI_J);
> }
Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
"Did we miss anywhere that should have had a BTI insn"
is a bit tricky to review, but I assume you've done enough
testing on a BTI-enabled host to catch that.
thanks
-- PMM
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 4/4] tcg: Map code_gen_buffer with PROT_BTI
2023-08-16 14:25 [PATCH 0/4] tcg/aarch64: Enable BTI within the JIT Richard Henderson
` (2 preceding siblings ...)
2023-08-16 14:25 ` [PATCH 3/4] tcg/aarch64: Emit BTI insns at jump landing pads Richard Henderson
@ 2023-08-16 14:25 ` Richard Henderson
2023-08-16 22:13 ` Philippe Mathieu-Daudé
2023-09-09 20:50 ` [PATCH 0/4] tcg/aarch64: Enable BTI within the JIT Richard Henderson
4 siblings, 1 reply; 10+ messages in thread
From: Richard Henderson @ 2023-08-16 14:25 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm
For linux aarch64 host supporting BTI, map the buffer
to require BTI instructions at branch landing pads.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/region.c | 39 ++++++++++++++++++++++++++++-----------
1 file changed, 28 insertions(+), 11 deletions(-)
diff --git a/tcg/region.c b/tcg/region.c
index 2b28ed3556..58cb68c6c8 100644
--- a/tcg/region.c
+++ b/tcg/region.c
@@ -33,8 +33,19 @@
#include "tcg/tcg.h"
#include "exec/translation-block.h"
#include "tcg-internal.h"
+#include "host/cpuinfo.h"
+/*
+ * Local source-level compatibility with Unix.
+ * Used by tcg_region_init below.
+ */
+#if defined(_WIN32)
+#define PROT_READ 1
+#define PROT_WRITE 2
+#define PROT_EXEC 4
+#endif
+
struct tcg_region_tree {
QemuMutex lock;
QTree *tree;
@@ -83,6 +94,16 @@ bool in_code_gen_buffer(const void *p)
return (size_t)(p - region.start_aligned) <= region.total_size;
}
+static int host_prot_read_exec(void)
+{
+#if defined(CONFIG_LINUX) && defined(HOST_AARCH64) && defined(PROT_BTI)
+ if (cpuinfo & CPUINFO_BTI) {
+ return PROT_READ | PROT_EXEC | PROT_BTI;
+ }
+#endif
+ return PROT_READ | PROT_EXEC;
+}
+
#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
@@ -505,14 +526,6 @@ static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
return PROT_READ | PROT_WRITE;
}
#elif defined(_WIN32)
-/*
- * Local source-level compatibility with Unix.
- * Used by tcg_region_init below.
- */
-#define PROT_READ 1
-#define PROT_WRITE 2
-#define PROT_EXEC 4
-
static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
void *buf;
@@ -567,7 +580,7 @@ static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
goto fail;
}
- buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
+ buf_rx = mmap(NULL, size, host_prot_read_exec(), MAP_SHARED, fd, 0);
if (buf_rx == MAP_FAILED) {
goto fail_rx;
}
@@ -642,7 +655,7 @@ static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
return -1;
}
- if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
+ if (mprotect((void *)buf_rx, size, host_prot_read_exec()) != 0) {
error_setg_errno(errp, errno, "mprotect for jit splitwx");
munmap((void *)buf_rx, size);
munmap((void *)buf_rw, size);
@@ -805,7 +818,7 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
need_prot = PROT_READ | PROT_WRITE;
#ifndef CONFIG_TCG_INTERPRETER
if (tcg_splitwx_diff == 0) {
- need_prot |= PROT_EXEC;
+ need_prot |= host_prot_read_exec();
}
#endif
for (size_t i = 0, n = region.n; i < n; i++) {
@@ -820,7 +833,11 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
} else if (need_prot == (PROT_READ | PROT_WRITE)) {
rc = qemu_mprotect_rw(start, end - start);
} else {
+#ifdef CONFIG_POSIX
+ rc = mprotect(start, end - start, need_prot);
+#else
g_assert_not_reached();
+#endif
}
if (rc) {
error_setg_errno(&error_fatal, errno,
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 4/4] tcg: Map code_gen_buffer with PROT_BTI
2023-08-16 14:25 ` [PATCH 4/4] tcg: Map code_gen_buffer with PROT_BTI Richard Henderson
@ 2023-08-16 22:13 ` Philippe Mathieu-Daudé
0 siblings, 0 replies; 10+ messages in thread
From: Philippe Mathieu-Daudé @ 2023-08-16 22:13 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: qemu-arm, Joelle van Dyne
(Cc'ing Joelle)
On 16/8/23 16:25, Richard Henderson wrote:
> For linux aarch64 host supporting BTI, map the buffer
> to require BTI instructions at branch landing pads.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/region.c | 39 ++++++++++++++++++++++++++++-----------
> 1 file changed, 28 insertions(+), 11 deletions(-)
>
> diff --git a/tcg/region.c b/tcg/region.c
> index 2b28ed3556..58cb68c6c8 100644
> --- a/tcg/region.c
> +++ b/tcg/region.c
> @@ -33,8 +33,19 @@
> #include "tcg/tcg.h"
> #include "exec/translation-block.h"
> #include "tcg-internal.h"
> +#include "host/cpuinfo.h"
>
>
> +/*
> + * Local source-level compatibility with Unix.
> + * Used by tcg_region_init below.
> + */
> +#if defined(_WIN32)
> +#define PROT_READ 1
> +#define PROT_WRITE 2
> +#define PROT_EXEC 4
> +#endif
> +
> struct tcg_region_tree {
> QemuMutex lock;
> QTree *tree;
> @@ -83,6 +94,16 @@ bool in_code_gen_buffer(const void *p)
> return (size_t)(p - region.start_aligned) <= region.total_size;
> }
>
> +static int host_prot_read_exec(void)
> +{
> +#if defined(CONFIG_LINUX) && defined(HOST_AARCH64) && defined(PROT_BTI)
> + if (cpuinfo & CPUINFO_BTI) {
> + return PROT_READ | PROT_EXEC | PROT_BTI;
> + }
> +#endif
> + return PROT_READ | PROT_EXEC;
> +}
> +
> #ifdef CONFIG_DEBUG_TCG
> const void *tcg_splitwx_to_rx(void *rw)
> {
> @@ -505,14 +526,6 @@ static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
> return PROT_READ | PROT_WRITE;
> }
> #elif defined(_WIN32)
> -/*
> - * Local source-level compatibility with Unix.
> - * Used by tcg_region_init below.
> - */
> -#define PROT_READ 1
> -#define PROT_WRITE 2
> -#define PROT_EXEC 4
> -
> static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
> {
> void *buf;
> @@ -567,7 +580,7 @@ static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
> goto fail;
> }
>
> - buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
> + buf_rx = mmap(NULL, size, host_prot_read_exec(), MAP_SHARED, fd, 0);
> if (buf_rx == MAP_FAILED) {
> goto fail_rx;
> }
> @@ -642,7 +655,7 @@ static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
> return -1;
> }
>
> - if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
> + if (mprotect((void *)buf_rx, size, host_prot_read_exec()) != 0) {
> error_setg_errno(errp, errno, "mprotect for jit splitwx");
> munmap((void *)buf_rx, size);
> munmap((void *)buf_rw, size);
> @@ -805,7 +818,7 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
> need_prot = PROT_READ | PROT_WRITE;
> #ifndef CONFIG_TCG_INTERPRETER
> if (tcg_splitwx_diff == 0) {
> - need_prot |= PROT_EXEC;
> + need_prot |= host_prot_read_exec();
> }
> #endif
> for (size_t i = 0, n = region.n; i < n; i++) {
> @@ -820,7 +833,11 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
> } else if (need_prot == (PROT_READ | PROT_WRITE)) {
> rc = qemu_mprotect_rw(start, end - start);
> } else {
> +#ifdef CONFIG_POSIX
> + rc = mprotect(start, end - start, need_prot);
Hmm this bypasses the qemu_real_host_page_mask() checks in
qemu_mprotect__osdep(), but I guess this is acceptable.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> +#else
> g_assert_not_reached();
> +#endif
> }
> if (rc) {
> error_setg_errno(&error_fatal, errno,
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 0/4] tcg/aarch64: Enable BTI within the JIT
2023-08-16 14:25 [PATCH 0/4] tcg/aarch64: Enable BTI within the JIT Richard Henderson
` (3 preceding siblings ...)
2023-08-16 14:25 ` [PATCH 4/4] tcg: Map code_gen_buffer with PROT_BTI Richard Henderson
@ 2023-09-09 20:50 ` Richard Henderson
4 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2023-09-09 20:50 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm
Ping. Patch 3 still missing review.
On 8/16/23 07:25, Richard Henderson wrote:
> Patch 1 is cherry-picked from
>
> [PATCH v3 02/14] tcg: Add tcg_out_tb_start backend hook
> https://lore.kernel.org/qemu-devel/20230815195741.8325-3-richard.henderson@linaro.org/T/#u
>
> here used for a different application.
>
> There are not as many landing pads as I had imagined, so the
> overhead here is really quite minimal.
>
> The architecture enables the check only when the PTE for the
> jump target is marked "guarded". Linux implements this by
> adding a PROT_BTI bit for mmap and mprotect. I have isolated
> this within a host_prot_read_exec() local function, which
> seems clean enough. So far, as far as I can tell, Linux it
> the only OS to support BTI.
>
>
> r~
>
>
> Richard Henderson (4):
> tcg: Add tcg_out_tb_start backend hook
> util/cpuinfo-aarch64: Add CPUINFO_BTI
> tcg/aarch64: Emit BTI insns at jump landing pads
> tcg: Map code_gen_buffer with PROT_BTI
>
> host/include/aarch64/host/cpuinfo.h | 1 +
> tcg/region.c | 39 ++++++++++++++++------
> tcg/tcg.c | 3 ++
> util/cpuinfo-aarch64.c | 4 +++
> tcg/aarch64/tcg-target.c.inc | 52 +++++++++++++++++++++--------
> tcg/arm/tcg-target.c.inc | 5 +++
> tcg/i386/tcg-target.c.inc | 5 +++
> tcg/loongarch64/tcg-target.c.inc | 5 +++
> tcg/mips/tcg-target.c.inc | 5 +++
> tcg/ppc/tcg-target.c.inc | 5 +++
> tcg/riscv/tcg-target.c.inc | 5 +++
> tcg/s390x/tcg-target.c.inc | 5 +++
> tcg/sparc64/tcg-target.c.inc | 5 +++
> tcg/tci/tcg-target.c.inc | 5 +++
> 14 files changed, 119 insertions(+), 25 deletions(-)
>
^ permalink raw reply [flat|nested] 10+ messages in thread