* [PATCH] tcg: round-robin: do not use mb_read for rr_current_cpu
@ 2023-05-10 16:03 Paolo Bonzini
2023-05-10 16:03 ` [PATCH] coroutine-asm: add x86 CET shadow stack support Paolo Bonzini
From: Paolo Bonzini @ 2023-05-10 16:03 UTC
To: qemu-devel; +Cc: richard.henderson
Replace the qatomic_mb_read() calls on rr_current_cpu with plain
qatomic_read() plus an explicit smp_mb(), which documents the ordering
that rr_kick_next_cpu() actually relies on.  Note that qatomic_mb_set()
can remain, similar to how Linux has smp_store_mb() (an optimized
version of a store followed by a full memory barrier).
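For reference, the Linux generic fallback spells smp_store_mb() out as
a store followed by a full barrier (a sketch of the asm-generic
definition; architectures may override it with something cheaper):

    #define smp_store_mb(var, value) \
        do { WRITE_ONCE(var, value); smp_mb(); } while (0)

The explicit smp_mb() added below pairs with the barrier implied by
qatomic_mb_set() in rr_cpu_thread_fn(): the round-robin thread orders
its rr_current_cpu store before its later reads, and the kicker orders
cpu_exit()'s stores before re-reading rr_current_cpu.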
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/tcg/tcg-accel-ops-rr.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 290833a37fb2..055f6ae29553 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -71,11 +71,13 @@ static void rr_kick_next_cpu(void)
{
CPUState *cpu;
do {
- cpu = qatomic_mb_read(&rr_current_cpu);
+ cpu = qatomic_read(&rr_current_cpu);
if (cpu) {
cpu_exit(cpu);
}
- } while (cpu != qatomic_mb_read(&rr_current_cpu));
+ /* Finish kicking this cpu before reading again. */
+ smp_mb();
+ } while (cpu != qatomic_read(&rr_current_cpu));
}
static void rr_kick_thread(void *opaque)
@@ -206,8 +208,9 @@ static void *rr_cpu_thread_fn(void *arg)
}
while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
-
+ /* Store rr_current_cpu before evaluating cpu_can_run(). */
qatomic_mb_set(&rr_current_cpu, cpu);
+
current_cpu = cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
@@ -245,7 +248,7 @@ static void *rr_cpu_thread_fn(void *arg)
cpu = CPU_NEXT(cpu);
} /* while (cpu && !cpu->exit_request).. */
- /* Does not need qatomic_mb_set because a spurious wakeup is okay. */
+ /* Does not need a memory barrier because a spurious wakeup is okay. */
qatomic_set(&rr_current_cpu, NULL);
if (cpu && cpu->exit_request) {
--
2.40.1
* [PATCH] coroutine-asm: add x86 CET shadow stack support
2023-05-10 16:03 [PATCH] tcg: round-robin: do not use mb_read for rr_current_cpu Paolo Bonzini
@ 2023-05-10 16:03 ` Paolo Bonzini
2023-05-10 16:05 ` Paolo Bonzini
From: Paolo Bonzini @ 2023-05-10 16:03 UTC
To: qemu-devel; +Cc: richard.henderson
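Allocate a CET shadow stack for each coroutine and switch it together
with the regular stack in CO_SWITCH.  In pseudo-C, the shadow-stack
half of the switch does roughly the following (a sketch only;
rdssp()/rstorssp()/saveprevssp() stand for the instructions of the
same name, and the -8 offset is explained by the comment in the code):

    if (rdssp() != 0) {              /* shadow stacks enabled? */
        from->ssp = rdssp();         /* save source shadow SP */
        rstorssp(to->ssp - 8);       /* switch to destination's token */
        saveprevssp();               /* leave restore token on source */
    }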
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
meson.build | 16 +++++++--
util/coroutine-asm.c | 82 ++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 93 insertions(+), 5 deletions(-)
diff --git a/meson.build b/meson.build
index 0121ccab78dd..17e4a3bc582e 100644
--- a/meson.build
+++ b/meson.build
@@ -328,6 +328,10 @@ elif coroutine_backend not in supported_backends
.format(coroutine_backend, ', '.join(supported_backends)))
endif
+if cfi_mode == 'hw' and coroutine_backend != 'asm'
+ error('Hardware control-flow integrity requires the "asm" coroutine backend.')
+endif
+
# Compiles if SafeStack *not* enabled
safe_stack_probe = '''
int main(void)
@@ -469,16 +473,22 @@ if cfi_mode == 'sw'
endif
endif
elif cfi_mode in ['hw', 'auto']
- if cfi_mode == 'hw'
- error('Hardware CFI is not supported yet')
+ if cpu in ['x86', 'x86_64']
+ cfi_flags += cc.get_supported_arguments('-fcf-protection=full')
+ if cfi_flags == [] and cfi_mode == 'hw'
+ error('C compiler does not support -fcf-protection')
+ endif
+ elif cfi_mode == 'hw'
+ error('Hardware CFI is only supported on x86')
endif
if cfi_flags == [] and cfi_mode == 'auto'
cfi_mode = 'disabled'
endif
endif
-if cpu in ['x86', 'x86_64']
+if cpu in ['x86', 'x86_64'] and cfi_mode != 'hw'
cfi_flags += cc.get_supported_arguments('-fcf-protection=branch')
endif
+
add_global_arguments(cfi_flags, native: false, language: all_languages)
add_global_link_arguments(cfi_flags, native: false, language: all_languages)
diff --git a/util/coroutine-asm.c b/util/coroutine-asm.c
index a06ecbcb0a07..771b1d4a0fc9 100644
--- a/util/coroutine-asm.c
+++ b/util/coroutine-asm.c
@@ -22,6 +22,13 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/coroutine_int.h"
+#include "qemu/error-report.h"
+
+#ifdef CONFIG_CF_PROTECTION
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+int arch_prctl(int code, unsigned long addr);
+#endif
#ifdef CONFIG_VALGRIND_H
#include <valgrind/valgrind.h>
@@ -39,10 +46,14 @@
typedef struct {
Coroutine base;
void *sp;
+ void *ssp;
void *stack;
size_t stack_size;
+ /* x86: CET shadow stack */
+ void *sstack;
+ size_t sstack_size;
#ifdef CONFIG_VALGRIND_H
unsigned int valgrind_stack_id;
#endif
@@ -77,6 +88,35 @@ static void start_switch_fiber(void **fake_stack_save,
#endif
}
+static bool have_sstack(void)
+{
+#if defined CONFIG_CF_PROTECTION && defined __x86_64__
+ uint64_t ssp;
+ asm ("xor %0, %0; rdsspq %0\n" : "=r" (ssp));
+ return !!ssp;
+#else
+ return false;
+#endif
+}
+
+static void *alloc_sstack(size_t sz)
+{
+#if defined CONFIG_CF_PROTECTION && defined __x86_64__
+#ifndef ARCH_X86_CET_ALLOC_SHSTK
+#define ARCH_X86_CET_ALLOC_SHSTK 0x3004
+#endif
+
+ uint64_t arg = sz;
+ if (arch_prctl(ARCH_X86_CET_ALLOC_SHSTK, (unsigned long) &arg) < 0) {
+ abort();
+ }
+
+ return (void *)arg;
+#else
+ abort();
+#endif
+}
+
#ifdef __x86_64__
/*
* We hardcode all operands to specific registers so that we can write down all the
@@ -88,6 +128,26 @@ static void start_switch_fiber(void **fake_stack_save,
* Note that push and call would clobber the red zone. Makefile.objs compiles this
* file with -mno-red-zone. The alternative is to subtract/add 128 bytes from rsp
* around the switch, with slightly lower cache performance.
+ *
+ * The RSTORSSP and SAVEPREVSSP instructions are intricate. In a nutshell they are:
+ *
+ * RSTORSSP(mem): oldSSP = SSP
+ * SSP = mem
+ * *SSP = oldSSP
+ *
+ * SAVEPREVSSP: oldSSP = shadow_stack_pop()
+ * *(oldSSP - 8) = oldSSP # "push" to old shadow stack
+ *
+ * Therefore, RSTORSSP(mem) followed by SAVEPREVSSP is the same as
+ *
+ * shadow_stack_push(SSP)
+ * SSP = mem
+ * shadow_stack_pop()
+ *
+ * From the simplified description you can see that co->ssp, being stored before
+ * the RSTORSSP+SAVEPREVSSP sequence, points to the top actual entry of the shadow
+ * stack, not to the restore token. Hence we use an offset of -8 in the operand
+ * of rstorssp.
*/
#define CO_SWITCH(from, to, action, jump) ({ \
int action_ = action; \
@@ -100,7 +160,15 @@ static void start_switch_fiber(void **fake_stack_save,
"jmp 2f\n" /* switch back continues at label 2 */ \
\
"1: .cfi_adjust_cfa_offset 8\n" \
- "movq %%rsp, %c[SP](%[FROM])\n" /* save source SP */ \
+ "xor %%rbp, %%rbp\n" /* use old frame pointer as scratch reg */ \
+ "rdsspq %%rbp\n" \
+ "test %%rbp, %%rbp\n" /* if CET is enabled... */ \
+ "jz 9f\n" \
+ "movq %%rbp, %c[SSP](%[FROM])\n" /* ... save source shadow SP, */ \
+ "movq %c[SSP](%[TO]), %%rbp\n" /* restore destination shadow stack, */ \
+ "rstorssp -8(%%rbp)\n" \
+ "saveprevssp\n" /* and save source shadow SP token */ \
+ "9: movq %%rsp, %c[SP](%[FROM])\n" /* save source SP */ \
"movq %c[SP](%[TO]), %%rsp\n" /* load destination SP */ \
jump "\n" /* coroutine switch */ \
\
@@ -108,7 +176,8 @@ static void start_switch_fiber(void **fake_stack_save,
"popq %%rbp\n" \
".cfi_adjust_cfa_offset -8\n" \
: "+a" (action_), [FROM] "+b" (from_), [TO] "+D" (to_) \
- : [SP] "i" (offsetof(CoroutineAsm, sp)) \
+ : [SP] "i" (offsetof(CoroutineAsm, sp)), \
+ [SSP] "i" (offsetof(CoroutineAsm, ssp)) \
: "rcx", "rdx", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
"memory"); \
action_; \
@@ -141,6 +210,12 @@ Coroutine *qemu_coroutine_new(void)
co->stack = qemu_alloc_stack(&co->stack_size);
co->sp = co->stack + co->stack_size;
+ if (have_sstack()) {
+ co->sstack_size = COROUTINE_SHADOW_STACK_SIZE;
+ co->sstack = alloc_sstack(co->sstack_size);
+ co->ssp = co->sstack + co->sstack_size;
+ }
+
#ifdef CONFIG_VALGRIND_H
co->valgrind_stack_id =
VALGRIND_STACK_REGISTER(co->stack, co->stack + co->stack_size);
@@ -186,6 +261,9 @@ void qemu_coroutine_delete(Coroutine *co_)
#endif
qemu_free_stack(co->stack, co->stack_size);
+ if (co->sstack) {
+ munmap(co->sstack, co->sstack_size);
+ }
g_free(co);
}
--
2.40.1
* [PATCH] tcg: round-robin: do not use mb_read for rr_current_cpu
@ 2023-05-10 16:04 Paolo Bonzini
2023-05-10 16:45 ` Richard Henderson
From: Paolo Bonzini @ 2023-05-10 16:04 UTC
To: qemu-devel; +Cc: richard.henderson
Replace the qatomic_mb_read() calls on rr_current_cpu with plain
qatomic_read() plus an explicit smp_mb(), which documents the ordering
that rr_kick_next_cpu() actually relies on.  Note that qatomic_mb_set()
can remain, similar to how Linux has smp_store_mb() (an optimized
version of a store followed by a full memory barrier).
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/tcg/tcg-accel-ops-rr.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 290833a37fb2..055f6ae29553 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -71,11 +71,13 @@ static void rr_kick_next_cpu(void)
{
CPUState *cpu;
do {
- cpu = qatomic_mb_read(&rr_current_cpu);
+ cpu = qatomic_read(&rr_current_cpu);
if (cpu) {
cpu_exit(cpu);
}
- } while (cpu != qatomic_mb_read(&rr_current_cpu));
+ /* Finish kicking this cpu before reading again. */
+ smp_mb();
+ } while (cpu != qatomic_read(&rr_current_cpu));
}
static void rr_kick_thread(void *opaque)
@@ -206,8 +208,9 @@ static void *rr_cpu_thread_fn(void *arg)
}
while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
-
+ /* Store rr_current_cpu before evaluating cpu_can_run(). */
qatomic_mb_set(&rr_current_cpu, cpu);
+
current_cpu = cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
@@ -245,7 +248,7 @@ static void *rr_cpu_thread_fn(void *arg)
cpu = CPU_NEXT(cpu);
} /* while (cpu && !cpu->exit_request).. */
- /* Does not need qatomic_mb_set because a spurious wakeup is okay. */
+ /* Does not need a memory barrier because a spurious wakeup is okay. */
qatomic_set(&rr_current_cpu, NULL);
if (cpu && cpu->exit_request) {
--
2.40.1
* Re: [PATCH] coroutine-asm: add x86 CET shadow stack support
2023-05-10 16:03 ` [PATCH] coroutine-asm: add x86 CET shadow stack support Paolo Bonzini
@ 2023-05-10 16:05 ` Paolo Bonzini
From: Paolo Bonzini @ 2023-05-10 16:05 UTC
To: qemu-devel; +Cc: richard.henderson
Sorry, this was sent incorrectly (extra "-1" in the git-send-email
command line).
Paolo
* Re: [PATCH] tcg: round-robin: do not use mb_read for rr_current_cpu
2023-05-10 16:04 [PATCH] tcg: round-robin: do not use mb_read for rr_current_cpu Paolo Bonzini
@ 2023-05-10 16:45 ` Richard Henderson
From: Richard Henderson @ 2023-05-10 16:45 UTC
To: Paolo Bonzini, qemu-devel
On 5/10/23 17:04, Paolo Bonzini wrote:
> Replace the qatomic_mb_read() calls on rr_current_cpu with plain
> qatomic_read() plus an explicit smp_mb(), which documents the ordering
> that rr_kick_next_cpu() actually relies on.  Note that qatomic_mb_set()
> can remain, similar to how Linux has smp_store_mb() (an optimized
> version of a store followed by a full memory barrier).
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> accel/tcg/tcg-accel-ops-rr.c | 11 +++++++----
> 1 file changed, 7 insertions(+), 4 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~