From: Balint Reczey via <qemu-devel@nongnu.org>
Cc: Warner Losh <imp@bsdimp.com>, Kyle Evans <kevans@freebsd.org>,
Riku Voipio <riku.voipio@iki.fi>,
Laurent Vivier <laurent@vivier.eu>,
Paolo Bonzini <pbonzini@redhat.com>,
"Marc-André Lureau" <marcandre.lureau@redhat.com>,
"Daniel P. Berrangé" <berrange@redhat.com>,
"Philippe Mathieu-Daudé" <philmd@linaro.org>,
"Alex Bennée" <alex.bennee@linaro.org>,
Thomas Huth <thuth@redhat.com>
Subject: [PATCH v2 1/1] user: add runtime switch to call safe_syscall via libc
Date: Sun, 2 Nov 2025 14:26:45 +0100 [thread overview]
Message-ID: <b1a93d6bbac236f43803b5f0f0a220f6.rbalint@gmail.com> (raw)
Add a libc-backed path for safe_syscall() that makes syscalls via
libc's syscall(). This enables interposing syscalls via LD_PRELOAD when
running static guest binaries under a dynamically linked qemu-user.
The assembly implementation (safe_syscall_base()) remains the default.
A runtime switch selects the libc path; support for the switch must be
enabled at build time via ./configure.
Configure option: --enable-libc-syscalls
Command line: -libc-syscalls
Environment: QEMU_LIBC_SYSCALLS
This preserves the existing signal-rewind semantics via the assembly
path by default, while enabling optional libc-based integration when
requested.
Signed-off-by: Balint Reczey <balint@balintreczey.hu>
---
bsd-user/main.c | 20 +++++++++++
common-user/meson.build | 4 +++
common-user/safe-syscall.c | 66 +++++++++++++++++++++++++++++++++++
docs/user/main.rst | 30 ++++++++++++++--
include/user/safe-syscall.h | 19 +++++++++-
linux-user/main.c | 15 ++++++++
meson.build | 1 +
meson_options.txt | 4 +++
scripts/meson-buildoptions.sh | 4 +++
9 files changed, 160 insertions(+), 3 deletions(-)
create mode 100644 common-user/safe-syscall.c
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 73aae8c327..692468453e 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -38,6 +38,9 @@
#include "qemu/plugin.h"
#include "user/guest-base.h"
#include "user/page-protection.h"
+#ifdef CONFIG_LIBC_SYSCALLS
+#include "user/safe-syscall.h"
+#endif
#include "accel/accel-ops.h"
#include "tcg/startup.h"
#include "qemu/timer.h"
@@ -166,6 +169,9 @@ static void usage(void)
"-E var=value sets/modifies targets environment variable(s)\n"
"-U var unsets targets environment variable(s)\n"
"-B address set guest_base address to address\n"
+#ifdef CONFIG_LIBC_SYSCALLS
+ "-libc-syscalls use libc syscall() instead of assembly safe-syscall\n"
+#endif
"\n"
"Debug options:\n"
"-d item1[,...] enable logging of specified items\n"
@@ -183,6 +189,10 @@ static void usage(void)
"Environment variables:\n"
"QEMU_STRACE Print system calls and arguments similar to the\n"
" 'strace' program. Enable by setting to any value.\n"
+#ifdef CONFIG_LIBC_SYSCALLS
+ "QEMU_LIBC_SYSCALLS Use libc syscall() instead of assembly safe-syscall.\n"
+ " Enable by setting to any value.\n"
+#endif
"You can use -E and -U options to set/unset environment variables\n"
"for target process. It is possible to provide several variables\n"
"by repeating the option. For example:\n"
@@ -310,6 +320,12 @@ int main(int argc, char **argv)
qemu_add_opts(&qemu_trace_opts);
qemu_plugin_add_opts();
+#ifdef CONFIG_LIBC_SYSCALLS
+ if (getenv("QEMU_LIBC_SYSCALLS")) {
+ qemu_use_libc_syscalls = true;
+ }
+#endif
+
optind = 1;
for (;;) {
if (optind >= argc) {
@@ -380,6 +396,10 @@ int main(int argc, char **argv)
have_guest_base = true;
} else if (!strcmp(r, "drop-ld-preload")) {
(void) envlist_unsetenv(envlist, "LD_PRELOAD");
+#ifdef CONFIG_LIBC_SYSCALLS
+ } else if (!strcmp(r, "libc-syscalls")) {
+ qemu_use_libc_syscalls = true;
+#endif
} else if (!strcmp(r, "seed")) {
seed_optarg = optarg;
} else if (!strcmp(r, "one-insn-per-tb")) {
diff --git a/common-user/meson.build b/common-user/meson.build
index ac9de5b9e3..1df0302001 100644
--- a/common-user/meson.build
+++ b/common-user/meson.build
@@ -8,3 +8,7 @@ user_ss.add(files(
'safe-syscall.S',
'safe-syscall-error.c',
))
+
+if get_option('libc_syscalls').enabled()
+ user_ss.add(files('safe-syscall.c'))
+endif
diff --git a/common-user/safe-syscall.c b/common-user/safe-syscall.c
new file mode 100644
index 0000000000..13a702761f
--- /dev/null
+++ b/common-user/safe-syscall.c
@@ -0,0 +1,66 @@
+/*
+ * safe-syscall.c: C implementation using libc's syscall()
+ * to handle signals occurring right before system calls.
+ *
+ * Written by Balint Reczey <balint@balintreczey.hu>
+ *
+ * Copyright (C) 2025 Balint Reczey
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#if defined(__linux__)
+# include "special-errno.h"
+#elif defined(__FreeBSD__)
+# include "errno_defs.h"
+#endif
+#include "user/safe-syscall.h"
+#include <stdarg.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include "qemu/atomic.h"
+
+/* Global runtime toggle (default: false). */
+bool qemu_use_libc_syscalls;
+
+/*
+ * libc-backed implementation: make a system call via libc's syscall()
+ * unless a guest signal is pending.
+ *
+ * Always reads six long varargs after @number and forwards them all.
+ * If *@pending is non-zero, sets errno to QEMU_ERESTARTSYS and returns -1
+ * without calling syscall(); otherwise returns syscall()'s result.
+ *
+ * IMPORTANT: unlike the assembly implementation, this can't completely
+ * eliminate the race between checking @pending and entering the syscall
+ * (see safe-syscall.h for how the assembly versions close it). The window
+ * is narrow, and the primary use case is debugging and LD_PRELOAD
+ * interposition, where observability may matter more than atomicity.
+ */
+long safe_syscall_libc(int *pending, long number, ...)
+{
+ va_list ap;
+ long arg1, arg2, arg3, arg4, arg5, arg6;
+ long ret;
+
+ va_start(ap, number);
+ /* Unconditionally fetch six arguments, whatever the syscall needs */
+ arg1 = va_arg(ap, long);
+ arg2 = va_arg(ap, long);
+ arg3 = va_arg(ap, long);
+ arg4 = va_arg(ap, long);
+ arg5 = va_arg(ap, long);
+ arg6 = va_arg(ap, long);
+ va_end(ap);
+
+ if (qatomic_read(pending)) {
+ errno = QEMU_ERESTARTSYS;
+ return -1;
+ }
+
+ /* Checked as late as possible: nothing else runs before the syscall */
+ ret = syscall(number, arg1, arg2, arg3, arg4, arg5, arg6);
+
+ return ret;
+}
diff --git a/docs/user/main.rst b/docs/user/main.rst
index a8ddf91424..c3f0d4b4fe 100644
--- a/docs/user/main.rst
+++ b/docs/user/main.rst
@@ -70,7 +70,7 @@ Command line options
::
- qemu-i386 [-h] [-d] [-L path] [-s size] [-cpu model] [-g endpoint] [-B offset] [-R size] program [arguments...]
+ qemu-i386 [-h] [-d] [-L path] [-s size] [-cpu model] [-g endpoint] [-B offset] [-R size] [-libc-syscalls] program [arguments...]
``-h``
Print the help
@@ -101,6 +101,16 @@ Command line options
bytes). \"G\", \"M\", and \"k\" suffixes may be used when specifying
the size.
+``-libc-syscalls``
+ Use the host C library's ``syscall()`` entry point for guest system calls
+ instead of QEMU's built-in assembly safe-syscall. By default this option
+ is disabled and QEMU uses its internal assembly implementation for
+ performance and precise control of signal-restart semantics. This switch is
+ primarily intended for debugging and other special scenarios (for example
+ when interposing on ``syscall()`` via ``LD_PRELOAD``). Available on Linux
+ and BSD user-mode builds. This option is only present if QEMU was configured
+ with ``-Dlibc_syscalls=enabled``.
+
Debug options:
``-d item1,...``
@@ -135,6 +145,10 @@ QEMU_STRACE
format are printed with information for six arguments. Many
flag-style arguments don't have decoders and will show up as numbers.
+QEMU_LIBC_SYSCALLS
+ When set to any value (even an empty one), behave as if ``-libc-syscalls``
+ was specified on the command line. Defaults to disabled.
+
Other binaries
~~~~~~~~~~~~~~
@@ -231,7 +245,7 @@ Command line options
::
- qemu-sparc64 [-h] [-d] [-L path] [-s size] [-bsd type] program [arguments...]
+ qemu-sparc64 [-h] [-d] [-L path] [-s size] [-bsd type] [-libc-syscalls] program [arguments...]
``-h``
Print the help
@@ -256,6 +270,12 @@ Command line options
Set the type of the emulated BSD Operating system. Valid values are
FreeBSD, NetBSD and OpenBSD (default).
+``-libc-syscalls``
+ Use the host C library's ``syscall()`` entry point for guest system calls
+ instead of QEMU's built-in assembly safe-syscall. See the Linux user-mode
+ option of the same name for details. Defaults to disabled. This option is
+ only present if QEMU was configured with ``-Dlibc_syscalls=enabled``.
+
Debug options:
``-d item1,...``
@@ -266,3 +286,9 @@ Debug options:
Run the emulation with one guest instruction per translation block.
This slows down emulation a lot, but can be useful in some situations,
such as when trying to analyse the logs produced by the ``-d`` option.
+
+Environment variables:
+
+QEMU_LIBC_SYSCALLS
+ When set to any value (even an empty one), behave as if ``-libc-syscalls``
+ was specified on the command line. Defaults to disabled.
diff --git a/include/user/safe-syscall.h b/include/user/safe-syscall.h
index aa075f4d5c..682f2f16f0 100644
--- a/include/user/safe-syscall.h
+++ b/include/user/safe-syscall.h
@@ -128,13 +128,30 @@
/* The core part of this function is implemented in assembly */
long safe_syscall_base(int *pending, long number, ...);
long safe_syscall_set_errno_tail(int value);
+#ifdef CONFIG_LIBC_SYSCALLS
+/* This is implemented in C.*/
+long safe_syscall_libc(int *pending, long number, ...);
+extern bool qemu_use_libc_syscalls;
+#endif
-/* These are defined by the safe-syscall.inc.S file */
+/*
+ * These are defined by the safe-syscall.inc.S file.
+ * The libc-backed C path neither defines nor references them.
+ */
extern char safe_syscall_start[];
extern char safe_syscall_end[];
+#ifdef CONFIG_LIBC_SYSCALLS
+#define safe_syscall(...) \
+ (qemu_use_libc_syscalls ? \
+ safe_syscall_libc(&get_task_state(thread_cpu)->signal_pending, \
+ __VA_ARGS__) : \
+ safe_syscall_base(&get_task_state(thread_cpu)->signal_pending, \
+ __VA_ARGS__))
+#else
#define safe_syscall(...) \
safe_syscall_base(&get_task_state(thread_cpu)->signal_pending, \
__VA_ARGS__)
+#endif
#endif
diff --git a/linux-user/main.c b/linux-user/main.c
index db751c0757..8d4e4f7722 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -40,6 +40,9 @@
#include "qemu/plugin.h"
#include "user/guest-base.h"
#include "user/page-protection.h"
+#ifdef CONFIG_LIBC_SYSCALLS
+#include "user/safe-syscall.h"
+#endif
#include "exec/gdbstub.h"
#include "gdbstub/user.h"
#include "accel/accel-ops.h"
@@ -456,6 +459,14 @@ static void handle_arg_jitdump(const char *arg)
perf_enable_jitdump();
}
+#ifdef CONFIG_LIBC_SYSCALLS
+static void handle_arg_libc_syscalls(const char *arg)
+{
+ /* Select the libc-backed syscall path; @arg is not used */
+ qemu_use_libc_syscalls = true;
+}
+#endif
+
static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
#ifdef CONFIG_PLUGIN
@@ -534,6 +545,10 @@ static const struct qemu_argument arg_table[] = {
"", "Generate a /tmp/perf-${pid}.map file for perf"},
{"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump,
"", "Generate a jit-${pid}.dump file for perf"},
+#ifdef CONFIG_LIBC_SYSCALLS
+ {"libc-syscalls", "QEMU_LIBC_SYSCALLS", false, handle_arg_libc_syscalls,
+ "", "use libc syscall() instead of assembly safe-syscall"},
+#endif
{NULL, NULL, false, NULL, NULL, NULL}
};
diff --git a/meson.build b/meson.build
index 48c1795b0f..21bf921e34 100644
--- a/meson.build
+++ b/meson.build
@@ -2508,6 +2508,7 @@ if numa.found()
endif
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
+config_host_data.set('CONFIG_LIBC_SYSCALLS', get_option('libc_syscalls').enabled())
config_host_data.set('CONFIG_RBD', rbd.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
diff --git a/meson_options.txt b/meson_options.txt
index 2836156257..0f1ad3452c 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -66,6 +66,10 @@ option('tools', type : 'feature', value : 'auto',
option('qga_vss', type : 'feature', value: 'auto',
description: 'build QGA VSS support (broken with MinGW)')
+# Enable libc-backed safe_syscall in user-mode (-libc-syscalls option)
+option('libc_syscalls', type: 'feature', value: 'disabled',
+ description: 'Enable libc-backed safe_syscall() and -libc-syscalls in user-mode (POTENTIALLY UNSAFE SIGNAL HANDLING)')
+
option('malloc_trim', type : 'feature', value : 'auto',
description: 'enable libc malloc_trim() for memory optimization')
option('malloc', type : 'combo', choices : ['system', 'tcmalloc', 'jemalloc'],
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 3d0d132344..31ab6625fb 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -136,6 +136,8 @@ meson_options_help() {
printf "%s\n" ' keyring Linux keyring support'
printf "%s\n" ' kvm KVM acceleration support'
printf "%s\n" ' l2tpv3 l2tpv3 network backend support'
+ printf "%s\n" ' libc-syscalls Enable libc-backed safe_syscall() and -libc-syscalls in'
+ printf "%s\n" ' user-mode (POTENTIALLY UNSAFE SIGNAL HANDLING)'
printf "%s\n" ' libcbor libcbor support'
printf "%s\n" ' libdaxctl libdaxctl support'
printf "%s\n" ' libdw debuginfo support'
@@ -365,6 +367,8 @@ _meson_option_parse() {
--disable-kvm) printf "%s" -Dkvm=disabled ;;
--enable-l2tpv3) printf "%s" -Dl2tpv3=enabled ;;
--disable-l2tpv3) printf "%s" -Dl2tpv3=disabled ;;
+ --enable-libc-syscalls) printf "%s" -Dlibc_syscalls=enabled ;;
+ --disable-libc-syscalls) printf "%s" -Dlibc_syscalls=disabled ;;
--enable-libcbor) printf "%s" -Dlibcbor=enabled ;;
--disable-libcbor) printf "%s" -Dlibcbor=disabled ;;
--enable-libdaxctl) printf "%s" -Dlibdaxctl=enabled ;;
--
2.43.0
reply other threads:[~2025-11-17 21:43 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b1a93d6bbac236f43803b5f0f0a220f6.rbalint@gmail.com \
--to=qemu-devel@nongnu.org \
--cc=alex.bennee@linaro.org \
--cc=balint@balintreczey.hu \
--cc=berrange@redhat.com \
--cc=imp@bsdimp.com \
--cc=kevans@freebsd.org \
--cc=laurent@vivier.eu \
--cc=marcandre.lureau@redhat.com \
--cc=pbonzini@redhat.com \
--cc=philmd@linaro.org \
--cc=riku.voipio@iki.fi \
--cc=thuth@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).