Linux userland API discussions
 help / color / mirror / Atom feed
From: Li Chen <me@linux.beauty>
To: Christian Brauner <brauner@kernel.org>,
	Kees Cook <kees@kernel.org>,
	Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org, linux-api@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-arch@vger.kernel.org, linux-doc@vger.kernel.org,
	linux-kselftest@vger.kernel.org, x86@kernel.org,
	Arnd Bergmann <arnd@arndb.de>, Andy Lutomirski <luto@kernel.org>,
	Thomas Gleixner <tglx@kernel.org>, Ingo Molnar <mingo@redhat.com>,
	Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	"H. Peter Anvin" <hpa@zytor.com>, Jan Kara <jack@suse.cz>,
	Jonathan Corbet <corbet@lwn.net>,
	Shuah Khan <skhan@linuxfoundation.org>, Li Chen <me@linux.beauty>
Subject: [RFC PATCH v1 06/13] exec: add spawn_template_spawn()
Date: Thu, 28 May 2026 17:52:27 +0800	[thread overview]
Message-ID: <20260528095235.2491226-7-me@linux.beauty> (raw)
In-Reply-To: <20260528095235.2491226-1-me@linux.beauty>

Add spawn_template_spawn() to start a child from a template fd. The child
uses the template's pinned executable file, applies the per-spawn fd, cwd,
and signal actions, closes all non-stdio fds by default, and then executes
through the normal opened-file exec path.
Return a pidfd for the child so userspace can wait for it or signal it
without racing against pid reuse. Fd inheritance stays opt-in via
SPAWN_TEMPLATE_SPAWN_INHERIT_FDS.
This patch consumes cached template state but does not add ELF metadata
caching; executable identity validation and ELF metadata caching are added
by separate patches in this series.

Signed-off-by: Li Chen <me@linux.beauty>
---
 fs/spawn_template.c      | 346 +++++++++++++++++++++++++++++++++++++++
 include/linux/syscalls.h |   4 +
 2 files changed, 350 insertions(+)

diff --git a/fs/spawn_template.c b/fs/spawn_template.c
index 280a1038cc45e..8c3711929cffb 100644
--- a/fs/spawn_template.c
+++ b/fs/spawn_template.c
@@ -1,14 +1,24 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/anon_inodes.h>
+#include <linux/binfmts.h>
+#include <linux/close_range.h>
 #include <linux/cred.h>
 #include <linux/err.h>
 #include <linux/fcntl.h>
+#include <linux/fdtable.h>
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/fs_struct.h>
 #include <linux/kernel.h>
+#include <linux/namei.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/signal.h>
 #include <linux/slab.h>
+#include <linux/string.h>
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
+#include <uapi/linux/openat2.h>
 #include <uapi/linux/spawn_template.h>
 
 #include "internal.h"
@@ -22,8 +32,262 @@ struct spawn_template {
 	bool deny_write;
 };
 
+/*
+ * Per-call state built by the spawn_template_spawn() syscall and handed to
+ * spawn_template_child() through kernel_clone_args.fn_arg.
+ */
+struct spawn_template_spawn_context {
+	/* Template taken from the template file's private_data. */
+	struct spawn_template *tmpl;
+	/* Kernel copy of the caller's spawn arguments. */
+	struct spawn_template_spawn_args args;
+	/* Validated kernel copy of the action array; NULL when there are none. */
+	struct spawn_template_action *actions;
+};
+
 static const struct file_operations spawn_template_fops;
 
+/*
+ * Translate a negative errno from the child setup/exec path into the exit
+ * status used for the child: 127 for -ENOENT, 126 for -EACCES and
+ * -ENOEXEC, and 1 for every other value.
+ */
+static int spawn_template_exit_status(int err)
+{
+	if (err == -ENOENT)
+		return 127;
+
+	if (err == -EACCES || err == -ENOEXEC)
+		return 126;
+
+	return 1;
+}
+
+/*
+ * Report whether the caller's current credentials are the same cred object
+ * that the template stores in creator_cred.  This compares pointers, not
+ * the contents of the credentials.
+ */
+static bool spawn_template_cred_matches(struct spawn_template *tmpl)
+{
+	const struct cred *caller = current_cred();
+
+	return tmpl->creator_cred == caller;
+}
+
+/*
+ * Fetch the signal set referenced by @action->arg into @mask.
+ *
+ * @action->arg is a user pointer to a struct spawn_template_sigset, which
+ * carries the size of, and a user pointer to, the actual sigset_t.  SIGKILL
+ * and SIGSTOP are always removed from the result.
+ *
+ * Returns 0 on success, -EINVAL when @action->arg is zero or the supplied
+ * sigsetsize differs from sizeof(sigset_t), and -EFAULT when either user
+ * copy fails.
+ */
+static int spawn_template_copy_signal_set(const struct spawn_template_action *action,
+					  sigset_t *mask)
+{
+	struct spawn_template_sigset desc;
+
+	if (!action->arg)
+		return -EINVAL;
+
+	/* First level: the descriptor that says where the set lives. */
+	if (copy_from_user(&desc, u64_to_user_ptr(action->arg), sizeof(desc)))
+		return -EFAULT;
+
+	if (desc.sigsetsize != sizeof(sigset_t))
+		return -EINVAL;
+
+	/* Second level: the signal set itself. */
+	if (copy_from_user(mask, u64_to_user_ptr(desc.sigset), sizeof(*mask)))
+		return -EFAULT;
+
+	sigdelsetmask(mask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+	return 0;
+}
+
+/*
+ * Apply one SPAWN_TEMPLATE_ACTION_OPEN action: open the path described by
+ * the user-space struct spawn_template_open at @action->arg, relative to
+ * @action->fd, and install the resulting file at descriptor @action->newfd.
+ *
+ * Returns a negative errno on failure; on success the return value is
+ * whatever replace_fd() returns.
+ */
+static int spawn_template_apply_open(const struct spawn_template_action *action)
+{
+	struct spawn_template_open open;
+	struct file *file __free(fput) = NULL;
+	struct file *tmp;
+	struct open_flags op;
+	int ret;
+
+	/* Same field constraints spawn_template_copy_actions() applies to OPEN. */
+	if (action->fd < AT_FDCWD || action->newfd < 0 || action->flags ||
+	    !action->arg)
+		return -EINVAL;
+
+	if (copy_from_user(&open, u64_to_user_ptr(action->arg), sizeof(open)))
+		return -EFAULT;
+
+	/* Turn the user-supplied open.how into the internal open_flags form. */
+	ret = build_open_flags(&open.how, &op);
+	if (ret)
+		return ret;
+
+	CLASS(filename_flags, name)(u64_to_user_ptr(open.path), op.lookup_flags);
+	tmp = do_file_open(action->fd, name, &op);
+	if (IS_ERR(tmp))
+		return PTR_ERR(tmp);
+	/* Only a valid file is stored in the __free(fput) variable. */
+	file = tmp;
+
+	/* Only O_CLOEXEC from open.how.flags is forwarded to replace_fd(). */
+	return replace_fd(action->newfd, file, open.how.flags & O_CLOEXEC);
+}
+
+/*
+ * Apply a SPAWN_TEMPLATE_ACTION_SIGMASK action: replace the current task's
+ * blocked-signal mask with the set referenced by @action->arg.
+ *
+ * Returns 0 on success, -EINVAL when fd, newfd or flags are non-zero, or
+ * the error from spawn_template_copy_signal_set().
+ */
+static int spawn_template_apply_sigmask(const struct spawn_template_action *action)
+{
+	sigset_t blocked;
+	int err;
+
+	if (action->fd || action->newfd || action->flags)
+		return -EINVAL;
+
+	err = spawn_template_copy_signal_set(action, &blocked);
+	if (!err)
+		set_current_blocked(&blocked);
+
+	return err;
+}
+
+/*
+ * Apply a SPAWN_TEMPLATE_ACTION_SIGDEFAULT action: reset the disposition of
+ * every signal in the set referenced by @action->arg to SIG_DFL.
+ *
+ * SIGKILL and SIGSTOP never appear in the set because
+ * spawn_template_copy_signal_set() strips them.
+ *
+ * Returns 0 on success, -EINVAL when fd, newfd or flags are non-zero, the
+ * error from spawn_template_copy_signal_set(), or the first error returned
+ * by do_sigaction().
+ */
+static int spawn_template_apply_sigdefault(const struct spawn_template_action *action)
+{
+	sigset_t mask;
+	struct k_sigaction sa = {};
+	int ret;
+	int sig;
+
+	if (action->fd || action->newfd || action->flags)
+		return -EINVAL;
+
+	ret = spawn_template_copy_signal_set(action, &mask);
+	if (ret)
+		return ret;
+
+	sa.sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa.sa_mask);
+
+	/*
+	 * Signal numbers run from 1 to _NSIG inclusive, so the upper bound
+	 * must be "<=": with "<" the highest real-time signal would be
+	 * silently left untouched even when the caller asked to reset it.
+	 */
+	for (sig = 1; sig <= _NSIG; sig++) {
+		if (!sigismember(&mask, sig))
+			continue;
+		ret = do_sigaction(sig, &sa, NULL);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Apply a single spawn action to the current task, dispatching on
+ * @action->type.
+ *
+ * Returns a negative errno on failure (-EINVAL for an unknown type).  A
+ * successful action returns the underlying helper's result unchanged;
+ * spawn_template_child() only treats negative values as errors.
+ */
+static int spawn_template_apply_action(const struct spawn_template_action *action)
+{
+	switch (action->type) {
+	case SPAWN_TEMPLATE_ACTION_CLOSE:
+		return close_fd(action->fd);
+	case SPAWN_TEMPLATE_ACTION_DUP2:
+		/*
+		 * fd == newfd is handled here instead of being passed to
+		 * ksys_dup3(): no flags are accepted and the only requirement
+		 * is that the descriptor is open.
+		 * NOTE(review): presumably because ksys_dup3() rejects equal
+		 * descriptors, as dup3(2) does - confirm.
+		 */
+		if (action->fd == action->newfd) {
+			if (action->flags)
+				return -EINVAL;
+			CLASS(fd, f)(action->fd);
+
+			if (fd_empty(f))
+				return -EBADF;
+			return 0;
+		}
+		return ksys_dup3(action->fd, action->newfd, action->flags);
+	case SPAWN_TEMPLATE_ACTION_FCHDIR: {
+		CLASS(fd, f)(action->fd);
+		int ret;
+
+		if (fd_empty(f))
+			return -EBADF;
+		/* The descriptor must refer to something lookups can go through. */
+		if (!d_can_lookup(fd_file(f)->f_path.dentry))
+			return -ENOTDIR;
+
+		/* The working directory is switched only if the permission check passes. */
+		ret = file_permission(fd_file(f), MAY_EXEC | MAY_CHDIR);
+		if (!ret)
+			set_fs_pwd(current->fs, &fd_file(f)->f_path);
+		return ret;
+	}
+	case SPAWN_TEMPLATE_ACTION_OPEN:
+		return spawn_template_apply_open(action);
+	case SPAWN_TEMPLATE_ACTION_CLOSE_RANGE:
+		/* fd is the first descriptor of the range, newfd the last. */
+		return do_close_range(action->fd, action->newfd, action->flags);
+	case SPAWN_TEMPLATE_ACTION_SIGMASK:
+		return spawn_template_apply_sigmask(action);
+	case SPAWN_TEMPLATE_ACTION_SIGDEFAULT:
+		return spawn_template_apply_sigdefault(action);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Copy @count actions from the user address @uaddr into a freshly
+ * allocated kernel array and validate the fields of every entry.
+ *
+ * On success *@out_actions receives the array (NULL when @count is zero)
+ * and ownership passes to the caller, who releases it with kfree().  On
+ * failure *@out_actions stays NULL and the copy is freed automatically.
+ *
+ * Returns 0 on success, -E2BIG when @count exceeds
+ * SPAWN_TEMPLATE_MAX_ACTIONS, -EINVAL for a zero @uaddr with a non-zero
+ * @count or for a malformed/unknown action, or the error from
+ * memdup_array_user().
+ */
+static int spawn_template_copy_actions(struct spawn_template_action **out_actions,
+				       u64 count, u64 uaddr)
+{
+	struct spawn_template_action *actions __free(kfree) = NULL;
+	struct spawn_template_action *copy;
+	u64 idx;
+
+	*out_actions = NULL;
+
+	if (count == 0)
+		return 0;
+	if (count > SPAWN_TEMPLATE_MAX_ACTIONS)
+		return -E2BIG;
+	if (uaddr == 0)
+		return -EINVAL;
+
+	copy = memdup_array_user(u64_to_user_ptr(uaddr), count,
+				 sizeof(*actions));
+	if (IS_ERR(copy))
+		return PTR_ERR(copy);
+	/* From here on every early return frees the copy. */
+	actions = copy;
+
+	for (idx = 0; idx < count; idx++) {
+		const struct spawn_template_action *act = &actions[idx];
+
+		switch (act->type) {
+		case SPAWN_TEMPLATE_ACTION_CLOSE:
+		case SPAWN_TEMPLATE_ACTION_FCHDIR:
+			/* Single-descriptor actions: only fd may be set. */
+			if (act->fd < 0 || act->flags || act->newfd ||
+			    act->arg)
+				return -EINVAL;
+			break;
+		case SPAWN_TEMPLATE_ACTION_DUP2:
+			if (act->fd < 0 || act->newfd < 0 ||
+			    (act->flags & ~O_CLOEXEC) || act->arg)
+				return -EINVAL;
+			break;
+		case SPAWN_TEMPLATE_ACTION_OPEN:
+			if (act->fd < AT_FDCWD || act->newfd < 0 ||
+			    act->flags || !act->arg)
+				return -EINVAL;
+			break;
+		case SPAWN_TEMPLATE_ACTION_CLOSE_RANGE:
+			/* fd..newfd must be a non-negative, ordered range. */
+			if (act->fd < 0 || act->newfd < 0 ||
+			    act->fd > act->newfd ||
+			    (act->flags &
+			     ~(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC)) ||
+			    act->arg)
+				return -EINVAL;
+			break;
+		case SPAWN_TEMPLATE_ACTION_SIGMASK:
+		case SPAWN_TEMPLATE_ACTION_SIGDEFAULT:
+			/* Signal actions carry their payload in arg only. */
+			if (act->fd || act->newfd || act->flags || !act->arg)
+				return -EINVAL;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	*out_actions = no_free_ptr(actions);
+	return 0;
+}
+
+/*
+ * Entry point of the child created by spawn_template_spawn().
+ *
+ * @data is the struct spawn_template_spawn_context prepared by the parent.
+ * The child applies the requested actions in order, closes every
+ * descriptor from 3 upwards unless SPAWN_TEMPLATE_SPAWN_INHERIT_FDS is set,
+ * and then executes the template's exec_file with the caller-supplied argv
+ * and envp.
+ *
+ * Returns 0 once the exec succeeded.  If an action, the close-range step or
+ * the exec itself fails, the child exits with the status chosen by
+ * spawn_template_exit_status() and never returns.
+ */
+static int spawn_template_child(void *data)
+{
+	struct spawn_template_spawn_context *ctx = data;
+	struct spawn_template *tmpl = ctx->tmpl;
+	int ret;
+	u64 i;
+
+	/* Actions may return positive values on success; only < 0 is an error. */
+	for (i = 0; i < ctx->args.actions_len; i++) {
+		ret = spawn_template_apply_action(&ctx->actions[i]);
+		if (ret < 0)
+			goto out_exec_error;
+	}
+
+	if (!(ctx->args.flags & SPAWN_TEMPLATE_SPAWN_INHERIT_FDS)) {
+		ret = do_close_range(3, ~0U, 0);
+		if (ret < 0)
+			goto out_exec_error;
+	}
+
+	ret = kernel_execveat_file(tmpl->exec_file, "",
+				   u64_to_user_ptr(ctx->args.argv),
+				   u64_to_user_ptr(ctx->args.envp),
+				   AT_EMPTY_PATH);
+out_exec_error:
+	/*
+	 * do_exit() takes a wait-status word, not a bare exit status: the
+	 * status has to sit in bits 8-15, exactly as the exit(2) syscall
+	 * encodes it.  Passing 127/126/1 unshifted would make the parent see
+	 * the child as terminated by a signal instead of as exited.
+	 */
+	if (ret < 0)
+		do_exit((spawn_template_exit_status(ret) & 0xff) << 8);
+	return 0;
+}
+
 static bool spawn_template_file_exec_allowed(struct file *file)
 {
 	if (!S_ISREG(file_inode(file)->i_mode))
@@ -53,6 +317,18 @@ static const struct file_operations spawn_template_fops = {
 	.llseek		= noop_llseek,
 };
 
+/*
+ * Resolve @fd to a spawn template file and take a reference on it.
+ *
+ * Returns the referenced file on success, ERR_PTR(-EBADF) when @fd is not
+ * an open descriptor, or ERR_PTR(-EINVAL) when the file's f_op is not
+ * spawn_template_fops.
+ */
+static struct file *spawn_template_file_from_fd(int fd)
+{
+	struct file *file;
+	CLASS(fd, f)(fd);
+
+	if (fd_empty(f))
+		return ERR_PTR(-EBADF);
+
+	file = fd_file(f);
+	if (file->f_op != &spawn_template_fops)
+		return ERR_PTR(-EINVAL);
+
+	/* The extra reference keeps the file alive once the fd guard is released. */
+	return get_file(file);
+}
+
 static int spawn_template_open_execfd(int execfd, struct file **file,
 				      bool *deny_write)
 {
@@ -178,3 +454,73 @@ SYSCALL_DEFINE2(spawn_template_create,
 	kfree(tmpl);
 	return ret;
 }
+
+/*
+ * spawn_template_spawn() - start a child from a spawn template fd.
+ * @template_fd: descriptor whose file uses spawn_template_fops
+ * @uargs:       user pointer to a struct spawn_template_spawn_args
+ * @usize:       size of that structure as seen by user space
+ *
+ * Returns the result of kernel_clone() once the clone has been attempted,
+ * or a negative errno from the checks before it:
+ *   -EINVAL  @usize below SPAWN_TEMPLATE_SPAWN_ARGS_SIZE_VER0, @template_fd
+ *            is not a spawn template, unknown flags, zero pidfd pointer,
+ *            non-zero reserved fields, or an invalid action
+ *   -E2BIG   @usize above PAGE_SIZE, or too many actions
+ *   -EBADF   @template_fd is not an open descriptor
+ *   -EACCES  the caller's credentials do not match the template's
+ *   -ENOMEM  the context allocation failed
+ * plus whatever copy_struct_from_user() or the action copy report.
+ */
+SYSCALL_DEFINE3(spawn_template_spawn, int, template_fd,
+		struct spawn_template_spawn_args __user *, uargs,
+		size_t, usize)
+{
+	struct spawn_template_spawn_context *ctx;
+	struct kernel_clone_args kargs;
+	struct file *template_file;
+	int ret;
+
+	/* The in-kernel struct must stay exactly the size of the first ABI version. */
+	BUILD_BUG_ON(sizeof(struct spawn_template_spawn_args) !=
+		     SPAWN_TEMPLATE_SPAWN_ARGS_SIZE_VER0);
+
+	if (usize < SPAWN_TEMPLATE_SPAWN_ARGS_SIZE_VER0)
+		return -EINVAL;
+	if (usize > PAGE_SIZE)
+		return -E2BIG;
+
+	template_file = spawn_template_file_from_fd(template_fd);
+	if (IS_ERR(template_file))
+		return PTR_ERR(template_file);
+
+	/* Only a caller running with the creator's cred object may spawn. */
+	if (!spawn_template_cred_matches(template_file->private_data)) {
+		ret = -EACCES;
+		goto out_put_template;
+	}
+
+	ctx = kzalloc_obj(*ctx, GFP_KERNEL);
+	if (!ctx) {
+		ret = -ENOMEM;
+		goto out_put_template;
+	}
+
+	ctx->tmpl = template_file->private_data;
+
+	ret = copy_struct_from_user(&ctx->args, sizeof(ctx->args), uargs,
+				    usize);
+	if (ret)
+		goto out_free_ctx;
+
+	/* Reject unknown flags, a missing pidfd destination and reserved bits. */
+	if ((ctx->args.flags & ~SPAWN_TEMPLATE_SPAWN_INHERIT_FDS) ||
+	    !ctx->args.pidfd || ctx->args.reserved[0] ||
+	    ctx->args.reserved[1] || ctx->args.reserved[2] ||
+	    ctx->args.reserved[3]) {
+		ret = -EINVAL;
+		goto out_free_ctx;
+	}
+
+	ret = spawn_template_copy_actions(&ctx->actions, ctx->args.actions_len,
+					  ctx->args.actions);
+	if (ret)
+		goto out_free_ctx;
+
+	/*
+	 * The child is cloned with CLONE_VM | CLONE_VFORK and starts in
+	 * spawn_template_child() with ctx as its argument; CLONE_PIDFD has
+	 * the pidfd written to the user address in args.pidfd.
+	 */
+	kargs = (struct kernel_clone_args) {
+		.flags		= CLONE_VM | CLONE_VFORK | CLONE_PIDFD,
+		.pidfd		= u64_to_user_ptr(ctx->args.pidfd),
+		.exit_signal	= SIGCHLD,
+		.fn		= spawn_template_child,
+		.fn_arg		= ctx,
+	};
+
+	ret = kernel_clone(&kargs);
+
+	/*
+	 * NOTE(review): ctx, its action array and the template file
+	 * reference are released as soon as kernel_clone() returns.  That is
+	 * only safe if the CLONE_VFORK wait cannot end before the child has
+	 * finished using them; if the wait can be cut short (e.g. by a fatal
+	 * signal to the caller - TODO confirm), the child would keep using
+	 * freed memory.
+	 */
+	kfree(ctx->actions);
+out_free_ctx:
+	kfree(ctx);
+out_put_template:
+	fput(template_file);
+	return ret;
+}
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 4b41950488bd6..df7368edf6778 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -68,6 +68,7 @@ union bpf_attr;
 struct io_uring_params;
 struct clone_args;
 struct spawn_template_create_args;
+struct spawn_template_spawn_args;
 struct open_how;
 struct mount_attr;
 struct landlock_ruleset_attr;
@@ -824,6 +825,9 @@ asmlinkage long sys_clone(unsigned long, unsigned long, int __user *,
 asmlinkage long sys_clone3(struct clone_args __user *uargs, size_t size);
 asmlinkage long sys_spawn_template_create(struct spawn_template_create_args __user *uargs,
 					  size_t size);
+asmlinkage long sys_spawn_template_spawn(int template_fd,
+					 struct spawn_template_spawn_args __user *uargs,
+					 size_t size);
 
 asmlinkage long sys_execve(const char __user *filename,
 		const char __user *const __user *argv,
-- 
2.52.0


  parent reply	other threads:[~2026-05-28  9:56 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-28  9:52 [RFC PATCH v1 00/13] exec: add spawn templates for repeated executable startup Li Chen
2026-05-28  9:52 ` [RFC PATCH v1 01/13] exec: factor argument setup out of do_execveat_common() Li Chen
2026-05-28  9:52 ` [RFC PATCH v1 02/13] exec: add an internal helper for opened executables Li Chen
2026-05-28  9:52 ` [RFC PATCH v1 03/13] file: expose helpers for in-kernel fd actions Li Chen
2026-05-28  9:52 ` [RFC PATCH v1 04/13] exec: add spawn template UAPI definitions Li Chen
2026-05-28  9:52 ` [RFC PATCH v1 05/13] exec: add spawn template file descriptors Li Chen
2026-05-28  9:52 ` Li Chen [this message]
2026-05-28  9:52 ` [RFC PATCH v1 07/13] exec: validate spawn template executable identity Li Chen
2026-05-28  9:52 ` [RFC PATCH v1 08/13] binfmt_elf: cache ELF metadata for spawn templates Li Chen
2026-05-28  9:52 ` [RFC PATCH v1 09/13] Documentation: describe " Li Chen
2026-05-28  9:52 ` [RFC PATCH v1 10/13] exec: require absolute paths for path-created templates Li Chen
2026-05-28  9:52 ` [RFC PATCH v1 11/13] exec: let close-range actions target the max fd Li Chen
2026-05-28  9:52 ` [RFC PATCH v1 12/13] syscalls: add generic spawn template entries Li Chen
2026-05-28  9:52 ` [RFC PATCH v1 13/13] selftests/exec: cover spawn template basics Li Chen
2026-05-28 11:02 ` [RFC PATCH v1 00/13] exec: add spawn templates for repeated executable startup Christian Brauner
2026-06-01  2:47   ` Li Chen
2026-06-01 19:55   ` Kees Cook
2026-05-28 12:55 ` Mateusz Guzik
2026-06-01 15:11   ` Li Chen
2026-05-28 18:27 ` Andy Lutomirski
2026-06-02 12:07   ` Li Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260528095235.2491226-7-me@linux.beauty \
    --to=me@linux.beauty \
    --cc=arnd@arndb.de \
    --cc=bp@alien8.de \
    --cc=brauner@kernel.org \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=jack@suse.cz \
    --cc=kees@kernel.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=skhan@linuxfoundation.org \
    --cc=tglx@kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox