From: Mike Christie <michael.christie@oracle.com>
To: geert@linux-m68k.org, hdanton@sina.com, hch@infradead.org,
stefanha@redhat.com, jasowang@redhat.com, mst@redhat.com,
sgarzare@redhat.com, virtualization@lists.linux-foundation.org,
christian.brauner@ubuntu.com, axboe@kernel.dk,
ebiederm@xmission.com, linux-kernel@vger.kernel.org
Subject: [PATCH V8 6/8] vhost_task: Allow vhost layer to use copy_process
Date: Wed, 9 Feb 2022 17:29:37 -0600 [thread overview]
Message-ID: <20220209232939.9169-7-michael.christie@oracle.com> (raw)
In-Reply-To: <20220209232939.9169-1-michael.christie@oracle.com>
Qemu will create vhost devices in the kernel which perform network, SCSI,
etc IO and management operations from worker threads created by the
kthread API. Because the kthread API does a copy_process on the kthreadd
thread, the vhost layer has to use kthread_use_mm to access the Qemu
thread's memory and cgroup_attach_task_all to add itself to the Qemu
thread's cgroups, and it bypasses the RLIMIT_NPROC limit.
This patch adds a new struct vhost_task which can be used instead of
kthreads. They allow the vhost layer to use copy_process and inherit
the userspace process's mm and cgroups and the task is accounted for
under the userspace's nproc count.
Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
MAINTAINERS | 2 +
drivers/vhost/Kconfig | 5 ++
include/linux/sched/vhost_task.h | 23 ++++++
kernel/Makefile | 1 +
kernel/vhost_task.c | 123 +++++++++++++++++++++++++++++++
5 files changed, 154 insertions(+)
create mode 100644 include/linux/sched/vhost_task.h
create mode 100644 kernel/vhost_task.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 3e461db9cd91..4a3d9541e3cc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -20477,7 +20477,9 @@ L: virtualization@lists.linux-foundation.org
L: netdev@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git
+F: kernel/vhost_task.c
F: drivers/vhost/
+F: include/linux/sched/vhost_task.h
F: include/linux/vhost_iotlb.h
F: include/uapi/linux/vhost.h
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 587fbae06182..b455d9ab6f3d 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -13,9 +13,14 @@ config VHOST_RING
This option is selected by any driver which needs to access
the host side of a virtio ring.
+config VHOST_TASK
+ bool
+ default n
+
config VHOST
tristate
select VHOST_IOTLB
+ select VHOST_TASK
help
This option is selected by any driver which needs to access
the core of vhost.
diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h
new file mode 100644
index 000000000000..50d02a25d37b
--- /dev/null
+++ b/include/linux/sched/vhost_task.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_VHOST_TASK_H
+#define _LINUX_VHOST_TASK_H
+
+#include <linux/completion.h>
+
+struct task_struct;
+
+struct vhost_task {
+ int (*fn)(void *data);
+ void *data;
+ struct completion exited;
+ unsigned long flags;
+ struct task_struct *task;
+};
+
+struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg, int node);
+__printf(2, 3)
+void vhost_task_start(struct vhost_task *vtsk, const char namefmt[], ...);
+void vhost_task_stop(struct vhost_task *vtsk);
+bool vhost_task_should_stop(struct vhost_task *vtsk);
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 56f4ee97f328..d82f388082b8 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -15,6 +15,7 @@ obj-y = fork.o exec_domain.o panic.o \
obj-$(CONFIG_USERMODE_DRIVER) += usermode_driver.o
obj-$(CONFIG_MODULES) += kmod.o
obj-$(CONFIG_MULTIUSER) += groups.o
+obj-$(CONFIG_VHOST_TASK) += vhost_task.o
ifdef CONFIG_FUNCTION_TRACER
# Do not trace internal ftrace files
diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c
new file mode 100644
index 000000000000..4ab1c195bc76
--- /dev/null
+++ b/kernel/vhost_task.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Oracle Corporation
+ */
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/sched/task.h>
+#include <linux/sched/vhost_task.h>
+#include <linux/sched/signal.h>
+
+enum vhost_task_flags {
+ VHOST_TASK_FLAGS_STOP,
+};
+
+static void vhost_task_fn(void *data)
+{
+ struct vhost_task *vtsk = data;
+ int ret;
+
+ ret = vtsk->fn(vtsk->data);
+ complete(&vtsk->exited);
+ do_exit(ret);
+}
+
+/**
+ * vhost_task_stop - stop a vhost_task
+ * @vtsk: vhost_task to stop
+ *
+ * Callers must call vhost_task_should_stop and return from their worker
+ * function when it returns true;
+ */
+void vhost_task_stop(struct vhost_task *vtsk)
+{
+ pid_t pid = vtsk->task->pid;
+
+ set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags);
+ wake_up_process(vtsk->task);
+ /*
+ * Make sure vhost_task_fn is no longer accessing the vhost_task before
+ * freeing it below. If userspace crashed or exited without closing,
+ * then the vhost_task->task could already be marked dead so
+ * kernel_wait will return early.
+ */
+ wait_for_completion(&vtsk->exited);
+ /*
+ * If we are just closing/removing a device and the parent process is
+ * not exiting then reap the task.
+ */
+ kernel_wait4(pid, NULL, __WCLONE, NULL);
+ kfree(vtsk);
+}
+EXPORT_SYMBOL_GPL(vhost_task_stop);
+
+/**
+ * vhost_task_should_stop - should the vhost task return from the work function
+ */
+bool vhost_task_should_stop(struct vhost_task *vtsk)
+{
+ return test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags);
+}
+EXPORT_SYMBOL_GPL(vhost_task_should_stop);
+
+/**
+ * vhost_task_create - create a copy of a process to be used by the kernel
+ * @fn: thread stack
+ * @arg: data to be passed to fn
+ * @node: numa node to allocate task from
+ *
+ * This returns a specialized task for use by the vhost layer or NULL on
+ * failure. The returned task is inactive, and the caller must fire it up
+ * through vhost_task_start().
+ */
+struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg, int node)
+{
+ struct kernel_clone_args args = {
+ .flags = CLONE_FS | CLONE_UNTRACED | CLONE_VM,
+ .exit_signal = 0,
+ .stack = (unsigned long)vhost_task_fn,
+ .worker_flags = USER_WORKER | USER_WORKER_NO_FILES |
+ USER_WORKER_SIG_IGN,
+ };
+ struct vhost_task *vtsk;
+ struct task_struct *tsk;
+
+ vtsk = kzalloc(sizeof(*vtsk), GFP_KERNEL);
+ if (!vtsk)
+ return ERR_PTR(-ENOMEM);
+
+ init_completion(&vtsk->exited);
+ vtsk->data = arg;
+ vtsk->fn = fn;
+ args.stack_size = (unsigned long)vtsk;
+
+ tsk = copy_process(NULL, 0, node, &args);
+ if (IS_ERR(tsk)) {
+ kfree(vtsk);
+ return NULL;
+ }
+
+ vtsk->task = tsk;
+
+ return vtsk;
+}
+EXPORT_SYMBOL_GPL(vhost_task_create);
+
+/**
+ * vhost_task_start - start a vhost_task created with vhost_task_create
+ * @vtsk: vhost_task to wake up
+ * @namefmt: printf-style format string for the thread name
+ */
+void vhost_task_start(struct vhost_task *vtsk, const char namefmt[], ...)
+{
+ char name[TASK_COMM_LEN];
+ va_list args;
+
+ va_start(args, namefmt);
+ vsnprintf(name, sizeof(name), namefmt, args);
+ set_task_comm(vtsk->task, name);
+ va_end(args);
+
+ wake_up_new_task(vtsk->task);
+}
+EXPORT_SYMBOL_GPL(vhost_task_start);
--
2.25.1
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
next prev parent reply other threads:[~2022-02-09 23:30 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-09 23:29 [PATCH V8 0/8] Use copy_process in vhost layer Mike Christie
2022-02-09 23:29 ` [PATCH V8 1/8] fork: Make IO worker options flag based Mike Christie
2022-02-09 23:29 ` [PATCH V8 2/8] fork/vm: Move common PF_IO_WORKER behavior to new flag Mike Christie
2022-02-09 23:29 ` [PATCH V8 3/8] fork: add USER_WORKER flag to not dup/clone files Mike Christie
2022-02-09 23:29 ` [PATCH V8 4/8] fork: Add USER_WORKER flag to ignore signals Mike Christie
2022-02-09 23:29 ` [PATCH V8 5/8] fork: allow kernel code to call copy_process Mike Christie
2022-02-09 23:29 ` Mike Christie [this message]
2022-02-09 23:29 ` [PATCH V8 7/8] vhost: move worker thread fields to new struct Mike Christie
2022-02-09 23:29 ` [PATCH V8 8/8] vhost: use vhost_tasks for worker threads Mike Christie
2022-03-01 1:13 ` [PATCH V8 0/8] Use copy_process in vhost layer Mike Christie
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220209232939.9169-7-michael.christie@oracle.com \
--to=michael.christie@oracle.com \
--cc=axboe@kernel.dk \
--cc=christian.brauner@ubuntu.com \
--cc=ebiederm@xmission.com \
--cc=geert@linux-m68k.org \
--cc=hch@infradead.org \
--cc=hdanton@sina.com \
--cc=jasowang@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mst@redhat.com \
--cc=sgarzare@redhat.com \
--cc=stefanha@redhat.com \
--cc=virtualization@lists.linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).