From: "Michael S. Tsirkin" <mst@redhat.com>
To: Cindy Lu <lulu@redhat.com>
Cc: jasowang@redhat.com, michael.christie@oracle.com,
sgarzare@redhat.com, linux-kernel@vger.kernel.org,
virtualization@lists.linux-foundation.org,
netdev@vger.kernel.org
Subject: Re: [PATCH v9 2/4] vhost: Reintroduce kthread mode support in vhost
Date: Mon, 21 Apr 2025 06:58:35 -0400 [thread overview]
Message-ID: <20250421065549-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <20250421024457.112163-3-lulu@redhat.com>
On Mon, Apr 21, 2025 at 10:44:08AM +0800, Cindy Lu wrote:
> This patch reintroduces kthread mode support in vhost.
> It also introduces struct vhost_worker_ops to abstract
> worker create/stop/wakeup operations.
>
> * Bring back the original vhost_worker() implementation,
> and rename it to vhost_run_work_kthread_list().
>
> * Add cgroup support for the kthread
>
> * Introduce struct vhost_worker_ops:
> - Encapsulates create / stop / wake-up callbacks.
> - vhost_worker_create() selects the proper ops according to
> inherit_owner.
>
> This partially reverts or improves upon:
> commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads")
> commit 1cdaafa1b8b4 ("vhost: replace single worker pointer with xarray")
>
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
> drivers/vhost/vhost.c | 188 ++++++++++++++++++++++++++++++++++++++----
> drivers/vhost/vhost.h | 12 +++
> 2 files changed, 182 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 250dc43f1786..be97028a8baf 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -22,6 +22,7 @@
> #include <linux/slab.h>
> #include <linux/vmalloc.h>
> #include <linux/kthread.h>
> +#include <linux/cgroup.h>
> #include <linux/module.h>
> #include <linux/sort.h>
> #include <linux/sched/mm.h>
> @@ -242,7 +243,7 @@ static void vhost_worker_queue(struct vhost_worker *worker,
> * test_and_set_bit() implies a memory barrier.
> */
> llist_add(&work->node, &worker->work_list);
> - vhost_task_wake(worker->vtsk);
> + worker->ops->wakeup(worker);
> }
> }
>
> @@ -388,6 +389,44 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> __vhost_vq_meta_reset(vq);
> }
>
> +static int vhost_run_work_kthread_list(void *data)
> +{
> + struct vhost_worker *worker = data;
> + struct vhost_work *work, *work_next;
> + struct vhost_dev *dev = worker->dev;
> + struct llist_node *node;
> +
> + kthread_use_mm(dev->mm);
> +
> + for (;;) {
> + /* mb paired w/ kthread_stop */
> + set_current_state(TASK_INTERRUPTIBLE);
> +
> + if (kthread_should_stop()) {
> + __set_current_state(TASK_RUNNING);
> + break;
> + }
> + node = llist_del_all(&worker->work_list);
> + if (!node)
> + schedule();
> +
> + node = llist_reverse_order(node);
> + /* make sure flag is seen after deletion */
> + smp_wmb();
> + llist_for_each_entry_safe(work, work_next, node, node) {
> + clear_bit(VHOST_WORK_QUEUED, &work->flags);
> + __set_current_state(TASK_RUNNING);
> + kcov_remote_start_common(worker->kcov_handle);
> + work->fn(work);
> + kcov_remote_stop();
> + cond_resched();
> + }
> + }
> + kthread_unuse_mm(dev->mm);
> +
> + return 0;
> +}
> +
> static bool vhost_run_work_list(void *data)
> {
> struct vhost_worker *worker = data;
> @@ -582,6 +621,46 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
> }
> EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
>
> +struct vhost_attach_cgroups_struct {
> + struct vhost_work work;
> + struct task_struct *owner;
> + int ret;
> +};
> +
> +static void vhost_attach_cgroups_work(struct vhost_work *work)
> +{
> + struct vhost_attach_cgroups_struct *s;
> +
> + s = container_of(work, struct vhost_attach_cgroups_struct, work);
> + s->ret = cgroup_attach_task_all(s->owner, current);
> +}
> +
> +static int vhost_attach_task_to_cgroups(struct vhost_worker *worker)
> +{
> + struct vhost_attach_cgroups_struct attach;
> + int saved_cnt;
> +
> + attach.owner = current;
> +
> + vhost_work_init(&attach.work, vhost_attach_cgroups_work);
> + vhost_worker_queue(worker, &attach.work);
> +
> + mutex_lock(&worker->mutex);
> +
> + /*
> + * Bypass attachment_cnt check in __vhost_worker_flush:
> + * Temporarily change it to INT_MAX to bypass the check
> + */
> + saved_cnt = worker->attachment_cnt;
> + worker->attachment_cnt = INT_MAX;
> + __vhost_worker_flush(worker);
> + worker->attachment_cnt = saved_cnt;
You mean this check in __vhost_worker_flush?
if (!worker->attachment_cnt || worker->killed)
return;
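Instead of temporarily overwriting attachment_cnt, just introduce a
variant of __vhost_worker_flush that skips this check.
E.g. rename the existing __vhost_worker_flush -> _vhost_worker_flush,
leave the unconditional flush in a new __vhost_worker_flush,
then rework the renamed function as a wrapper that does the check: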
static void _vhost_worker_flush(struct vhost_worker *worker)
{
struct vhost_flush_struct flush;
if (!worker->attachment_cnt || worker->killed)
return;
__vhost_worker_flush(worker);
}
> +
> + mutex_unlock(&worker->mutex);
> +
> + return attach.ret;
> +}
> +
> /* Caller should have device mutex */
> bool vhost_dev_has_owner(struct vhost_dev *dev)
> {
> @@ -627,7 +706,7 @@ static void vhost_worker_destroy(struct vhost_dev *dev,
>
> WARN_ON(!llist_empty(&worker->work_list));
> xa_erase(&dev->worker_xa, worker->id);
> - vhost_task_stop(worker->vtsk);
> + worker->ops->stop(worker);
> kfree(worker);
> }
>
> @@ -650,42 +729,115 @@ static void vhost_workers_free(struct vhost_dev *dev)
> xa_destroy(&dev->worker_xa);
> }
>
> +static void vhost_task_wakeup(struct vhost_worker *worker)
> +{
> + return vhost_task_wake(worker->vtsk);
> +}
> +
> +static void vhost_kthread_wakeup(struct vhost_worker *worker)
> +{
> + wake_up_process(worker->kthread_task);
> +}
> +
> +static void vhost_task_do_stop(struct vhost_worker *worker)
> +{
> + return vhost_task_stop(worker->vtsk);
> +}
> +
> +static void vhost_kthread_do_stop(struct vhost_worker *worker)
> +{
> + kthread_stop(worker->kthread_task);
> +}
> +
> +static int vhost_task_worker_create(struct vhost_worker *worker,
> + struct vhost_dev *dev, const char *name)
> +{
> + struct vhost_task *vtsk;
> + u32 id;
> + int ret;
> +
> + vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
> + worker, name);
> + if (IS_ERR(vtsk))
> + return PTR_ERR(vtsk);
> +
> + worker->vtsk = vtsk;
> + vhost_task_start(vtsk);
> + ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
> + if (ret < 0) {
> + vhost_task_do_stop(worker);
> + return ret;
> + }
> + worker->id = id;
> + return 0;
> +}
> +
> +static int vhost_kthread_worker_create(struct vhost_worker *worker,
> + struct vhost_dev *dev, const char *name)
> +{
> + struct task_struct *task;
> + u32 id;
> + int ret;
> +
> + task = kthread_create(vhost_run_work_kthread_list, worker, "%s", name);
> + if (IS_ERR(task))
> + return PTR_ERR(task);
> +
> + worker->kthread_task = task;
> + wake_up_process(task);
> + ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
> + if (ret < 0)
> + goto stop_worker;
> +
> + ret = vhost_attach_task_to_cgroups(worker);
> + if (ret)
> + goto stop_worker;
> +
> + worker->id = id;
> + return 0;
> +
> +stop_worker:
> + vhost_kthread_do_stop(worker);
> + return ret;
> +}
> +
> +static const struct vhost_worker_ops kthread_ops = {
> + .create = vhost_kthread_worker_create,
> + .stop = vhost_kthread_do_stop,
> + .wakeup = vhost_kthread_wakeup,
> +};
> +
> +static const struct vhost_worker_ops vhost_task_ops = {
> + .create = vhost_task_worker_create,
> + .stop = vhost_task_do_stop,
> + .wakeup = vhost_task_wakeup,
> +};
> +
> static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
> {
> struct vhost_worker *worker;
> - struct vhost_task *vtsk;
> char name[TASK_COMM_LEN];
> int ret;
> - u32 id;
> + const struct vhost_worker_ops *ops =
> + dev->inherit_owner ? &vhost_task_ops : &kthread_ops;
>
> worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
> if (!worker)
> return NULL;
>
> worker->dev = dev;
> + worker->ops = ops;
> snprintf(name, sizeof(name), "vhost-%d", current->pid);
>
> - vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
> - worker, name);
> - if (IS_ERR(vtsk))
> - goto free_worker;
> -
> mutex_init(&worker->mutex);
> init_llist_head(&worker->work_list);
> worker->kcov_handle = kcov_common_handle();
> - worker->vtsk = vtsk;
> -
> - vhost_task_start(vtsk);
> -
> - ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
> + ret = ops->create(worker, dev, name);
> if (ret < 0)
> - goto stop_worker;
> - worker->id = id;
> + goto free_worker;
>
> return worker;
>
> -stop_worker:
> - vhost_task_stop(vtsk);
> free_worker:
> kfree(worker);
> return NULL;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 19bb94922a0e..af4b2f7d3b91 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -26,7 +26,18 @@ struct vhost_work {
> unsigned long flags;
> };
>
> +struct vhost_worker;
> +struct vhost_dev;
> +
> +struct vhost_worker_ops {
> + int (*create)(struct vhost_worker *worker, struct vhost_dev *dev,
> + const char *name);
> + void (*stop)(struct vhost_worker *worker);
> + void (*wakeup)(struct vhost_worker *worker);
> +};
> +
> struct vhost_worker {
> + struct task_struct *kthread_task;
> struct vhost_task *vtsk;
> struct vhost_dev *dev;
> /* Used to serialize device wide flushing with worker swapping. */
> @@ -36,6 +47,7 @@ struct vhost_worker {
> u32 id;
> int attachment_cnt;
> bool killed;
> + const struct vhost_worker_ops *ops;
> };
>
> /* Poll a file (eventfd or socket) */
> --
> 2.45.0
next prev parent reply other threads:[~2025-04-21 10:58 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-21 2:44 [PATCH v9 0/4] vhost: Add support of kthread API Cindy Lu
2025-04-21 2:44 ` [PATCH v9 1/4] vhost: Add a new parameter in vhost_dev to allow user select kthread Cindy Lu
2025-04-21 3:25 ` Jason Wang
2025-04-22 13:36 ` Stefano Garzarella
2025-04-21 2:44 ` [PATCH v9 2/4] vhost: Reintroduce kthread mode support in vhost Cindy Lu
2025-04-21 3:39 ` Jason Wang
2025-04-21 10:59 ` Michael S. Tsirkin
2025-04-21 10:58 ` Michael S. Tsirkin [this message]
2025-04-21 2:44 ` [PATCH v9 3/4] vhost: add VHOST_FORK_FROM_OWNER ioctl and validate inherit_owner Cindy Lu
2025-04-21 3:40 ` Jason Wang
2025-04-22 13:45 ` Stefano Garzarella
2025-04-21 2:44 ` [PATCH v9 4/4] vhost: Add a KConfig knob to enable IOCTL VHOST_FORK_FROM_OWNER Cindy Lu
2025-04-21 3:45 ` Jason Wang
2025-04-21 3:46 ` Jason Wang
2025-04-29 3:39 ` Jason Wang
2025-04-29 10:55 ` Michael S. Tsirkin
2025-04-30 3:34 ` Jason Wang
2025-04-30 9:27 ` Michael S. Tsirkin
2025-05-13 4:08 ` Jason Wang
2025-05-13 7:08 ` Michael S. Tsirkin
2025-05-14 2:52 ` Jason Wang
2025-05-15 6:05 ` Cindy Lu
2025-05-15 6:14 ` Michael S. Tsirkin
2025-05-16 1:31 ` Jason Wang
2025-05-16 10:39 ` Michael S. Tsirkin
2025-05-19 7:34 ` Jason Wang
2025-04-22 13:50 ` Stefano Garzarella
2025-04-23 1:01 ` Cindy Lu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250421065549-mutt-send-email-mst@kernel.org \
--to=mst@redhat.com \
--cc=jasowang@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=lulu@redhat.com \
--cc=michael.christie@oracle.com \
--cc=netdev@vger.kernel.org \
--cc=sgarzare@redhat.com \
--cc=virtualization@lists.linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.