From: "Michael S. Tsirkin" <mst@redhat.com>
To: Mike Christie <michael.christie@oracle.com>
Cc: virtualization@lists.linux-foundation.org, stefanha@redhat.com
Subject: Re: [PATCH v6 01/11] vhost: add vhost_worker pointer to vhost_virtqueue
Date: Tue, 4 Apr 2023 14:38:01 -0400
Message-ID: <20230404143713-mutt-send-email-mst@kernel.org>
In-Reply-To: <20230328021717.42268-2-michael.christie@oracle.com>
On Mon, Mar 27, 2023 at 09:17:07PM -0500, Mike Christie wrote:
> This patchset allows userspace to map vqs to different workers. This
> patch adds a worker pointer to the vq so we can store that info.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
Thanks! This conflicts with a bunch of refactorings upstream;
could you rebase it on my tree and repost?
I need to queue this soon so it gets time in -next.
> ---
> drivers/vhost/vhost.c | 24 +++++++++++++-----------
> drivers/vhost/vhost.h | 1 +
> 2 files changed, 14 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 4368ee9b999c..e041e116afee 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -486,6 +486,7 @@ void vhost_dev_init(struct vhost_dev *dev,
> vq->log = NULL;
> vq->indirect = NULL;
> vq->heads = NULL;
> + vq->worker = NULL;
> vq->dev = dev;
> mutex_init(&vq->mutex);
> vhost_vq_reset(dev, vq);
> @@ -554,16 +555,15 @@ static void vhost_worker_free(struct vhost_dev *dev)
> kfree(worker);
> }
>
> -static int vhost_worker_create(struct vhost_dev *dev)
> +static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
> {
> struct vhost_worker *worker;
> struct vhost_task *vtsk;
> char name[TASK_COMM_LEN];
> - int ret;
>
> worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
> if (!worker)
> - return -ENOMEM;
> + return NULL;
>
> dev->worker = worker;
> worker->kcov_handle = kcov_common_handle();
> @@ -571,25 +571,24 @@ static int vhost_worker_create(struct vhost_dev *dev)
> snprintf(name, sizeof(name), "vhost-%d", current->pid);
>
> vtsk = vhost_task_create(vhost_worker, worker, name);
> - if (!vtsk) {
> - ret = -ENOMEM;
> + if (!vtsk)
> goto free_worker;
> - }
>
> worker->vtsk = vtsk;
> vhost_task_start(vtsk);
> - return 0;
> + return worker;
>
> free_worker:
> kfree(worker);
> dev->worker = NULL;
> - return ret;
> + return NULL;
> }
>
> /* Caller should have device mutex */
> long vhost_dev_set_owner(struct vhost_dev *dev)
> {
> - int err;
> + struct vhost_worker *worker;
> + int err, i;
>
> /* Is there an owner already? */
> if (vhost_dev_has_owner(dev)) {
> @@ -600,9 +599,12 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
> vhost_attach_mm(dev);
>
> if (dev->use_worker) {
> - err = vhost_worker_create(dev);
> - if (err)
> + worker = vhost_worker_create(dev);
> + if (!worker)
> goto err_worker;
> +
> + for (i = 0; i < dev->nvqs; i++)
> + dev->vqs[i]->worker = worker;
> }
>
> err = vhost_dev_alloc_iovecs(dev);
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 0308638cdeee..e72b665ba3a5 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -74,6 +74,7 @@ struct vhost_vring_call {
> /* The virtqueue structure describes a queue attached to a device. */
> struct vhost_virtqueue {
> struct vhost_dev *dev;
> + struct vhost_worker *worker;
>
> /* The actual ring of buffers. */
> struct mutex mutex;
> --
> 2.25.1
On Mon, Mar 27, 2023 at 09:17:08PM -0500, Mike Christie wrote:
> In the next patches each vq might have a different worker, so one vq
> could have work pending while the others do not. For net, we only want
> to check specific vqs, so this adds a helper to check if a vq has work
> pending and converts vhost-net to use it.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
> drivers/vhost/net.c | 2 +-
> drivers/vhost/vhost.c | 6 +++---
> drivers/vhost/vhost.h | 2 +-
> 3 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 07181cd8d52e..8ed63651b9eb 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -546,7 +546,7 @@ static void vhost_net_busy_poll(struct vhost_net *net,
> endtime = busy_clock() + busyloop_timeout;
>
> while (vhost_can_busy_poll(endtime)) {
> - if (vhost_has_work(&net->dev)) {
> + if (vhost_vq_has_work(vq)) {
> *busyloop_intr = true;
> break;
> }
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index e041e116afee..6567aed69ebb 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -262,11 +262,11 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
> EXPORT_SYMBOL_GPL(vhost_work_queue);
>
> /* A lockless hint for busy polling code to exit the loop */
> -bool vhost_has_work(struct vhost_dev *dev)
> +bool vhost_vq_has_work(struct vhost_virtqueue *vq)
> {
> - return dev->worker && !llist_empty(&dev->worker->work_list);
> + return vq->worker && !llist_empty(&vq->worker->work_list);
> }
> -EXPORT_SYMBOL_GPL(vhost_has_work);
> +EXPORT_SYMBOL_GPL(vhost_vq_has_work);
>
> void vhost_poll_queue(struct vhost_poll *poll)
> {
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index e72b665ba3a5..0dde119fb0ee 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -45,7 +45,6 @@ struct vhost_poll {
>
> void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
> void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
> -bool vhost_has_work(struct vhost_dev *dev);
>
> void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
> __poll_t mask, struct vhost_dev *dev);
> @@ -195,6 +194,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *,
> struct vhost_log *log, unsigned int *log_num);
> void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
>
> +bool vhost_vq_has_work(struct vhost_virtqueue *vq);
> bool vhost_vq_is_setup(struct vhost_virtqueue *vq);
> int vhost_vq_init_access(struct vhost_virtqueue *);
> int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
> --
> 2.25.1
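
A side note on what the per-vq form buys: a driver can now poll just the
vqs it cares about without being woken by work queued for an unrelated
vq on another worker. A minimal sketch (foo_pair_has_work() is
hypothetical; only vhost_vq_has_work() is from the patch):

	/* Sketch: exit busy polling only if one of *our* vqs has work
	 * pending on its worker.
	 */
	static bool foo_pair_has_work(struct vhost_virtqueue *rvq,
				      struct vhost_virtqueue *tvq)
	{
		return vhost_vq_has_work(rvq) || vhost_vq_has_work(tvq);
	}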
On Mon, Mar 27, 2023 at 09:17:09PM -0500, Mike Christie wrote:
> This patch has the core work queueing function take a worker, for when
> we support multiple workers. It also adds a helper that takes a vq
> during queueing, so modules can control which vq/worker to queue work
> on.
>
> This temporarily leaves vhost_work_queue in place. It will be removed
> once the drivers are converted in the next patches.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
> drivers/vhost/vhost.c | 44 +++++++++++++++++++++++++++----------------
> drivers/vhost/vhost.h | 1 +
> 2 files changed, 29 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 6567aed69ebb..cc2628ba9a77 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -231,6 +231,34 @@ void vhost_poll_stop(struct vhost_poll *poll)
> }
> EXPORT_SYMBOL_GPL(vhost_poll_stop);
>
> +static void vhost_work_queue_on(struct vhost_worker *worker,
> + struct vhost_work *work)
> +{
> + if (!worker)
> + return;
> +
> + if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
> + /* We can only add the work to the list after we're
> + * sure it was not in the list.
> + * test_and_set_bit() implies a memory barrier.
> + */
> + llist_add(&work->node, &worker->work_list);
> + wake_up_process(worker->vtsk->task);
> + }
> +}
> +
> +void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
> +{
> + vhost_work_queue_on(dev->worker, work);
> +}
> +EXPORT_SYMBOL_GPL(vhost_work_queue);
> +
> +void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work)
> +{
> + vhost_work_queue_on(vq->worker, work);
> +}
> +EXPORT_SYMBOL_GPL(vhost_vq_work_queue);
> +
> void vhost_dev_flush(struct vhost_dev *dev)
> {
> struct vhost_flush_struct flush;
> @@ -245,22 +273,6 @@ void vhost_dev_flush(struct vhost_dev *dev)
> }
> EXPORT_SYMBOL_GPL(vhost_dev_flush);
>
> -void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
> -{
> - if (!dev->worker)
> - return;
> -
> - if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
> - /* We can only add the work to the list after we're
> - * sure it was not in the list.
> - * test_and_set_bit() implies a memory barrier.
> - */
> - llist_add(&work->node, &dev->worker->work_list);
> - wake_up_process(dev->worker->vtsk->task);
> - }
> -}
> -EXPORT_SYMBOL_GPL(vhost_work_queue);
> -
> /* A lockless hint for busy polling code to exit the loop */
> bool vhost_vq_has_work(struct vhost_virtqueue *vq)
> {
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 0dde119fb0ee..b64ee4ef387d 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -194,6 +194,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *,
> struct vhost_log *log, unsigned int *log_num);
> void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
>
> +void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work);
> bool vhost_vq_has_work(struct vhost_virtqueue *vq);
> bool vhost_vq_is_setup(struct vhost_virtqueue *vq);
> int vhost_vq_init_access(struct vhost_virtqueue *);
> --
> 2.25.1
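
For driver writers following along, the intended pattern looks roughly
like this (a sketch; the foo_* names are hypothetical, only
vhost_vq_work_queue() is from the patch):

	/* Sketch: queue completion handling on the worker that owns the
	 * vq the command arrived on, instead of the per-device worker.
	 */
	static void foo_complete_cmd(struct foo_cmd *cmd)
	{
		struct foo_virtqueue *fvq = cmd->fvq;

		llist_add(&cmd->node, &fvq->completion_list);
		vhost_vq_work_queue(&fvq->vq, &fvq->completion_work);
	}

The vsock and scsi conversions later in the series follow this shape.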
On Mon, Mar 27, 2023 at 09:17:10PM -0500, Mike Christie wrote:
> This patch has the core work flush function take a worker. When we
> support multiple workers we can then flush each worker during device
> removal, stoppage, etc.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
> drivers/vhost/vhost.c | 24 +++++++++++++++---------
> 1 file changed, 15 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index cc2628ba9a77..6160aa1cc922 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -247,6 +247,20 @@ static void vhost_work_queue_on(struct vhost_worker *worker,
> }
> }
>
> +static void vhost_work_flush_on(struct vhost_worker *worker)
> +{
> + struct vhost_flush_struct flush;
> +
> + if (!worker)
> + return;
> +
> + init_completion(&flush.wait_event);
> + vhost_work_init(&flush.work, vhost_flush_work);
> +
> + vhost_work_queue_on(worker, &flush.work);
> + wait_for_completion(&flush.wait_event);
> +}
> +
> void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
> {
> vhost_work_queue_on(dev->worker, work);
> @@ -261,15 +275,7 @@ EXPORT_SYMBOL_GPL(vhost_vq_work_queue);
>
> void vhost_dev_flush(struct vhost_dev *dev)
> {
> - struct vhost_flush_struct flush;
> -
> - if (dev->worker) {
> - init_completion(&flush.wait_event);
> - vhost_work_init(&flush.work, vhost_flush_work);
> -
> - vhost_work_queue(dev, &flush.work);
> - wait_for_completion(&flush.wait_event);
> - }
> + vhost_work_flush_on(dev->worker);
> }
> EXPORT_SYMBOL_GPL(vhost_dev_flush);
>
> --
> 2.25.1
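
Worth spelling out: the guarantee here is per-worker FIFO. The flush
work is appended behind whatever is already on that worker's work_list,
so waiting on its completion implies everything queued to that worker
beforehand has run. In sketch form (comments only, not from the patch):

	/* worker's work_list:  [work A][work B][flush work]
	 *                                          ^
	 * wait_for_completion() returns once the flush work runs, at
	 * which point A and B have already run to completion.
	 */

Nothing is implied about work queued to other workers, which is why the
later patches flush each relevant worker explicitly.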
On Mon, Mar 27, 2023 at 09:17:11PM -0500, Mike Christie wrote:
> This has the drivers pass in their poll-to-vq mapping and then converts
> the core poll code to use the vq-based helpers. In the next patches we
> will allow vqs to be handled by different workers, so to let drivers
> execute operations like queue, stop, and flush on specific polls/vqs,
> we need to know the mappings.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
> drivers/vhost/net.c | 6 ++++--
> drivers/vhost/vhost.c | 8 +++++---
> drivers/vhost/vhost.h | 4 +++-
> 3 files changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 8ed63651b9eb..4a9b757071a2 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -1342,8 +1342,10 @@ static int vhost_net_open(struct inode *inode, struct file *f)
> VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true,
> NULL);
>
> - vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
> - vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
> + vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev,
> + vqs[VHOST_NET_VQ_TX]);
> + vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev,
> + vqs[VHOST_NET_VQ_RX]);
>
> f->private_data = n;
> n->page_frag.page = NULL;
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 6160aa1cc922..6968f8fc17e8 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -187,13 +187,15 @@ EXPORT_SYMBOL_GPL(vhost_work_init);
>
> /* Init poll structure */
> void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
> - __poll_t mask, struct vhost_dev *dev)
> + __poll_t mask, struct vhost_dev *dev,
> + struct vhost_virtqueue *vq)
> {
> init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
> init_poll_funcptr(&poll->table, vhost_poll_func);
> poll->mask = mask;
> poll->dev = dev;
> poll->wqh = NULL;
> + poll->vq = vq;
>
> vhost_work_init(&poll->work, fn);
> }
> @@ -288,7 +290,7 @@ EXPORT_SYMBOL_GPL(vhost_vq_has_work);
>
> void vhost_poll_queue(struct vhost_poll *poll)
> {
> - vhost_work_queue(poll->dev, &poll->work);
> + vhost_vq_work_queue(poll->vq, &poll->work);
> }
> EXPORT_SYMBOL_GPL(vhost_poll_queue);
>
> @@ -510,7 +512,7 @@ void vhost_dev_init(struct vhost_dev *dev,
> vhost_vq_reset(dev, vq);
> if (vq->handle_kick)
> vhost_poll_init(&vq->poll, vq->handle_kick,
> - EPOLLIN, dev);
> + EPOLLIN, dev, vq);
> }
> }
> EXPORT_SYMBOL_GPL(vhost_dev_init);
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index b64ee4ef387d..d9b8abbe3a26 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -41,13 +41,15 @@ struct vhost_poll {
> struct vhost_work work;
> __poll_t mask;
> struct vhost_dev *dev;
> + struct vhost_virtqueue *vq;
> };
>
> void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
> void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
>
> void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
> - __poll_t mask, struct vhost_dev *dev);
> + __poll_t mask, struct vhost_dev *dev,
> + struct vhost_virtqueue *vq);
> int vhost_poll_start(struct vhost_poll *poll, struct file *file);
> void vhost_poll_stop(struct vhost_poll *poll);
> void vhost_poll_queue(struct vhost_poll *poll);
> --
> 2.25.1
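
API-wise this means any driver using vhost_poll_init() now has to name
the vq its poll belongs to. A hedged sketch (foo_handle_kick is
hypothetical):

	/* Sketch: tie the poll to its vq so vhost_poll_queue() lands on
	 * that vq's worker via vhost_vq_work_queue().
	 */
	vhost_poll_init(&vq->poll, foo_handle_kick, EPOLLIN, dev, vq);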
On Mon, Mar 27, 2023 at 09:17:12PM -0500, Mike Christie wrote:
> Convert from vhost_work_queue to vhost_vq_work_queue.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
> drivers/vhost/vsock.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index c8e6087769a1..1dcbc8669f95 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -285,7 +285,7 @@ vhost_transport_send_pkt(struct sk_buff *skb)
> atomic_inc(&vsock->queued_replies);
>
> virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
> - vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
> + vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
>
> rcu_read_unlock();
> return len;
> @@ -582,7 +582,7 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
> /* Some packets may have been queued before the device was started,
> * let's kick the send worker to send them.
> */
> - vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
> + vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
>
> mutex_unlock(&vsock->dev.mutex);
> return 0;
> --
> 2.25.1
On Mon, Mar 27, 2023 at 09:17:13PM -0500, Mike Christie wrote:
> This patch separates the SCSI cmd completion code paths so we can
> complete cmds based on their vq, instead of having all cmds complete on
> the same worker/CPU. This will be useful with the next patches that
> allow us to create multiple worker threads and bind them to different
> vqs, so we can have completions running on different threads/CPUs.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> drivers/vhost/scsi.c | 56 ++++++++++++++++++++------------------------
> 1 file changed, 26 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
> index 3b0b556c57ef..ecb5cd7450b8 100644
> --- a/drivers/vhost/scsi.c
> +++ b/drivers/vhost/scsi.c
> @@ -167,6 +167,7 @@ MODULE_PARM_DESC(max_io_vqs, "Set the max number of IO virtqueues a vhost scsi d
>
> struct vhost_scsi_virtqueue {
> struct vhost_virtqueue vq;
> + struct vhost_scsi *vs;
> /*
> * Reference counting for inflight reqs, used for flush operation. At
> * each time, one reference tracks new commands submitted, while we
> @@ -181,6 +182,9 @@ struct vhost_scsi_virtqueue {
> struct vhost_scsi_cmd *scsi_cmds;
> struct sbitmap scsi_tags;
> int max_cmds;
> +
> + struct vhost_work completion_work;
> + struct llist_head completion_list;
> };
>
> struct vhost_scsi {
> @@ -190,12 +194,8 @@ struct vhost_scsi {
>
> struct vhost_dev dev;
> struct vhost_scsi_virtqueue *vqs;
> - unsigned long *compl_bitmap;
> struct vhost_scsi_inflight **old_inflight;
>
> - struct vhost_work vs_completion_work; /* cmd completion work item */
> - struct llist_head vs_completion_list; /* cmd completion queue */
> -
> struct vhost_work vs_event_work; /* evt injection work item */
> struct llist_head vs_event_list; /* evt injection queue */
>
> @@ -368,10 +368,11 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd)
> } else {
> struct vhost_scsi_cmd *cmd = container_of(se_cmd,
> struct vhost_scsi_cmd, tvc_se_cmd);
> - struct vhost_scsi *vs = cmd->tvc_vhost;
> + struct vhost_scsi_virtqueue *svq = container_of(cmd->tvc_vq,
> + struct vhost_scsi_virtqueue, vq);
>
> - llist_add(&cmd->tvc_completion_list, &vs->vs_completion_list);
> - vhost_work_queue(&vs->dev, &vs->vs_completion_work);
> + llist_add(&cmd->tvc_completion_list, &svq->completion_list);
> + vhost_vq_work_queue(&svq->vq, &svq->completion_work);
> }
> }
>
> @@ -534,17 +535,17 @@ static void vhost_scsi_evt_work(struct vhost_work *work)
> */
> static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
> {
> - struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
> - vs_completion_work);
> + struct vhost_scsi_virtqueue *svq = container_of(work,
> + struct vhost_scsi_virtqueue, completion_work);
> struct virtio_scsi_cmd_resp v_rsp;
> struct vhost_scsi_cmd *cmd, *t;
> struct llist_node *llnode;
> struct se_cmd *se_cmd;
> struct iov_iter iov_iter;
> - int ret, vq;
> + bool signal = false;
> + int ret;
>
> - bitmap_zero(vs->compl_bitmap, vs->dev.nvqs);
> - llnode = llist_del_all(&vs->vs_completion_list);
> + llnode = llist_del_all(&svq->completion_list);
> llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) {
> se_cmd = &cmd->tvc_se_cmd;
>
> @@ -564,21 +565,17 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
> cmd->tvc_in_iovs, sizeof(v_rsp));
> ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter);
> if (likely(ret == sizeof(v_rsp))) {
> - struct vhost_scsi_virtqueue *q;
> + signal = true;
> +
> vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0);
> - q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
> - vq = q - vs->vqs;
> - __set_bit(vq, vs->compl_bitmap);
> } else
> pr_err("Faulted on virtio_scsi_cmd_resp\n");
>
> vhost_scsi_release_cmd_res(se_cmd);
> }
>
> - vq = -1;
> - while ((vq = find_next_bit(vs->compl_bitmap, vs->dev.nvqs, vq + 1))
> - < vs->dev.nvqs)
> - vhost_signal(&vs->dev, &vs->vqs[vq].vq);
> + if (signal)
> + vhost_signal(&svq->vs->dev, &svq->vq);
> }
>
> static struct vhost_scsi_cmd *
> @@ -1795,6 +1792,7 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
>
> static int vhost_scsi_open(struct inode *inode, struct file *f)
> {
> + struct vhost_scsi_virtqueue *svq;
> struct vhost_scsi *vs;
> struct vhost_virtqueue **vqs;
> int r = -ENOMEM, i, nvqs = vhost_scsi_max_io_vqs;
> @@ -1813,10 +1811,6 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
> }
> nvqs += VHOST_SCSI_VQ_IO;
>
> - vs->compl_bitmap = bitmap_alloc(nvqs, GFP_KERNEL);
> - if (!vs->compl_bitmap)
> - goto err_compl_bitmap;
> -
> vs->old_inflight = kmalloc_array(nvqs, sizeof(*vs->old_inflight),
> GFP_KERNEL | __GFP_ZERO);
> if (!vs->old_inflight)
> @@ -1831,7 +1825,6 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
> if (!vqs)
> goto err_local_vqs;
>
> - vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work);
> vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work);
>
> vs->vs_events_nr = 0;
> @@ -1842,8 +1835,14 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
> vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
> vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
> for (i = VHOST_SCSI_VQ_IO; i < nvqs; i++) {
> - vqs[i] = &vs->vqs[i].vq;
> - vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
> + svq = &vs->vqs[i];
> +
> + vqs[i] = &svq->vq;
> + svq->vs = vs;
> + init_llist_head(&svq->completion_list);
> + vhost_work_init(&svq->completion_work,
> + vhost_scsi_complete_cmd_work);
> + svq->vq.handle_kick = vhost_scsi_handle_kick;
> }
> vhost_dev_init(&vs->dev, vqs, nvqs, UIO_MAXIOV,
> VHOST_SCSI_WEIGHT, 0, true, NULL);
> @@ -1858,8 +1857,6 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
> err_vqs:
> kfree(vs->old_inflight);
> err_inflight:
> - bitmap_free(vs->compl_bitmap);
> -err_compl_bitmap:
> kvfree(vs);
> err_vs:
> return r;
> @@ -1879,7 +1876,6 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
> kfree(vs->dev.vqs);
> kfree(vs->vqs);
> kfree(vs->old_inflight);
> - bitmap_free(vs->compl_bitmap);
> kvfree(vs);
> return 0;
> }
> --
> 2.25.1
On Mon, Mar 27, 2023 at 09:17:14PM -0500, Mike Christie wrote:
> Convert from vhost_work_queue to vhost_vq_work_queue.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
> drivers/vhost/scsi.c | 18 +++++++++---------
> 1 file changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
> index ecb5cd7450b8..3e86b5fbeca6 100644
> --- a/drivers/vhost/scsi.c
> +++ b/drivers/vhost/scsi.c
> @@ -363,8 +363,9 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd)
> if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) {
> struct vhost_scsi_tmf *tmf = container_of(se_cmd,
> struct vhost_scsi_tmf, se_cmd);
> + struct vhost_virtqueue *vq = &tmf->svq->vq;
>
> - vhost_work_queue(&tmf->vhost->dev, &tmf->vwork);
> + vhost_vq_work_queue(vq, &tmf->vwork);
> } else {
> struct vhost_scsi_cmd *cmd = container_of(se_cmd,
> struct vhost_scsi_cmd, tvc_se_cmd);
> @@ -1357,11 +1358,9 @@ static void vhost_scsi_ctl_handle_kick(struct vhost_work *work)
> }
>
> static void
> -vhost_scsi_send_evt(struct vhost_scsi *vs,
> - struct vhost_scsi_tpg *tpg,
> - struct se_lun *lun,
> - u32 event,
> - u32 reason)
> +vhost_scsi_send_evt(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
> + struct vhost_scsi_tpg *tpg, struct se_lun *lun,
> + u32 event, u32 reason)
> {
> struct vhost_scsi_evt *evt;
>
> @@ -1383,7 +1382,7 @@ vhost_scsi_send_evt(struct vhost_scsi *vs,
> }
>
> llist_add(&evt->list, &vs->vs_event_list);
> - vhost_work_queue(&vs->dev, &vs->vs_event_work);
> + vhost_vq_work_queue(vq, &vs->vs_event_work);
> }
>
> static void vhost_scsi_evt_handle_kick(struct vhost_work *work)
> @@ -1397,7 +1396,8 @@ static void vhost_scsi_evt_handle_kick(struct vhost_work *work)
> goto out;
>
> if (vs->vs_events_missed)
> - vhost_scsi_send_evt(vs, NULL, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
> + vhost_scsi_send_evt(vs, vq, NULL, NULL, VIRTIO_SCSI_T_NO_EVENT,
> + 0);
> out:
> mutex_unlock(&vq->mutex);
> }
> @@ -2016,7 +2016,7 @@ vhost_scsi_do_plug(struct vhost_scsi_tpg *tpg,
> goto unlock;
>
> if (vhost_has_feature(vq, VIRTIO_SCSI_F_HOTPLUG))
> - vhost_scsi_send_evt(vs, tpg, lun,
> + vhost_scsi_send_evt(vs, vq, tpg, lun,
> VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
> unlock:
> mutex_unlock(&vq->mutex);
> --
> 2.25.1
On Mon, Mar 27, 2023 at 09:17:15PM -0500, Mike Christie wrote:
> vhost_work_queue is no longer used. Each driver now uses the poll- or
> vq-based queueing, so remove vhost_work_queue.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
> drivers/vhost/vhost.c | 6 ------
> drivers/vhost/vhost.h | 1 -
> 2 files changed, 7 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 6968f8fc17e8..f812daf25648 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -263,12 +263,6 @@ static void vhost_work_flush_on(struct vhost_worker *worker)
> wait_for_completion(&flush.wait_event);
> }
>
> -void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
> -{
> - vhost_work_queue_on(dev->worker, work);
> -}
> -EXPORT_SYMBOL_GPL(vhost_work_queue);
> -
> void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work)
> {
> vhost_work_queue_on(vq->worker, work);
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index d9b8abbe3a26..ef55fae2517c 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -45,7 +45,6 @@ struct vhost_poll {
> };
>
> void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
> -void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
>
> void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
> __poll_t mask, struct vhost_dev *dev,
> --
> 2.25.1
On Mon, Mar 27, 2023 at 09:17:16PM -0500, Mike Christie wrote:
> With one worker we will always send the SCSI cmd responses and then the
> TMF rsp, because LIO will always complete the SCSI cmds first and then
> call into us to send the TMF response.
>
> With multiple workers, the IO vq workers could be running while the
> TMF/ctl vq worker is, so this has us do a flush before completing the
> TMF, to make sure the cmds have completed when its work is later queued
> and run.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
> drivers/vhost/scsi.c | 22 +++++++++++++++++++---
> drivers/vhost/vhost.c | 6 ++++++
> drivers/vhost/vhost.h | 1 +
> 3 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
> index 3e86b5fbeca6..48dba4fe2dac 100644
> --- a/drivers/vhost/scsi.c
> +++ b/drivers/vhost/scsi.c
> @@ -1158,12 +1158,28 @@ static void vhost_scsi_tmf_resp_work(struct vhost_work *work)
> {
> struct vhost_scsi_tmf *tmf = container_of(work, struct vhost_scsi_tmf,
> vwork);
> - int resp_code;
> + struct vhost_virtqueue *ctl_vq, *vq;
> + int resp_code, i;
> +
> + if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE) {
> + /*
> + * Flush IO vqs that don't share a worker with the ctl to make
> + * sure they have sent their responses before us.
> + */
> + ctl_vq = &tmf->vhost->vqs[VHOST_SCSI_VQ_CTL].vq;
> + for (i = VHOST_SCSI_VQ_IO; i < tmf->vhost->dev.nvqs; i++) {
> + vq = &tmf->vhost->vqs[i].vq;
> +
> + if (vhost_vq_is_setup(vq) &&
> + vq->worker != ctl_vq->worker) {
> + vhost_vq_flush(vq);
> + }
> + }
>
> - if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE)
> resp_code = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
> - else
> + } else {
> resp_code = VIRTIO_SCSI_S_FUNCTION_REJECTED;
> + }
>
> vhost_scsi_send_tmf_resp(tmf->vhost, &tmf->svq->vq, tmf->in_iovs,
> tmf->vq_desc, &tmf->resp_iov, resp_code);
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index f812daf25648..1fa5e9a49092 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -275,6 +275,12 @@ void vhost_dev_flush(struct vhost_dev *dev)
> }
> EXPORT_SYMBOL_GPL(vhost_dev_flush);
>
> +void vhost_vq_flush(struct vhost_virtqueue *vq)
> +{
> + vhost_work_flush_on(vq->worker);
> +}
> +EXPORT_SYMBOL_GPL(vhost_vq_flush);
> +
> /* A lockless hint for busy polling code to exit the loop */
> bool vhost_vq_has_work(struct vhost_virtqueue *vq)
> {
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index ef55fae2517c..395707c680e5 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -53,6 +53,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file);
> void vhost_poll_stop(struct vhost_poll *poll);
> void vhost_poll_queue(struct vhost_poll *poll);
> void vhost_dev_flush(struct vhost_dev *dev);
> +void vhost_vq_flush(struct vhost_virtqueue *vq);
>
> struct vhost_log {
> u64 addr;
> --
> 2.25.1
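
To spell the ordering out (sketch timeline, not from the patch):

	/* worker A (IO vq):  [cmd resp 1][cmd resp 2][flush work]
	 * worker B (ctl vq): ...vhost_vq_flush(io vq) waits...[TMF resp]
	 *
	 * The flush work is queued behind the already-queued cmd
	 * responses on worker A, so by the time worker B sends the TMF
	 * response the cmd responses have hit the ring.
	 */

When a vq shares the ctl vq's worker the flush is skipped
(vq->worker == ctl_vq->worker), since FIFO ordering on that single
worker already gives the same guarantee.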
On Mon, Mar 27, 2023 at 09:17:17PM -0500, Mike Christie wrote:
> For vhost-scsi with 3 vqs, and a workload that tries to use those vqs
> like:
>
> fio --filename=/dev/sdb --direct=1 --rw=randrw --bs=4k \
> --ioengine=libaio --iodepth=128 --numjobs=3
>
> the single vhost worker thread becomes a bottleneck and we are stuck
> at around 500K IOPS no matter how many jobs, virtqueues, and CPUs are
> used.
>
> To better utilize virtqueues and available CPUs, this patch allows
> userspace to create workers and bind them to vqs. You can have N workers
> per dev and also share N workers with M vqs.
>
> With the patches, doing a worker per vq, we can scale to at least
> 16 vCPUs/vqs (that's my system limit) with the same fio command as
> above but with numjobs=16:
>
> fio --filename=/dev/sdb --direct=1 --rw=randrw --bs=4k \
> --ioengine=libaio --iodepth=64 --numjobs=16
>
> which gives around 2326K IOPS.
>
> Note that for testing I dropped iodepth to 64 above because the
> vhost/virt layer supports only 1024 total commands per device. And the
> only tuning I did was set LIO's emulate_pr to 0 to avoid LIO's PR lock
> in the main IO path, which becomes an issue at around 12
> jobs/virtqueues.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
> drivers/vhost/vhost.c | 177 ++++++++++++++++++++++++++++---
> drivers/vhost/vhost.h | 4 +-
> include/uapi/linux/vhost.h | 22 ++++
> include/uapi/linux/vhost_types.h | 15 +++
> 4 files changed, 204 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 1fa5e9a49092..e40699e83c6d 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -271,7 +271,11 @@ EXPORT_SYMBOL_GPL(vhost_vq_work_queue);
>
> void vhost_dev_flush(struct vhost_dev *dev)
> {
> - vhost_work_flush_on(dev->worker);
> + struct vhost_worker *worker;
> + unsigned long i;
> +
> + xa_for_each(&dev->worker_xa, i, worker)
> + vhost_work_flush_on(worker);
> }
> EXPORT_SYMBOL_GPL(vhost_dev_flush);
>
> @@ -489,7 +493,6 @@ void vhost_dev_init(struct vhost_dev *dev,
> dev->umem = NULL;
> dev->iotlb = NULL;
> dev->mm = NULL;
> - dev->worker = NULL;
> dev->iov_limit = iov_limit;
> dev->weight = weight;
> dev->byte_weight = byte_weight;
> @@ -499,7 +502,7 @@ void vhost_dev_init(struct vhost_dev *dev,
> INIT_LIST_HEAD(&dev->read_list);
> INIT_LIST_HEAD(&dev->pending_list);
> spin_lock_init(&dev->iotlb_lock);
> -
> + xa_init_flags(&dev->worker_xa, XA_FLAGS_ALLOC);
>
> for (i = 0; i < dev->nvqs; ++i) {
> vq = dev->vqs[i];
> @@ -562,32 +565,67 @@ static void vhost_detach_mm(struct vhost_dev *dev)
> dev->mm = NULL;
> }
>
> -static void vhost_worker_free(struct vhost_dev *dev)
> +static void vhost_worker_put(struct vhost_dev *dev, struct vhost_worker *worker)
> {
> - struct vhost_worker *worker = dev->worker;
> -
> if (!worker)
> return;
>
> - dev->worker = NULL;
> + if (!refcount_dec_and_test(&worker->refcount))
> + return;
> +
> WARN_ON(!llist_empty(&worker->work_list));
> vhost_task_stop(worker->vtsk);
> kfree(worker);
> }
>
> +static void vhost_vq_detach_worker(struct vhost_virtqueue *vq)
> +{
> + if (vq->worker)
> + vhost_worker_put(vq->dev, vq->worker);
> + vq->worker = NULL;
> +}
> +
> +static void vhost_workers_free(struct vhost_dev *dev)
> +{
> + struct vhost_worker *worker;
> + unsigned long i;
> +
> + if (!dev->use_worker)
> + return;
> +
> + for (i = 0; i < dev->nvqs; i++)
> + vhost_vq_detach_worker(dev->vqs[i]);
> + /*
> + * Drop the refcount taken during allocation, and handle the default
> + * worker and the cases where userspace might have crashed or was lazy
> + * and did a VHOST_NEW_WORKER but not a VHOST_FREE_WORKER.
> + */
> + xa_for_each(&dev->worker_xa, i, worker) {
> + xa_erase(&dev->worker_xa, worker->id);
> + vhost_worker_put(dev, worker);
> + }
> + xa_destroy(&dev->worker_xa);
> +}
> +
> static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
> {
> struct vhost_worker *worker;
> struct vhost_task *vtsk;
> char name[TASK_COMM_LEN];
> + int ret;
> + u32 id;
>
> worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
> if (!worker)
> return NULL;
>
> - dev->worker = worker;
> worker->kcov_handle = kcov_common_handle();
> init_llist_head(&worker->work_list);
> + /*
> + * We increase the refcount for the initial creation and then
> + * later each time it's attached to a virtqueue.
> + */
> + refcount_set(&worker->refcount, 1);
> snprintf(name, sizeof(name), "vhost-%d", current->pid);
>
> vtsk = vhost_task_create(vhost_worker, worker, name);
> @@ -596,14 +634,104 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
>
> worker->vtsk = vtsk;
> vhost_task_start(vtsk);
> +
> + ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
> + if (ret < 0)
> + goto stop_worker;
> + worker->id = id;
> +
> return worker;
>
> +stop_worker:
> + vhost_task_stop(vtsk);
> free_worker:
> kfree(worker);
> - dev->worker = NULL;
> return NULL;
> }
>
> +/* Caller must have device and virtqueue mutex */
> +static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
> + struct vhost_worker *worker)
> +{
> + refcount_inc(&worker->refcount);
> + vhost_vq_detach_worker(vq);
> + vq->worker = worker;
> +}
> +
> +/* Caller must have device and virtqueue mutex */
> +static int vhost_vq_attach_worker(struct vhost_virtqueue *vq,
> + struct vhost_vring_worker *info)
> +{
> + unsigned long index = info->worker_id;
> + struct vhost_dev *dev = vq->dev;
> + struct vhost_worker *worker;
> +
> + if (!dev->use_worker)
> + return -EINVAL;
> +
> + /*
> + * We don't support setting a worker on an active vq to make flushing
> + * and removal simple.
> + */
> + if (vhost_vq_get_backend(vq))
> + return -EBUSY;
> +
> + worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT);
> + if (!worker || worker->id != info->worker_id)
> + return -ENODEV;
> +
> + __vhost_vq_attach_worker(vq, worker);
> + return 0;
> +}
> +
> +/* Caller must have device mutex */
> +static int vhost_new_worker(struct vhost_dev *dev,
> + struct vhost_worker_state *info)
> +{
> + struct vhost_worker *worker;
> +
> + if (!dev->use_worker)
> + return -EINVAL;
> +
> + worker = vhost_worker_create(dev);
> + if (!worker)
> + return -ENOMEM;
> +
> + info->worker_id = worker->id;
> + return 0;
> +}
> +
> +/* Caller must have device mutex */
> +static int vhost_free_worker(struct vhost_dev *dev,
> + struct vhost_worker_state *info)
> +{
> + unsigned long index = info->worker_id;
> + struct vhost_worker *worker;
> +
> + if (!dev->use_worker)
> + return -EINVAL;
> +
> + worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT);
> + if (!worker || worker->id != info->worker_id)
> + return -ENODEV;
> +
> + /*
> + * We can free the worker if it's not attached to any virtqueues.
> + */
> + if (refcount_read(&worker->refcount) != 1)
> + return -EBUSY;
> +
> + xa_erase(&dev->worker_xa, worker->id);
> + /*
> + * Make sure if there was a flush that saw the worker in the XA that
> + * it has completed.
> + */
> + vhost_work_flush_on(worker);
> +
> + vhost_worker_put(dev, worker);
> + return 0;
> +}
> +
> /* Caller should have device mutex */
> long vhost_dev_set_owner(struct vhost_dev *dev)
> {
> @@ -624,7 +752,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
> goto err_worker;
>
> for (i = 0; i < dev->nvqs; i++)
> - dev->vqs[i]->worker = worker;
> + __vhost_vq_attach_worker(dev->vqs[i], worker);
> }
>
> err = vhost_dev_alloc_iovecs(dev);
> @@ -633,7 +761,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
>
> return 0;
> err_iovecs:
> - vhost_worker_free(dev);
> + vhost_workers_free(dev);
> err_worker:
> vhost_detach_mm(dev);
> err_mm:
> @@ -726,7 +854,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> dev->iotlb = NULL;
> vhost_clear_msg(dev);
> wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
> - vhost_worker_free(dev);
> + vhost_workers_free(dev);
> vhost_detach_mm(dev);
> }
> EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
> @@ -1616,6 +1744,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
> struct eventfd_ctx *ctx = NULL;
> u32 __user *idxp = argp;
> struct vhost_virtqueue *vq;
> + struct vhost_vring_worker w;
> struct vhost_vring_state s;
> struct vhost_vring_file f;
> u32 idx;
> @@ -1723,7 +1852,16 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
> if (copy_to_user(argp, &s, sizeof(s)))
> r = -EFAULT;
> break;
> - default:
> + case VHOST_ATTACH_VRING_WORKER:
> + if (copy_from_user(&w, argp, sizeof(w))) {
> + r = -EFAULT;
> + break;
> + }
> + r = vhost_vq_attach_worker(vq, &w);
> + if (!r && copy_to_user(argp, &w, sizeof(w)))
> + r = -EFAULT;
> + break;
> + default:
> r = -ENOIOCTLCMD;
> }
>
> @@ -1776,6 +1914,7 @@ EXPORT_SYMBOL_GPL(vhost_init_device_iotlb);
> /* Caller must have device mutex */
> long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
> {
> + struct vhost_worker_state w;
> struct eventfd_ctx *ctx;
> u64 p;
> long r;
> @@ -1836,6 +1975,18 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
> if (ctx)
> eventfd_ctx_put(ctx);
> break;
> + case VHOST_NEW_WORKER:
> + r = vhost_new_worker(d, &w);
> + if (!r && copy_to_user(argp, &w, sizeof(w)))
> + r = -EFAULT;
> + break;
> + case VHOST_FREE_WORKER:
> + if (copy_from_user(&w, argp, sizeof(w))) {
> + r = -EFAULT;
> + break;
> + }
> + r = vhost_free_worker(d, &w);
> + break;
> default:
> r = -ENOIOCTLCMD;
> break;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 395707c680e5..a67ae8293c38 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -30,6 +30,8 @@ struct vhost_worker {
> struct vhost_task *vtsk;
> struct llist_head work_list;
> u64 kcov_handle;
> + refcount_t refcount;
> + u32 id;
> };
>
> /* Poll a file (eventfd or socket) */
> @@ -156,7 +158,6 @@ struct vhost_dev {
> struct vhost_virtqueue **vqs;
> int nvqs;
> struct eventfd_ctx *log_ctx;
> - struct vhost_worker *worker;
> struct vhost_iotlb *umem;
> struct vhost_iotlb *iotlb;
> spinlock_t iotlb_lock;
> @@ -166,6 +167,7 @@ struct vhost_dev {
> int iov_limit;
> int weight;
> int byte_weight;
> + struct xarray worker_xa;
> bool use_worker;
> int (*msg_handler)(struct vhost_dev *dev, u32 asid,
> struct vhost_iotlb_msg *msg);
> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> index 92e1b700b51c..7329e7f349dd 100644
> --- a/include/uapi/linux/vhost.h
> +++ b/include/uapi/linux/vhost.h
> @@ -45,6 +45,23 @@
> #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
> /* Specify an eventfd file descriptor to signal on log write. */
> #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
> +/* By default, a device gets one vhost_worker that its virtqueues share. This
> + * command allows the owner of the device to create an additional vhost_worker
> + * for the device. It can later be bound to 1 or more of its virtqueues using
> + * the VHOST_ATTACH_VRING_WORKER command.
> + *
> + * This must be called after VHOST_SET_OWNER and the caller must be the owner
> + * of the device. The new thread will inherit caller's cgroups and namespaces,
> + * and will share the caller's memory space. The new thread will also be
> + * counted against the caller's RLIMIT_NPROC value.
> + */
> +#define VHOST_NEW_WORKER _IOR(VHOST_VIRTIO, 0x8, struct vhost_worker_state)
> +/* Free a worker created with VHOST_NEW_WORKER if it's not attached to any
> + * virtqueue. If userspace is not able to call this for workers it created,
> + * the kernel will free all the device's workers when the device is closed and
> + * the last reference to the device has been released.
> + */
> +#define VHOST_FREE_WORKER _IOW(VHOST_VIRTIO, 0x9, struct vhost_worker_state)
>
> /* Ring setup. */
> /* Set number of descriptors in ring. This parameter can not
> @@ -70,6 +87,11 @@
> #define VHOST_VRING_BIG_ENDIAN 1
> #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
> #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
> +/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's
> + * virtqueues. This must be done before the virtqueue is active.
> + */
> +#define VHOST_ATTACH_VRING_WORKER _IOW(VHOST_VIRTIO, 0x15, \
> + struct vhost_vring_worker)
>
> /* The following ioctls use eventfd file descriptors to signal and poll
> * for events. */
> diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
> index c5690a8992d8..ad0fe2e721be 100644
> --- a/include/uapi/linux/vhost_types.h
> +++ b/include/uapi/linux/vhost_types.h
> @@ -47,6 +47,21 @@ struct vhost_vring_addr {
> __u64 log_guest_addr;
> };
>
> +struct vhost_worker_state {
> + /*
> + * For VHOST_NEW_WORKER the kernel will return the new vhost_worker id.
> + * For VHOST_FREE_WORKER this must be set to the id of the vhost_worker
> + * to free.
> + */
> + int worker_id;
> +};
> +
> +struct vhost_vring_worker {
> + unsigned int index;
> + /* The id of the vhost_worker returned from VHOST_NEW_WORKER */
> + int worker_id;
> +};
> +
> /* no alignment requirement */
> struct vhost_iotlb_msg {
> __u64 iova;
> --
> 2.25.1
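
For anyone wanting to try this from userspace, a minimal sketch of the
ioctl flow, assuming the UAPI structs and ioctls as posted above. The vq
numbering (ctl=0, evt=1, IO vqs from 2) matches vhost-scsi; NUM_IO_VQS
and setup_workers() are made up for the example, and error handling is
minimal:

	#include <sys/ioctl.h>
	#include <linux/vhost.h>

	#define FIRST_IO_VQ 2
	#define NUM_IO_VQS  4

	/* Sketch: create one worker per IO vq and attach it before the
	 * vring is activated (attaching to an active vq fails with
	 * -EBUSY per the patch).
	 */
	static int setup_workers(int vhost_fd)
	{
		struct vhost_worker_state state;
		struct vhost_vring_worker w;
		int i;

		/* Workers can only be created by the device owner. */
		if (ioctl(vhost_fd, VHOST_SET_OWNER, NULL))
			return -1;

		for (i = 0; i < NUM_IO_VQS; i++) {
			/* The kernel fills in state.worker_id. */
			if (ioctl(vhost_fd, VHOST_NEW_WORKER, &state))
				return -1;

			w.index = FIRST_IO_VQ + i;
			w.worker_id = state.worker_id;
			if (ioctl(vhost_fd, VHOST_ATTACH_VRING_WORKER, &w))
				return -1;
		}
		return 0;
	}

Workers not freed with VHOST_FREE_WORKER are cleaned up by the kernel
when the device is released, per the UAPI comment above.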