virtualization.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Jason Wang <jasowang@redhat.com>
Cc: linux-scsi@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	target-devel@vger.kernel.org, stefanha@redhat.com,
	pbonzini@redhat.com
Subject: Re: [PATCH V3 11/11] vhost: allow userspace to create workers
Date: Tue, 26 Oct 2021 09:09:52 -0400	[thread overview]
Message-ID: <20211026090923-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <8aee8f07-76bd-f111-bc5f-fc5cad46ce56@redhat.com>

On Tue, Oct 26, 2021 at 01:37:14PM +0800, Jason Wang wrote:
> 
> 在 2021/10/22 下午1:19, Mike Christie 写道:
> > This patch allows userspace to create workers and bind them to vqs. You
> > can have N workers per dev and also share N workers with M vqs.
> > 
> > Signed-off-by: Mike Christie <michael.christie@oracle.com>
> 
> 
> A question, who is the best one to determine the binding? Is it the VMM
> (Qemu etc) or the management stack? If the latter, it looks to me it's
> better to expose this via sysfs?

I think it's a bit much to expect this from management.

> 
> > ---
> >   drivers/vhost/vhost.c            | 99 ++++++++++++++++++++++++++++----
> >   drivers/vhost/vhost.h            |  2 +-
> >   include/uapi/linux/vhost.h       | 11 ++++
> >   include/uapi/linux/vhost_types.h | 12 ++++
> >   4 files changed, 112 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index 04f43a6445e1..c86e88d7f35c 100644
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -493,7 +493,6 @@ void vhost_dev_init(struct vhost_dev *dev,
> >   	dev->umem = NULL;
> >   	dev->iotlb = NULL;
> >   	dev->mm = NULL;
> > -	dev->worker = NULL;
> >   	dev->iov_limit = iov_limit;
> >   	dev->weight = weight;
> >   	dev->byte_weight = byte_weight;
> > @@ -576,20 +575,40 @@ static void vhost_worker_stop(struct vhost_worker *worker)
> >   	wait_for_completion(worker->exit_done);
> >   }
> > -static void vhost_worker_free(struct vhost_dev *dev)
> > -{
> > -	struct vhost_worker *worker = dev->worker;
> > +static void vhost_worker_put(struct vhost_worker *worker)
> > +{
> >   	if (!worker)
> >   		return;
> > -	dev->worker = NULL;
> > +	if (!refcount_dec_and_test(&worker->refcount))
> > +		return;
> > +
> >   	WARN_ON(!llist_empty(&worker->work_list));
> >   	vhost_worker_stop(worker);
> >   	kfree(worker);
> >   }
> > -static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
> > +static void vhost_vq_clear_worker(struct vhost_virtqueue *vq)
> > +{
> > +	if (vq->worker)
> > +		vhost_worker_put(vq->worker);
> > +	vq->worker = NULL;
> > +}
> > +
> > +static void vhost_workers_free(struct vhost_dev *dev)
> > +{
> > +	int i;
> > +
> > +	if (!dev->use_worker)
> > +		return;
> > +
> > +	for (i = 0; i < dev->nvqs; i++)
> > +		vhost_vq_clear_worker(dev->vqs[i]);
> > +}
> > +
> > +static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev,
> > +						int init_vq_map_count)
> >   {
> >   	struct vhost_worker *worker;
> >   	struct task_struct *task;
> > @@ -598,9 +617,9 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
> >   	if (!worker)
> >   		return NULL;
> > -	dev->worker = worker;
> >   	worker->kcov_handle = kcov_common_handle();
> >   	init_llist_head(&worker->work_list);
> > +	refcount_set(&worker->refcount, init_vq_map_count);
> >   	/*
> >   	 * vhost used to use the kthread API which ignores all signals by
> > @@ -617,10 +636,58 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
> >   free_worker:
> >   	kfree(worker);
> > -	dev->worker = NULL;
> >   	return NULL;
> >   }
> > +static struct vhost_worker *vhost_worker_find(struct vhost_dev *dev, pid_t pid)
> > +{
> > +	struct vhost_worker *worker = NULL;
> > +	int i;
> > +
> > +	for (i = 0; i < dev->nvqs; i++) {
> > +		if (dev->vqs[i]->worker->task->pid != pid)
> > +			continue;
> > +
> > +		worker = dev->vqs[i]->worker;
> > +		break;
> > +	}
> > +
> > +	return worker;
> > +}
> > +
> > +/* Caller must have device mutex */
> > +static int vhost_vq_setup_worker(struct vhost_virtqueue *vq,
> > +				 struct vhost_vring_worker *info)
> > +{
> > +	struct vhost_dev *dev = vq->dev;
> > +	struct vhost_worker *worker;
> > +
> > +	if (!dev->use_worker)
> > +		return -EINVAL;
> > +
> > +	/* We don't support setting a worker on an active vq */
> > +	if (vq->private_data)
> > +		return -EBUSY;
> 
> 
> Is it valuable to allow the worker switching on active vq?
> 
> 
> > +
> > +	if (info->pid == VHOST_VRING_NEW_WORKER) {
> > +		worker = vhost_worker_create(dev, 1);
> > +		if (!worker)
> > +			return -ENOMEM;
> > +
> > +		info->pid = worker->task->pid;
> > +	} else {
> > +		worker = vhost_worker_find(dev, info->pid);
> > +		if (!worker)
> > +			return -ENODEV;
> > +
> > +		refcount_inc(&worker->refcount);
> > +	}
> > +
> > +	vhost_vq_clear_worker(vq);
> > +	vq->worker = worker;
> > +	return 0;
> > +}
> > +
> >   /* Caller should have device mutex */
> >   long vhost_dev_set_owner(struct vhost_dev *dev)
> >   {
> > @@ -636,7 +703,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
> >   	vhost_attach_mm(dev);
> >   	if (dev->use_worker) {
> > -		worker = vhost_worker_create(dev);
> > +		worker = vhost_worker_create(dev, dev->nvqs);
> >   		if (!worker)
> >   			goto err_worker;
> > @@ -650,7 +717,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
> >   	return 0;
> >   err_iovecs:
> > -	vhost_worker_free(dev);
> > +	vhost_workers_free(dev);
> >   err_worker:
> >   	vhost_detach_mm(dev);
> >   err_mm:
> > @@ -742,7 +809,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> >   	dev->iotlb = NULL;
> >   	vhost_clear_msg(dev);
> >   	wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
> > -	vhost_worker_free(dev);
> > +	vhost_workers_free(dev);
> >   	vhost_detach_mm(dev);
> >   }
> >   EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
> > @@ -1612,6 +1679,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
> >   	struct eventfd_ctx *ctx = NULL;
> >   	u32 __user *idxp = argp;
> >   	struct vhost_virtqueue *vq;
> > +	struct vhost_vring_worker w;
> >   	struct vhost_vring_state s;
> >   	struct vhost_vring_file f;
> >   	u32 idx;
> > @@ -1719,6 +1787,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
> >   		if (copy_to_user(argp, &s, sizeof(s)))
> >   			r = -EFAULT;
> >   		break;
> > +	case VHOST_SET_VRING_WORKER:
> > +		if (copy_from_user(&w, argp, sizeof(w))) {
> > +			r = -EFAULT;
> > +			break;
> > +		}
> > +		r = vhost_vq_setup_worker(vq, &w);
> > +		if (!r && copy_to_user(argp, &w, sizeof(w)))
> > +			r = -EFAULT;
> > +		break;
> >   	default:
> >   		r = -ENOIOCTLCMD;
> >   	}
> > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> > index 33c63b24187a..0911d1a9bd3b 100644
> > --- a/drivers/vhost/vhost.h
> > +++ b/drivers/vhost/vhost.h
> > @@ -35,6 +35,7 @@ struct vhost_worker {
> >   	struct llist_head	work_list;
> >   	u64			kcov_handle;
> >   	unsigned long		flags;
> > +	refcount_t		refcount;
> >   };
> >   /* Poll a file (eventfd or socket) */
> > @@ -160,7 +161,6 @@ struct vhost_dev {
> >   	struct vhost_virtqueue **vqs;
> >   	int nvqs;
> >   	struct eventfd_ctx *log_ctx;
> > -	struct vhost_worker *worker;
> >   	struct vhost_iotlb *umem;
> >   	struct vhost_iotlb *iotlb;
> >   	spinlock_t iotlb_lock;
> > diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> > index c998860d7bbc..e5c0669430e5 100644
> > --- a/include/uapi/linux/vhost.h
> > +++ b/include/uapi/linux/vhost.h
> > @@ -70,6 +70,17 @@
> >   #define VHOST_VRING_BIG_ENDIAN 1
> >   #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
> >   #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
> > +/* By default, a device gets one vhost_worker created during VHOST_SET_OWNER
> > + * that its virtqueues share. This allows userspace to create a vhost_worker
> > + * and map a virtqueue to it or map a virtqueue to an existing worker.
> > + *
> > + * If pid > 0 and it matches an existing vhost_worker thread it will be bound
> > + * to the vq. If pid is VHOST_VRING_NEW_WORKER, then a new worker will be
> > + * created and bound to the vq.
> > + *
> > + * This must be called after VHOST_SET_OWNER and before the vq is active.
> > + */
> > +#define VHOST_SET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x15, struct vhost_vring_worker)
> >   /* The following ioctls use eventfd file descriptors to signal and poll
> >    * for events. */
> > diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
> > index f7f6a3a28977..af654e3cef0e 100644
> > --- a/include/uapi/linux/vhost_types.h
> > +++ b/include/uapi/linux/vhost_types.h
> > @@ -47,6 +47,18 @@ struct vhost_vring_addr {
> >   	__u64 log_guest_addr;
> >   };
> > +#define VHOST_VRING_NEW_WORKER -1
> 
> 
> Do we need VHOST_VRING_FREE_WORKER? And I wonder if using dedicated ioctls
> are better:
> 
> VHOST_VRING_NEW/FREE_WORKER
> VHOST_VRING_ATTACH_WORKER
> 
> etc.
> 
> Thanks
> 
> 
> > +
> > +struct vhost_vring_worker {
> > +	unsigned int index;
> > +	/*
> > +	 * The pid of the vhost worker that the vq will be bound to. If
> > +	 * pid is VHOST_VRING_NEW_WORKER a new worker will be created and its
> > +	 * pid will be returned in pid.
> > +	 */
> > +	__kernel_pid_t pid;
> > +};
> > +
> >   /* no alignment requirement */
> >   struct vhost_iotlb_msg {
> >   	__u64 iova;

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

  reply	other threads:[~2021-10-26 13:10 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-22  5:18 [PATCH V3 00/11] vhost: multiple worker support Mike Christie
2021-10-22  5:19 ` [PATCH] QEMU vhost-scsi: add support for VHOST_SET_VRING_WORKER Mike Christie
2021-10-22  5:19 ` [PATCH V3 01/11] vhost: add vhost_worker pointer to vhost_virtqueue Mike Christie
2021-10-22  5:19 ` [PATCH V3 02/11] vhost, vhost-net: add helper to check if vq has work Mike Christie
2021-10-22  5:19 ` [PATCH V3 03/11] vhost: take worker or vq instead of dev for queueing Mike Christie
2021-10-22  5:19 ` [PATCH V3 04/11] vhost: take worker or vq instead of dev for flushing Mike Christie
2021-10-22  5:19 ` [PATCH V3 05/11] vhost: convert poll work to be vq based Mike Christie
2021-10-22  5:19 ` [PATCH V3 06/11] vhost-sock: convert to vq helpers Mike Christie
2021-10-25  9:08   ` Stefano Garzarella
2021-10-25 16:09     ` michael.christie
2021-10-22  5:19 ` [PATCH V3 07/11] vhost-scsi: make SCSI cmd completion per vq Mike Christie
2021-10-22  5:19 ` [PATCH V3 08/11] vhost-scsi: convert to vq helpers Mike Christie
2021-10-22  5:19 ` [PATCH V3 09/11] vhost-scsi: flush IO vqs then send TMF rsp Mike Christie
2021-10-22  5:19 ` [PATCH V3 10/11] vhost: remove device wide queue/flushing helpers Mike Christie
2021-10-22  5:19 ` [PATCH V3 11/11] vhost: allow userspace to create workers Mike Christie
2021-10-22 10:47   ` Michael S. Tsirkin
2021-10-22 16:12     ` michael.christie
2021-10-22 18:17       ` michael.christie
2021-10-23 20:11         ` Michael S. Tsirkin
2021-10-25 16:04           ` michael.christie
2021-10-25 17:14             ` Michael S. Tsirkin
2021-10-26  5:37   ` Jason Wang
2021-10-26 13:09     ` Michael S. Tsirkin [this message]
2021-10-26 16:36       ` Stefan Hajnoczi
2021-10-26 15:44     ` Stefan Hajnoczi
2021-10-27  2:55       ` Jason Wang
2021-10-27  9:01         ` Stefan Hajnoczi
2021-10-26 16:49     ` michael.christie
2021-10-27  6:02       ` Jason Wang
2021-10-27  9:03       ` Stefan Hajnoczi
2021-10-26 15:22   ` Stefan Hajnoczi
2021-10-26 15:24   ` Stefan Hajnoczi
2021-10-22  6:02 ` [PATCH V3 00/11] vhost: multiple worker support michael.christie
2021-10-22  9:49   ` Michael S. Tsirkin
2021-10-22  9:48 ` Michael S. Tsirkin
2021-10-22 15:54   ` michael.christie
2021-10-23 20:12     ` Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211026090923-mutt-send-email-mst@kernel.org \
    --to=mst@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=stefanha@redhat.com \
    --cc=target-devel@vger.kernel.org \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).