Re: [PATCH v5 3/3] vfio/pci: make use of irq_update_devid and optimize irq ops

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Alex Williamson <alex.williamson@redhat.com>
To: Ben Luo <luoben@linux.alibaba.com>
Cc: tglx@linutronix.de, linux-kernel@vger.kernel.org,
	tao.ma@linux.alibaba.com, gerry@linux.alibaba.com,
	nanhai.zou@linux.alibaba.com
Subject: Re: [PATCH v5 3/3] vfio/pci: make use of irq_update_devid and optimize irq ops
Date: Fri, 30 Aug 2019 14:06:16 -0600	[thread overview]
Message-ID: <20190830140616.090954b7@x1.home> (raw)
In-Reply-To: <9a8b3fc5d82c3c46feb0de673fbe898cfd884d63.1567151182.git.luoben@linux.alibaba.com>

On Fri, 30 Aug 2019 16:42:06 +0800
Ben Luo <luoben@linux.alibaba.com> wrote:

> When userspace (e.g. qemu) triggers a switch between KVM
> irqfd and userspace eventfd, only dev_id of irqaction
> (i.e. the "trigger" in this patch's context) will be
> changed, but a free-then-request-irq action is taken in
> current code. And, irq affinity setting in VM will also
> trigger a free-then-request-irq action, which actually
> changes nothing, but only need to bounce the irqbypass
> registraion in case that posted-interrupt is in use.
> 
> This patch makes use of irq_update_devid() and optimize
> both cases above, which reduces the risk of losing interrupt
> and also cuts some overhead.
> 
> Signed-off-by: Ben Luo <luoben@linux.alibaba.com>
> ---
>  drivers/vfio/pci/vfio_pci_intrs.c | 124 ++++++++++++++++++++++++++------------
>  1 file changed, 87 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
> index 3fa3f72..d3a93d7 100644
> --- a/drivers/vfio/pci/vfio_pci_intrs.c
> +++ b/drivers/vfio/pci/vfio_pci_intrs.c
> @@ -284,70 +284,120 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
>  static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
>  				      int vector, int fd, bool msix)
>  {
> +	struct eventfd_ctx *trigger = NULL;
>  	struct pci_dev *pdev = vdev->pdev;
> -	struct eventfd_ctx *trigger;
>  	int irq, ret;
>  
>  	if (vector < 0 || vector >= vdev->num_ctx)
>  		return -EINVAL;
>  
> +	if (fd >= 0) {
> +		trigger = eventfd_ctx_fdget(fd);
> +		if (IS_ERR(trigger)) {
> +			/* oops, going to disable this interrupt */
> +			dev_info(&pdev->dev,
> +				 "get ctx error on bad fd: %d for vector:%d\n",
> +				 fd, vector);

I think a user could trigger this maliciously as a denial of service by
simply providing a bogus file descriptor.  The user is informed of the
error by the return value, why do we need to spam the logs?

> +		}
> +	}
> +
>  	irq = pci_irq_vector(pdev, vector);
>  
> +	/*
> +	 * 'trigger' is NULL or invalid, disable the interrupt
> +	 * 'trigger' is same as before, only bounce the bypass registration
> +	 * 'trigger' is a new invalid one, update it to irqaction and other

s/invalid/valid/

> +	 * data structures referencing to the old one; fallback to disable
> +	 * the interrupt on error
> +	 */
>  	if (vdev->ctx[vector].trigger) {
> -		free_irq(irq, vdev->ctx[vector].trigger);
> +		/*
> +		 * even if the trigger is unchanged we need to bounce the
> +		 * interrupt bypass connection to allow affinity changes in
> +		 * the guest to be realized.
> +		 */
>  		irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
> -		kfree(vdev->ctx[vector].name);
> -		eventfd_ctx_put(vdev->ctx[vector].trigger);
> -		vdev->ctx[vector].trigger = NULL;
> +
> +		if (vdev->ctx[vector].trigger == trigger) {
> +			/* avoid duplicated referencing to the same trigger */
> +			eventfd_ctx_put(trigger);
> +
> +		} else if (trigger && !IS_ERR(trigger)) {
> +			ret = irq_update_devid(irq,
> +					       vdev->ctx[vector].trigger, trigger);
> +			if (unlikely(ret)) {
> +				dev_info(&pdev->dev,
> +					 "update devid of %d (token %p) failed: %d\n",
> +					 irq, vdev->ctx[vector].trigger, ret);
> +				eventfd_ctx_put(trigger);
> +				free_irq(irq, vdev->ctx[vector].trigger);
> +				kfree(vdev->ctx[vector].name);
> +				eventfd_ctx_put(vdev->ctx[vector].trigger);
> +				vdev->ctx[vector].trigger = NULL;
> +				return ret;
> +			}
> +			eventfd_ctx_put(vdev->ctx[vector].trigger);
> +			vdev->ctx[vector].producer.token = trigger;
> +			vdev->ctx[vector].trigger = trigger;
> +
> +		} else {
> +			free_irq(irq, vdev->ctx[vector].trigger);
> +			kfree(vdev->ctx[vector].name);
> +			eventfd_ctx_put(vdev->ctx[vector].trigger);
> +			vdev->ctx[vector].trigger = NULL;
> +		}
>  	}
>  
>  	if (fd < 0)
>  		return 0;
> +	else if (IS_ERR(trigger))
> +		return PTR_ERR(trigger);
>  
> -	vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
> -					   msix ? "x" : "", vector,
> -					   pci_name(pdev));
> -	if (!vdev->ctx[vector].name)
> -		return -ENOMEM;
> +	if (!vdev->ctx[vector].trigger) {
> +		vdev->ctx[vector].name = kasprintf(GFP_KERNEL,
> +						   "vfio-msi%s[%d](%s)",
> +						   msix ? "x" : "", vector,
> +						   pci_name(pdev));
> +		if (!vdev->ctx[vector].name) {
> +			eventfd_ctx_put(trigger);
> +			return -ENOMEM;
> +		}
>  
> -	trigger = eventfd_ctx_fdget(fd);
> -	if (IS_ERR(trigger)) {
> -		kfree(vdev->ctx[vector].name);
> -		return PTR_ERR(trigger);
> -	}
> +		/*
> +		 * The MSIx vector table resides in device memory which may be
> +		 * cleared via backdoor resets. We don't allow direct access to
> +		 * the vector table so even if a userspace driver attempts to
> +		 * save/restore around such a reset it would be unsuccessful.
> +		 * To avoid this, restore the cached value of the message prior
> +		 * to enabling.
> +		 */
> +		if (msix) {
> +			struct msi_msg msg;
>  
> -	/*
> -	 * The MSIx vector table resides in device memory which may be cleared
> -	 * via backdoor resets. We don't allow direct access to the vector
> -	 * table so even if a userspace driver attempts to save/restore around
> -	 * such a reset it would be unsuccessful. To avoid this, restore the
> -	 * cached value of the message prior to enabling.
> -	 */
> -	if (msix) {
> -		struct msi_msg msg;
> +			get_cached_msi_msg(irq, &msg);
> +			pci_write_msi_msg(irq, &msg);
> +		}
>  
> -		get_cached_msi_msg(irq, &msg);
> -		pci_write_msi_msg(irq, &msg);
> -	}
> +		ret = request_irq(irq, vfio_msihandler, 0,
> +				  vdev->ctx[vector].name, trigger);
> +		if (ret) {
> +			kfree(vdev->ctx[vector].name);
> +			eventfd_ctx_put(trigger);
> +			return ret;
> +		}
>  
> -	ret = request_irq(irq, vfio_msihandler, 0,
> -			  vdev->ctx[vector].name, trigger);
> -	if (ret) {
> -		kfree(vdev->ctx[vector].name);
> -		eventfd_ctx_put(trigger);
> -		return ret;
> +		vdev->ctx[vector].producer.token = trigger;
> +		vdev->ctx[vector].producer.irq = irq;
> +		vdev->ctx[vector].trigger = trigger;
>  	}
>  
> -	vdev->ctx[vector].producer.token = trigger;
> -	vdev->ctx[vector].producer.irq = irq;
> +	/* setup bypass connection and make irte updated */
>  	ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
>  	if (unlikely(ret))
>  		dev_info(&pdev->dev,
>  		"irq bypass producer (token %p) registration fails: %d\n",
>  		vdev->ctx[vector].producer.token, ret);
>  
> -	vdev->ctx[vector].trigger = trigger;
> -
>  	return 0;
>  }
>

next prev parent reply	other threads:[~2019-08-30 20:06 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-30  8:41 [PATCH v5 0/3] genirq/vfio: Introduce irq_update_devid() and optimize VFIO irq ops Ben Luo
2019-08-30  8:41 ` [PATCH v5 1/3] genirq: enhance error recovery code in free irq Ben Luo
2019-08-30  8:41 ` [PATCH v5 2/3] genirq: introduce irq_update_devid() Ben Luo
2019-08-30  8:42 ` [PATCH v5 3/3] vfio/pci: make use of irq_update_devid and optimize irq ops Ben Luo
2019-08-30 20:06   ` Alex Williamson [this message]
2019-08-31  4:38     ` Ben Luo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190830140616.090954b7@x1.home \
    --to=alex.williamson@redhat.com \
    --cc=gerry@linux.alibaba.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luoben@linux.alibaba.com \
    --cc=nanhai.zou@linux.alibaba.com \
    --cc=tao.ma@linux.alibaba.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox