public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] vfio-pci: Virtualize PCIe & AF FLR
@ 2016-09-22 16:07 Alex Williamson
  2016-09-22 16:41 ` Greg
  0 siblings, 1 reply; 2+ messages in thread
From: Alex Williamson @ 2016-09-22 16:07 UTC (permalink / raw)
  To: kvm; +Cc: alex.williamson, linux-kernel

We use a BAR restore trick to try to detect when a user has performed
a device reset, possibly through FLR or other backdoors, to put things
back into a working state.  This is important for backdoor resets, but
we can actually just virtualize the "front door" resets provided via
PCIe and AF FLR.  Set these bits as virtualized + writable, allowing
the default write to set them in vconfig, then we can simply check the
bit, perform an FLR of our own, and clear the bit.  We don't actually
have the granularity in PCI to specify the type of reset we want to
do, but generally devices don't implement both PCIe and AF FLR and
we'll favor these over other types of reset, so we should generally
lineup.  We do test whether the device provides the requested FLR type
to stay consistent with hardware capabilities though.

This seems to fix several instance of devices getting into bad states
with userspace drivers, like dpdk, running inside a VM.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/vfio_pci_config.c |   82 ++++++++++++++++++++++++++++++++++--
 1 file changed, 77 insertions(+), 5 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 688691d..3884888 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -804,6 +804,40 @@ static int __init init_pci_cap_pcix_perm(struct perm_bits *perm)
 	return 0;
 }
 
+static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos,
+				 int count, struct perm_bits *perm,
+				 int offset, __le32 val)
+{
+	__le16 *ctrl = (__le16 *)(vdev->vconfig + pos -
+				  offset + PCI_EXP_DEVCTL);
+
+	count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
+	if (count < 0)
+		return count;
+
+	/*
+	 * The FLR bit is virtualized, if set and the device supports PCIe
+	 * FLR, issue a reset_function.  Regardless, clear the bit, the spec
+	 * requires it to be always read as zero.  NB, reset_function might
+	 * not use a PCIe FLR, we don't have that level of granularity.
+	 */
+	if (*ctrl & cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR)) {
+		u32 cap;
+		int ret;
+
+		*ctrl &= ~cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR);
+
+		ret = pci_user_read_config_dword(vdev->pdev,
+						 pos - offset + PCI_EXP_DEVCAP,
+						 &cap);
+
+		if (!ret && (cap & PCI_EXP_DEVCAP_FLR))
+			pci_try_reset_function(vdev->pdev);
+	}
+
+	return count;
+}
+
 /* Permissions for PCI Express capability */
 static int __init init_pci_cap_exp_perm(struct perm_bits *perm)
 {
@@ -811,26 +845,64 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm)
 	if (alloc_perm_bits(perm, PCI_CAP_EXP_ENDPOINT_SIZEOF_V2))
 		return -ENOMEM;
 
+	perm->writefn = vfio_exp_config_write;
+
 	p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
 
 	/*
-	 * Allow writes to device control fields (includes FLR!)
-	 * but not to devctl_phantom which could confuse IOMMU
-	 * or to the ARI bit in devctl2 which is set at probe time
+	 * Allow writes to device control fields, except devctl_phantom,
+	 * which could confuse IOMMU, and the ARI bit in devctl2, which
+	 * is set at probe time.  FLR gets virtualized via our writefn.
 	 */
-	p_setw(perm, PCI_EXP_DEVCTL, NO_VIRT, ~PCI_EXP_DEVCTL_PHANTOM);
+	p_setw(perm, PCI_EXP_DEVCTL,
+	       PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM);
 	p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI);
 	return 0;
 }
 
+static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos,
+				int count, struct perm_bits *perm,
+				int offset, __le32 val)
+{
+	u8 *ctrl = vdev->vconfig + pos - offset + PCI_AF_CTRL;
+
+	count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
+	if (count < 0)
+		return count;
+
+	/*
+	 * The FLR bit is virtualized, if set and the device supports AF
+	 * FLR, issue a reset_function.  Regardless, clear the bit, the spec
+	 * requires it to be always read as zero.  NB, reset_function might
+	 * not use an AF FLR, we don't have that level of granularity.
+	 */
+	if (*ctrl & PCI_AF_CTRL_FLR) {
+		u8 cap;
+		int ret;
+
+		*ctrl &= ~PCI_AF_CTRL_FLR;
+
+		ret = pci_user_read_config_byte(vdev->pdev,
+						pos - offset + PCI_AF_CAP,
+						&cap);
+
+		if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP))
+			pci_try_reset_function(vdev->pdev);
+	}
+
+	return count;
+}
+
 /* Permissions for Advanced Function capability */
 static int __init init_pci_cap_af_perm(struct perm_bits *perm)
 {
 	if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_AF]))
 		return -ENOMEM;
 
+	perm->writefn = vfio_af_config_write;
+
 	p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
-	p_setb(perm, PCI_AF_CTRL, NO_VIRT, PCI_AF_CTRL_FLR);
+	p_setb(perm, PCI_AF_CTRL, PCI_AF_CTRL_FLR, PCI_AF_CTRL_FLR);
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] vfio-pci: Virtualize PCIe & AF FLR
  2016-09-22 16:07 [PATCH] vfio-pci: Virtualize PCIe & AF FLR Alex Williamson
@ 2016-09-22 16:41 ` Greg
  0 siblings, 0 replies; 2+ messages in thread
From: Greg @ 2016-09-22 16:41 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, linux-kernel

On Thu, 2016-09-22 at 10:07 -0600, Alex Williamson wrote:
> We use a BAR restore trick to try to detect when a user has performed
> a device reset, possibly through FLR or other backdoors, to put things
> back into a working state.  This is important for backdoor resets, but
> we can actually just virtualize the "front door" resets provided via
> PCIe and AF FLR.  Set these bits as virtualized + writable, allowing
> the default write to set them in vconfig, then we can simply check the
> bit, perform an FLR of our own, and clear the bit.  We don't actually
> have the granularity in PCI to specify the type of reset we want to
> do, but generally devices don't implement both PCIe and AF FLR and
> we'll favor these over other types of reset, so we should generally
> lineup.  We do test whether the device provides the requested FLR type
> to stay consistent with hardware capabilities though.
> 
> This seems to fix several instance of devices getting into bad states
> with userspace drivers, like dpdk, running inside a VM.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
>  drivers/vfio/pci/vfio_pci_config.c |   82 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 77 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
> index 688691d..3884888 100644
> --- a/drivers/vfio/pci/vfio_pci_config.c
> +++ b/drivers/vfio/pci/vfio_pci_config.c
> @@ -804,6 +804,40 @@ static int __init init_pci_cap_pcix_perm(struct perm_bits *perm)
>  	return 0;
>  }
>  
> +static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos,
> +				 int count, struct perm_bits *perm,
> +				 int offset, __le32 val)
> +{
> +	__le16 *ctrl = (__le16 *)(vdev->vconfig + pos -
> +				  offset + PCI_EXP_DEVCTL);
> +
> +	count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
> +	if (count < 0)
> +		return count;
> +
> +	/*
> +	 * The FLR bit is virtualized, if set and the device supports PCIe
> +	 * FLR, issue a reset_function.  Regardless, clear the bit, the spec
> +	 * requires it to be always read as zero.  NB, reset_function might
> +	 * not use a PCIe FLR, we don't have that level of granularity.
> +	 */
> +	if (*ctrl & cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR)) {
> +		u32 cap;
> +		int ret;
> +
> +		*ctrl &= ~cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR);
> +
> +		ret = pci_user_read_config_dword(vdev->pdev,
> +						 pos - offset + PCI_EXP_DEVCAP,
> +						 &cap);
> +
> +		if (!ret && (cap & PCI_EXP_DEVCAP_FLR))
> +			pci_try_reset_function(vdev->pdev);
> +	}
> +
> +	return count;
> +}
> +
>  /* Permissions for PCI Express capability */
>  static int __init init_pci_cap_exp_perm(struct perm_bits *perm)
>  {
> @@ -811,26 +845,64 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm)
>  	if (alloc_perm_bits(perm, PCI_CAP_EXP_ENDPOINT_SIZEOF_V2))
>  		return -ENOMEM;
>  
> +	perm->writefn = vfio_exp_config_write;
> +
>  	p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
>  
>  	/*
> -	 * Allow writes to device control fields (includes FLR!)
> -	 * but not to devctl_phantom which could confuse IOMMU
> -	 * or to the ARI bit in devctl2 which is set at probe time
> +	 * Allow writes to device control fields, except devctl_phantom,
> +	 * which could confuse IOMMU, and the ARI bit in devctl2, which
> +	 * is set at probe time.  FLR gets virtualized via our writefn.
>  	 */
> -	p_setw(perm, PCI_EXP_DEVCTL, NO_VIRT, ~PCI_EXP_DEVCTL_PHANTOM);
> +	p_setw(perm, PCI_EXP_DEVCTL,
> +	       PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM);
>  	p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI);
>  	return 0;
>  }
>  
> +static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos,
> +				int count, struct perm_bits *perm,
> +				int offset, __le32 val)
> +{
> +	u8 *ctrl = vdev->vconfig + pos - offset + PCI_AF_CTRL;
> +
> +	count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
> +	if (count < 0)
> +		return count;
> +
> +	/*
> +	 * The FLR bit is virtualized, if set and the device supports AF
> +	 * FLR, issue a reset_function.  Regardless, clear the bit, the spec
> +	 * requires it to be always read as zero.  NB, reset_function might
> +	 * not use an AF FLR, we don't have that level of granularity.
> +	 */
> +	if (*ctrl & PCI_AF_CTRL_FLR) {
> +		u8 cap;
> +		int ret;
> +
> +		*ctrl &= ~PCI_AF_CTRL_FLR;
> +
> +		ret = pci_user_read_config_byte(vdev->pdev,
> +						pos - offset + PCI_AF_CAP,
> +						&cap);
> +
> +		if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP))
> +			pci_try_reset_function(vdev->pdev);
> +	}
> +
> +	return count;
> +}
> +
>  /* Permissions for Advanced Function capability */
>  static int __init init_pci_cap_af_perm(struct perm_bits *perm)
>  {
>  	if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_AF]))
>  		return -ENOMEM;
>  
> +	perm->writefn = vfio_af_config_write;
> +
>  	p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
> -	p_setb(perm, PCI_AF_CTRL, NO_VIRT, PCI_AF_CTRL_FLR);
> +	p_setb(perm, PCI_AF_CTRL, PCI_AF_CTRL_FLR, PCI_AF_CTRL_FLR);
>  	return 0;
>  }
>  
> 

Looks good.

Reviewed-by: Greg Rose <grose@lightfleet.com>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2016-09-22 16:41 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-09-22 16:07 [PATCH] vfio-pci: Virtualize PCIe & AF FLR Alex Williamson
2016-09-22 16:41 ` Greg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox