Intel-Wired-Lan Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Lingyu Liu <lingyu.liu@intel.com>
To: intel-wired-lan@lists.osuosl.org
Cc: kevin.tian@intel.com, yi.l.liu@intel.com, phani.r.burra@intel.com
Subject: [Intel-wired-lan] [PATCH iwl-next V2 15/15] vfio/ice: support iommufd vfio compat mode
Date: Wed, 21 Jun 2023 09:11:12 +0000	[thread overview]
Message-ID: <20230621091112.44945-16-lingyu.liu@intel.com> (raw)
In-Reply-To: <20230621091112.44945-1-lingyu.liu@intel.com>

From: Yahui Cao <yahui.cao@intel.com>

In iommufd vfio compat mode, vfio_dma_rw() fails because
vfio_device_has_container() returns false and device->iommufd_access is
NULL.

Currently device->iommufd_access is not created when the vfio device is
backed by a PCI device. To support IOVA access, manually create an
iommufd_access context with iommufd_access_create() and
iommufd_access_attach(), and access the IOVA with iommufd_access_rw().
To minimize the impact of the iommufd_access, store its context in the
driver data, create it only right before loading the device state, and
destroy it as soon as the device state has been loaded.

To stay compatible with legacy vfio, use vfio_device_has_container() to
check which vfio uAPI is in use. In legacy vfio mode, call vfio_dma_rw()
directly; otherwise call iommufd_access_rw().

Signed-off-by: Yahui Cao <yahui.cao@intel.com>
Signed-off-by: Lingyu Liu <lingyu.liu@intel.com>
---
 .../net/ethernet/intel/ice/ice_migration.c    |  23 +--
 drivers/vfio/pci/ice/ice_vfio_pci.c           | 171 +++++++++++++++++-
 include/linux/net/intel/ice_migration.h       |   4 +-
 3 files changed, 179 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_migration.c b/drivers/net/ethernet/intel/ice/ice_migration.c
index 0bc897ab0dc2..c5bdfee1e3b0 100644
--- a/drivers/net/ethernet/intel/ice/ice_migration.c
+++ b/drivers/net/ethernet/intel/ice/ice_migration.c
@@ -440,7 +440,7 @@ ice_migration_restore_rx_head(struct ice_vf *vf,
 static int
 ice_migration_restore_tx_head(struct ice_vf *vf,
 			      struct ice_migration_dev_state *devstate,
-			      struct vfio_device *vdev)
+			      dma_rw_handler_t handler, void *data)
 {
 	struct ice_tx_desc *tx_desc_dummy, *tx_desc;
 	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
@@ -509,15 +509,15 @@ ice_migration_restore_tx_head(struct ice_vf *vf,
 			ret = -EINVAL;
 			goto err;
 		}
-		ret = vfio_dma_rw(vdev, tx_ring->dma, (void *)tx_desc,
-				  tx_ring->count * sizeof(tx_desc[0]), false);
+		ret = handler(data, tx_ring->dma, (void *)tx_desc,
+			      tx_ring->count * sizeof(tx_desc[0]), false);
 		if (ret) {
 			dev_err(dev, "kvm read guest tx ring error: %d\n",
 				ret);
 			goto err;
 		}
-		ret = vfio_dma_rw(vdev, tx_ring->dma, (void *)tx_desc_dummy,
-				  tx_heads[i] * sizeof(tx_desc_dummy[0]), true);
+		ret = handler(data, tx_ring->dma, (void *)tx_desc_dummy,
+			      tx_heads[i] * sizeof(tx_desc_dummy[0]), true);
 		if (ret) {
 			dev_err(dev, "kvm write guest return error: %d\n",
 				ret);
@@ -546,8 +546,8 @@ ice_migration_restore_tx_head(struct ice_vf *vf,
 				vf->vf_id, i);
 			goto err;
 		}
-		ret = vfio_dma_rw(vdev, tx_ring->dma, (void *)tx_desc,
-				  tx_ring->count * sizeof(tx_desc[0]), true);
+		ret = handler(data, tx_ring->dma, (void *)tx_desc,
+			      tx_ring->count * sizeof(tx_desc[0]), true);
 		if (ret) {
 			dev_err(dev, "kvm write guest tx ring error: %d\n",
 				ret);
@@ -567,7 +567,8 @@ ice_migration_restore_tx_head(struct ice_vf *vf,
  * @opaque: pointer to VF handler in ice vdev
  * @buf: pointer to device state buf in migration buffer
  * @buf_sz: size of migration buffer
- * @vdev: pointer to vfio device
+ * @handler: dma_rw_handler
+ * @data: dma_rw_handler data
  *
  * This function uses the device state saved in migration buffer
  * to restore device state at dst VM
@@ -575,7 +576,7 @@ ice_migration_restore_tx_head(struct ice_vf *vf,
  * Return 0 for success, negative for error
  */
 int ice_migration_restore_devstate(void *opaque, const u8 *buf, u64 buf_sz,
-				   struct vfio_device *vdev)
+				   dma_rw_handler_t handler, void *data)
 {
 	struct ice_migration_virtchnl_msg_slot *msg_slot;
 	struct ice_vf *vf = (struct ice_vf *)opaque;
@@ -587,7 +588,7 @@ int ice_migration_restore_devstate(void *opaque, const u8 *buf, u64 buf_sz,
 	u64 slot_sz;
 	int ret = 0;
 
-	if (!buf || !vdev)
+	if (!buf)
 		return -EINVAL;
 
 	total_sz += sizeof(struct ice_migration_dev_state);
@@ -658,7 +659,7 @@ int ice_migration_restore_devstate(void *opaque, const u8 *buf, u64 buf_sz,
 	 * After virtual channel replay completes, tx rings are enabled.
 	 * Then restore tx head for tx rings by injecting dummy packets.
 	 */
-	ret = ice_migration_restore_tx_head(vf, devstate, vdev);
+	ret = ice_migration_restore_tx_head(vf, devstate, handler, data);
 	if (ret) {
 		dev_err(dev, "failed to restore tx queue head\n");
 		goto err;
diff --git a/drivers/vfio/pci/ice/ice_vfio_pci.c b/drivers/vfio/pci/ice/ice_vfio_pci.c
index 389a2be41896..45b95d8eef5c 100644
--- a/drivers/vfio/pci/ice/ice_vfio_pci.c
+++ b/drivers/vfio/pci/ice/ice_vfio_pci.c
@@ -9,6 +9,9 @@
 #include <linux/net/intel/ice_migration.h>
 #include <linux/vfio_pci_core.h>
 #include <linux/anon_inodes.h>
+#include <linux/iommufd.h>
+
+MODULE_IMPORT_NS(IOMMUFD);
 
 #define DRIVER_DESC     "ICE VFIO PCI - User Level meta-driver for Intel E800 device family"
 
@@ -90,6 +93,10 @@ struct ice_vfio_pci_core_device {
 	u8 __iomem *io_base;
 	void *vf_handle;
 	bool is_dst;
+
+	u32 pt_id;
+	struct iommufd_ctx *ictx;
+	struct iommufd_access *user;
 };
 
 /**
@@ -176,6 +183,112 @@ ice_vfio_pci_load_regs(struct ice_vfio_pci_core_device *ice_vdev,
 		writel(regs->rx_tail[i], io_base + IAVF_QRX_TAIL1(i));
 }
 
+/**
+ * ice_vfio_pci_emulated_unmap - callback to unmap IOVA
+ * @data: function handler data
+ * @iova: I/O virtual address
+ * @len: IOVA length
+ *
+ * This function is called when the application does a DMA unmap. In some
+ * cases a driver needs to explicitly do some unmap ops if its device is not
+ * backed by an IOMMU. Nothing is required here since this is a PCI-based
+ * vfio device, which is backed by an IOMMU; the body is intentionally empty.
+ */
+static void
+ice_vfio_pci_emulated_unmap(void *data, unsigned long iova, unsigned long len)
+{
+}
+
+static const struct iommufd_access_ops ice_vfio_user_ops = {
+	.needs_pin_pages = 1,	/* NOTE(review): only rw is used; needed? */
+	.unmap = ice_vfio_pci_emulated_unmap,	/* intentionally a no-op */
+};
+
+/**
+ * ice_vfio_dma_rw - read/write function for device IOVA address space
+ * @data: function handler data, a struct ice_vfio_pci_core_device pointer
+ * @iova: I/O virtual address
+ * @buf: buffer for read/write access
+ * @len: buffer length
+ * @write: true for write, false for read
+ *
+ * Read/write function for device IOVA access. Since vfio_dma_rw() may fail
+ * in iommufd vfio compatible mode, check at runtime which uAPI is in use
+ * and pick the corresponding access method for the IOVA access.
+ *
+ * Return 0 for success, negative value for failure.
+ */
+static int ice_vfio_dma_rw(void *data, dma_addr_t iova,
+			   void *buf, size_t len, bool write)
+{
+	struct ice_vfio_pci_core_device *ice_vdev =
+			(struct ice_vfio_pci_core_device *)data;
+	struct vfio_device *vdev = &ice_vdev->core_device.vdev;
+	unsigned int flags = 0;
+
+	if (vfio_device_has_container(vdev))
+		return vfio_dma_rw(vdev, iova, buf, len, write);
+
+	if (!current->mm)
+		flags |= IOMMUFD_ACCESS_RW_KTHREAD;
+	if (write)
+		flags |= IOMMUFD_ACCESS_RW_WRITE;
+	return iommufd_access_rw(ice_vdev->user, iova, buf, len, flags);
+}
+
+/**
+ * ice_vfio_pci_load_state_init - VFIO device state reloading initialization
+ * @ice_vdev: pointer to ice vfio pci core device structure
+ *
+ * Create and attach the iommufd access unless a legacy container is in use.
+ *
+ * Return 0 for success, negative value for failure.
+ */
+static int
+ice_vfio_pci_load_state_init(struct ice_vfio_pci_core_device *ice_vdev)
+{
+	struct device *dev = &ice_vdev->core_device.pdev->dev;
+	struct iommufd_access *user;
+	u32 pt_id = 0;	/* filled by iommufd_access_create(); not used after */
+	int ret;
+
+	if (vfio_device_has_container(&ice_vdev->core_device.vdev))
+		return 0;
+
+	user = iommufd_access_create(ice_vdev->ictx, &ice_vfio_user_ops,
+				     ice_vdev, &pt_id);
+	if (IS_ERR(user)) {
+		ret = PTR_ERR(user);
+		dev_err(dev, "iommufd_access_create() return %d\n", ret);
+		return ret;
+	}
+
+	ret = iommufd_access_attach(user, ice_vdev->pt_id);
+	if (ret) {
+		dev_err(dev, "iommufd_access_attach() return %d\n", ret);
+		iommufd_access_destroy(user);
+		return ret;
+	}
+
+	ice_vdev->user = user;
+	return 0;
+}
+
+/**
+ * ice_vfio_pci_load_state_exit - VFIO device state reloading exit
+ * @ice_vdev: pointer to ice vfio pci core device structure
+ *
+ * Tear down the iommufd access set up by ice_vfio_pci_load_state_init().
+ */
+static void
+ice_vfio_pci_load_state_exit(struct ice_vfio_pci_core_device *ice_vdev)
+{
+	if (!vfio_device_has_container(&ice_vdev->core_device.vdev)) {
+		iommufd_access_destroy(ice_vdev->user);
+		ice_vdev->user = NULL;	/* do not keep a stale pointer */
+	}
+}
+
 /**
  * ice_vfio_pci_load_state - VFIO device state reloading
  * @ice_vdev: pointer to ice vfio pci core device structure
@@ -192,12 +305,19 @@ static int __must_check
 ice_vfio_pci_load_state(struct ice_vfio_pci_core_device *ice_vdev)
 {
 	struct ice_vfio_pci_migration_file *migf = ice_vdev->resuming_migf;
+	int ret;
 
+	ret = ice_vfio_pci_load_state_init(ice_vdev);
+	if (ret)
+		return ret;
 	ice_vfio_pci_load_regs(ice_vdev, &migf->mig_data.regs);
-	return ice_migration_restore_devstate(ice_vdev->vf_handle,
-					      migf->mig_data.dev_state,
-					      SZ_128K,
-					      &ice_vdev->core_device.vdev);
+	ret = ice_migration_restore_devstate(ice_vdev->vf_handle,
+					     migf->mig_data.dev_state,
+					     SZ_128K,
+					     ice_vfio_dma_rw, ice_vdev);
+	ice_vfio_pci_load_state_exit(ice_vdev);
+
+	return ret;
 }
 
 /**
@@ -744,6 +864,43 @@ static int ice_vfio_pci_core_init_dev(struct vfio_device *core_vdev)
 	return vfio_pci_core_init_dev(core_vdev);
 }
 
+static int ice_vfio_pci_attach_ioas(struct vfio_device *core_vdev, u32 *pt_id)
+{
+	struct ice_vfio_pci_core_device *ice_vdev = container_of(core_vdev,
+			struct ice_vfio_pci_core_device, core_device.vdev);
+	/* Saved for the later iommufd_access_attach() at state load. */
+	ice_vdev->pt_id = *pt_id;
+	return vfio_iommufd_physical_attach_ioas(core_vdev, pt_id);
+}
+
+static int ice_vfio_pci_bind(struct vfio_device *core_vdev,
+			     struct iommufd_ctx *ictx, u32 *out_device_id)
+{
+	struct ice_vfio_pci_core_device *ice_vdev = container_of(core_vdev,
+			struct ice_vfio_pci_core_device, core_device.vdev);
+	int ret;
+
+	ret = vfio_iommufd_physical_bind(core_vdev, ictx, out_device_id);
+	if (ret)
+		return ret;
+
+	/* Bound: keep our own ctx reference until ice_vfio_pci_unbind(). */
+	iommufd_ctx_get(ictx);
+	ice_vdev->ictx = ictx;
+	return 0;
+}
+
+static void ice_vfio_pci_unbind(struct vfio_device *core_vdev)
+{
+	struct ice_vfio_pci_core_device *ice_vdev = container_of(core_vdev,
+			struct ice_vfio_pci_core_device, core_device.vdev);
+
+	vfio_iommufd_physical_unbind(core_vdev);
+	/* Drop the reference taken in ice_vfio_pci_bind(). */
+	iommufd_ctx_put(ice_vdev->ictx);
+	ice_vdev->ictx = NULL;
+}
+
 static const struct vfio_device_ops ice_vfio_pci_ops = {
 	.name		= "ice-vfio-pci",
 	.init		= ice_vfio_pci_core_init_dev,
@@ -757,9 +914,9 @@ static const struct vfio_device_ops ice_vfio_pci_ops = {
 	.mmap		= vfio_pci_core_mmap,
 	.request	= vfio_pci_core_request,
 	.match		= vfio_pci_core_match,
-	.bind_iommufd	= vfio_iommufd_physical_bind,
-	.unbind_iommufd	= vfio_iommufd_physical_unbind,
-	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
+	.bind_iommufd	= ice_vfio_pci_bind,
+	.unbind_iommufd	= ice_vfio_pci_unbind,
+	.attach_ioas	= ice_vfio_pci_attach_ioas,
 };
 
 /**
diff --git a/include/linux/net/intel/ice_migration.h b/include/linux/net/intel/ice_migration.h
index 45c3469df55d..f97ed6940afd 100644
--- a/include/linux/net/intel/ice_migration.h
+++ b/include/linux/net/intel/ice_migration.h
@@ -7,6 +7,8 @@
 
 #if IS_ENABLED(CONFIG_ICE_VFIO_PCI)
 
+typedef int (*dma_rw_handler_t)(void *data, dma_addr_t iova, void *buf,
+				size_t len, bool write);
 #define IAVF_QRX_TAIL_MAX 256
 #define QTX_HEAD_RESTORE_DELAY_MAX 100
 #define QTX_HEAD_RESTORE_DELAY_SLEEP_US_MIN 10
@@ -19,7 +21,7 @@ void ice_migration_uninit_vf(void *opaque);
 int ice_migration_suspend_vf(void *opaque, bool mig_dst);
 int ice_migration_save_devstate(void *opaque, u8 *buf, u64 buf_sz);
 int ice_migration_restore_devstate(void *opaque, const u8 *buf, u64 buf_sz,
-				   struct vfio_device *vdev);
+				   dma_rw_handler_t handler, void *data);
 
 #else
 static inline void *ice_migration_get_vf(struct pci_dev *vf_pdev)
-- 
2.25.1

_______________________________________________
Intel-wired-lan mailing list
Intel-wired-lan@osuosl.org
https://lists.osuosl.org/mailman/listinfo/intel-wired-lan

  parent reply	other threads:[~2023-06-21  9:12 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-21  9:10 [Intel-wired-lan] [PATCH iwl-next V2 00/15] Add E800 live migration driver Lingyu Liu
2023-06-21  9:10 ` [Intel-wired-lan] [PATCH iwl-next V2 01/15] ice: Fix missing legacy 32byte RXDID in the supported bitmap Lingyu Liu
2023-06-21  9:10 ` [Intel-wired-lan] [PATCH iwl-next V2 02/15] ice: add function to get rxq context Lingyu Liu
2023-06-21  9:11 ` [Intel-wired-lan] [PATCH iwl-next V2 03/15] ice: check VF migration status before sending messages to VF Lingyu Liu
2023-06-21  9:11 ` [Intel-wired-lan] [PATCH iwl-next V2 04/15] ice: add migration init field and helper functions Lingyu Liu
2023-06-21 13:35   ` Jason Gunthorpe
2023-06-27  7:50     ` Cao, Yahui
2023-06-21  9:11 ` [Intel-wired-lan] [PATCH iwl-next V2 05/15] ice: save VF messages as device state Lingyu Liu
2023-06-21  9:11 ` [Intel-wired-lan] [PATCH iwl-next V2 06/15] ice: save and restore " Lingyu Liu
2023-06-21  9:11 ` [Intel-wired-lan] [PATCH iwl-next V2 07/15] ice: do not notify VF link state during migration Lingyu Liu
2023-06-21  9:11 ` [Intel-wired-lan] [PATCH iwl-next V2 08/15] ice: change VSI id in virtual channel message after migration Lingyu Liu
2023-06-21  9:11 ` [Intel-wired-lan] [PATCH iwl-next V2 09/15] ice: save and restore RX queue head Lingyu Liu
2023-06-21  9:11 ` [Intel-wired-lan] [PATCH iwl-next V2 10/15] ice: save and restore TX " Lingyu Liu
2023-06-21 14:37   ` Jason Gunthorpe
2023-06-27  6:55     ` Tian, Kevin
2023-07-03  5:27       ` Cao, Yahui
2023-07-03 21:03         ` Jason Gunthorpe
2023-07-04  7:35           ` Tian, Kevin
2023-06-28  8:11     ` Liu, Yi L
2023-06-28 12:39       ` Jason Gunthorpe
2023-07-03 12:54         ` Liu, Yi L
2023-07-04  7:38           ` Tian, Kevin
2023-07-04 17:59             ` Peter Xu
2023-07-10 15:54               ` Jason Gunthorpe
2023-07-17 21:43                 ` Peter Xu
2023-07-18 15:38                   ` Jason Gunthorpe
2023-07-18 17:36                     ` Peter Xu
2023-06-21  9:11 ` [Intel-wired-lan] [PATCH iwl-next V2 11/15] ice: stop device before saving device states Lingyu Liu
2023-06-21  9:11 ` [Intel-wired-lan] [PATCH iwl-next V2 12/15] ice: mask VF advanced capabilities if live migration is activated Lingyu Liu
2023-06-21  9:11 ` [Intel-wired-lan] [PATCH iwl-next V2 13/15] vfio/ice: implement vfio_pci driver for E800 devices Lingyu Liu
2023-06-21 14:23   ` Jason Gunthorpe
2023-06-27  9:00     ` Liu, Lingyu
2023-06-21  9:11 ` [Intel-wired-lan] [PATCH iwl-next V2 14/15] vfio: Expose vfio_device_has_container() Lingyu Liu
2023-06-21  9:11 ` Lingyu Liu [this message]
2023-06-21 14:40   ` [Intel-wired-lan] [PATCH iwl-next V2 15/15] vfio/ice: support iommufd vfio compat mode Jason Gunthorpe
2023-06-27  8:09     ` Cao, Yahui

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230621091112.44945-16-lingyu.liu@intel.com \
    --to=lingyu.liu@intel.com \
    --cc=intel-wired-lan@lists.osuosl.org \
    --cc=kevin.tian@intel.com \
    --cc=phani.r.burra@intel.com \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox