From: Lingyu Liu <lingyu.liu@intel.com>
To: intel-wired-lan@lists.osuosl.org
Cc: kevin.tian@intel.com, yi.l.liu@intel.com, phani.r.burra@intel.com
Subject: [Intel-wired-lan] [PATCH iwl-next V1 15/15] vfio/ice: support iommufd vfio compat mode
Date: Tue, 20 Jun 2023 10:00:01 +0000
Message-ID: <20230620100001.5331-16-lingyu.liu@intel.com>
In-Reply-To: <20230620100001.5331-1-lingyu.liu@intel.com>
From: Yahui Cao <yahui.cao@intel.com>
In iommufd vfio compat mode, vfio_dma_rw() fails because
vfio_device_has_container() returns false and device->iommufd_access is
NULL.

Currently device->iommufd_access is not created when the vfio device is
backed by a PCI device. To support IOVA access in this mode, manually
create an iommufd_access context with
iommufd_access_create()/iommufd_access_attach() and access the IOVA
space with iommufd_access_rw(). To minimize the impact of this context,
store it in the driver data, create it only right before loading the
device state, and destroy it as soon as the load finishes.

To stay compatible with legacy vfio, use vfio_device_has_container() to
detect which vfio uAPI is in use: in legacy vfio mode call vfio_dma_rw()
directly, otherwise call iommufd_access_rw(). The ice core restore path
now takes a dma_rw_handler_t callback and an opaque pointer instead of a
struct vfio_device pointer, so this dispatch lives entirely in the
variant driver.
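For reference, the resulting IOVA access path boils down to the sketch
below (minimal, with error handling trimmed; vdev, ictx, priv, pt_id
and id stand in for the values kept in the driver's private data):

	if (vfio_device_has_container(vdev)) {
		/* legacy vfio uAPI: IOVA access goes through the container */
		ret = vfio_dma_rw(vdev, iova, buf, len, write);
	} else {
		/* iommufd compat mode: transient access context around the load */
		user = iommufd_access_create(ictx, &access_ops, priv, &id);
		ret = iommufd_access_attach(user, pt_id);
		if (!ret)
			ret = iommufd_access_rw(user, iova, buf, len, flags);
		iommufd_access_destroy(user);
	}
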
Signed-off-by: Yahui Cao <yahui.cao@intel.com>
Signed-off-by: Lingyu Liu <lingyu.liu@intel.com>
---
.../net/ethernet/intel/ice/ice_migration.c | 23 +--
drivers/vfio/pci/ice/ice_vfio_pci.c | 171 +++++++++++++++++-
include/linux/net/intel/ice_migration.h | 4 +-
3 files changed, 179 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_migration.c b/drivers/net/ethernet/intel/ice/ice_migration.c
index 0bc897ab0dc2..c5bdfee1e3b0 100644
--- a/drivers/net/ethernet/intel/ice/ice_migration.c
+++ b/drivers/net/ethernet/intel/ice/ice_migration.c
@@ -440,7 +440,7 @@ ice_migration_restore_rx_head(struct ice_vf *vf,
static int
ice_migration_restore_tx_head(struct ice_vf *vf,
struct ice_migration_dev_state *devstate,
- struct vfio_device *vdev)
+ dma_rw_handler_t handler, void *data)
{
struct ice_tx_desc *tx_desc_dummy, *tx_desc;
struct ice_vsi *vsi = ice_get_vf_vsi(vf);
@@ -509,15 +509,15 @@ ice_migration_restore_tx_head(struct ice_vf *vf,
ret = -EINVAL;
goto err;
}
- ret = vfio_dma_rw(vdev, tx_ring->dma, (void *)tx_desc,
- tx_ring->count * sizeof(tx_desc[0]), false);
+ ret = handler(data, tx_ring->dma, (void *)tx_desc,
+ tx_ring->count * sizeof(tx_desc[0]), false);
if (ret) {
dev_err(dev, "kvm read guest tx ring error: %d\n",
ret);
goto err;
}
- ret = vfio_dma_rw(vdev, tx_ring->dma, (void *)tx_desc_dummy,
- tx_heads[i] * sizeof(tx_desc_dummy[0]), true);
+ ret = handler(data, tx_ring->dma, (void *)tx_desc_dummy,
+ tx_heads[i] * sizeof(tx_desc_dummy[0]), true);
if (ret) {
dev_err(dev, "kvm write guest return error: %d\n",
ret);
@@ -546,8 +546,8 @@ ice_migration_restore_tx_head(struct ice_vf *vf,
vf->vf_id, i);
goto err;
}
- ret = vfio_dma_rw(vdev, tx_ring->dma, (void *)tx_desc,
- tx_ring->count * sizeof(tx_desc[0]), true);
+ ret = handler(data, tx_ring->dma, (void *)tx_desc,
+ tx_ring->count * sizeof(tx_desc[0]), true);
if (ret) {
dev_err(dev, "kvm write guest tx ring error: %d\n",
ret);
@@ -567,7 +567,8 @@ ice_migration_restore_tx_head(struct ice_vf *vf,
* @opaque: pointer to VF handler in ice vdev
* @buf: pointer to device state buf in migration buffer
* @buf_sz: size of migration buffer
- * @vdev: pointer to vfio device
+ * @handler: callback used to read/write guest memory by IOVA
+ * @data: opaque pointer passed through to @handler
*
* This function uses the device state saved in migration buffer
* to restore device state at dst VM
@@ -575,7 +576,7 @@ ice_migration_restore_tx_head(struct ice_vf *vf,
* Return 0 for success, negative for error
*/
int ice_migration_restore_devstate(void *opaque, const u8 *buf, u64 buf_sz,
- struct vfio_device *vdev)
+ dma_rw_handler_t handler, void *data)
{
struct ice_migration_virtchnl_msg_slot *msg_slot;
struct ice_vf *vf = (struct ice_vf *)opaque;
@@ -587,7 +588,7 @@ int ice_migration_restore_devstate(void *opaque, const u8 *buf, u64 buf_sz,
u64 slot_sz;
int ret = 0;
- if (!buf || !vdev)
+ if (!buf)
return -EINVAL;
total_sz += sizeof(struct ice_migration_dev_state);
@@ -658,7 +659,7 @@ int ice_migration_restore_devstate(void *opaque, const u8 *buf, u64 buf_sz,
* After virtual channel replay completes, tx rings are enabled.
* Then restore tx head for tx rings by injecting dummy packets.
*/
- ret = ice_migration_restore_tx_head(vf, devstate, vdev);
+ ret = ice_migration_restore_tx_head(vf, devstate, handler, data);
if (ret) {
dev_err(dev, "failed to restore tx queue head\n");
goto err;
diff --git a/drivers/vfio/pci/ice/ice_vfio_pci.c b/drivers/vfio/pci/ice/ice_vfio_pci.c
index 389a2be41896..45b95d8eef5c 100644
--- a/drivers/vfio/pci/ice/ice_vfio_pci.c
+++ b/drivers/vfio/pci/ice/ice_vfio_pci.c
@@ -9,6 +9,9 @@
#include <linux/net/intel/ice_migration.h>
#include <linux/vfio_pci_core.h>
#include <linux/anon_inodes.h>
+#include <linux/iommufd.h>
+
+MODULE_IMPORT_NS(IOMMUFD);
#define DRIVER_DESC "ICE VFIO PCI - User Level meta-driver for Intel E800 device family"
@@ -90,6 +93,10 @@ struct ice_vfio_pci_core_device {
u8 __iomem *io_base;
void *vf_handle;
bool is_dst;
+
+ u32 pt_id;
+ struct iommufd_ctx *ictx;
+ struct iommufd_access *user;
};
/**
@@ -176,6 +183,112 @@ ice_vfio_pci_load_regs(struct ice_vfio_pci_core_device *ice_vdev,
writel(regs->rx_tail[i], io_base + IAVF_QRX_TAIL1(i));
}
+/**
+ * ice_vfio_pci_emulated_unmap - callback to unmap IOVA
+ * @data: function handler data
+ * @iova: I/O virtual address
+ * @len: IOVA length
+ *
+ * This function is called when userspace performs a DMA unmap; in some
+ * cases the driver must explicitly undo the mapping if the device is not
+ * backed by an IOMMU. Nothing is required here since this is a PCI-based
+ * vfio device, which is backed by an IOMMU.
+ */
+static void
+ice_vfio_pci_emulated_unmap(void *data, unsigned long iova, unsigned long len)
+{
+}
+
+static const struct iommufd_access_ops ice_vfio_user_ops = {
+ .needs_pin_pages = 1,
+ .unmap = ice_vfio_pci_emulated_unmap,
+};
+
+/**
+ * ice_vfio_dma_rw - read/write function for device IOVA address space
+ * @data: function handler data
+ * @iova: I/O virtual address
+ * @buf: buffer for read/write access
+ * @len: buffer length
+ * @write: true for write, false for read
+ *
+ * Read/write function for device IOVA access. Since vfio_dma_rw() may fail
+ * in iommufd vfio compat mode, check at runtime which vfio uAPI is in use
+ * and pick the corresponding access method.
+ *
+ * Return 0 for success, negative value for failure.
+ */
+static int ice_vfio_dma_rw(void *data, dma_addr_t iova,
+ void *buf, size_t len, bool write)
+{
+ struct ice_vfio_pci_core_device *ice_vdev =
+ (struct ice_vfio_pci_core_device *)data;
+ struct vfio_device *vdev = &ice_vdev->core_device.vdev;
+ unsigned int flags = 0;
+
+ if (vfio_device_has_container(vdev))
+ return vfio_dma_rw(vdev, iova, buf, len, write);
+
+ if (!current->mm)
+ flags |= IOMMUFD_ACCESS_RW_KTHREAD;
+ if (write)
+ flags |= IOMMUFD_ACCESS_RW_WRITE;
+ return iommufd_access_rw(ice_vdev->user, iova, buf, len, flags);
+}
+
+/**
+ * ice_vfio_pci_load_state_init - VFIO device state reloading initialization
+ * @ice_vdev: pointer to ice vfio pci core device structure
+ *
+ * Initialization procedure before loading device state.
+ *
+ * Return 0 for success, negative value for failure.
+ */
+static int
+ice_vfio_pci_load_state_init(struct ice_vfio_pci_core_device *ice_vdev)
+{
+ struct device *dev = &ice_vdev->core_device.pdev->dev;
+ struct iommufd_access *user;
+ u32 access_id = 0;
+ int ret;
+
+ if (vfio_device_has_container(&ice_vdev->core_device.vdev))
+ return 0;
+
+ user = iommufd_access_create(ice_vdev->ictx, &ice_vfio_user_ops,
+ ice_vdev, &access_id);
+ if (IS_ERR(user)) {
+ ret = PTR_ERR(user);
+ dev_err(dev, "iommufd_access_create() return %d", ret);
+ return ret;
+ }
+
+ ret = iommufd_access_attach(user, ice_vdev->pt_id);
+ if (ret) {
+ dev_err(dev, "iommufd_access_attach() return %d", ret);
+ iommufd_access_destroy(user);
+ return ret;
+ }
+
+ ice_vdev->user = user;
+ return 0;
+}
+
+/**
+ * ice_vfio_pci_load_state_exit - VFIO device state reloading exit
+ * @ice_vdev: pointer to ice vfio pci core device structure
+ *
+ * Exit procedure after loading device state.
+ */
+static void
+ice_vfio_pci_load_state_exit(struct ice_vfio_pci_core_device *ice_vdev)
+{
+ if (vfio_device_has_container(&ice_vdev->core_device.vdev))
+ return;
+
+ iommufd_access_destroy(ice_vdev->user);
+}
+
/**
* ice_vfio_pci_load_state - VFIO device state reloading
* @ice_vdev: pointer to ice vfio pci core device structure
@@ -192,12 +305,19 @@ static int __must_check
ice_vfio_pci_load_state(struct ice_vfio_pci_core_device *ice_vdev)
{
struct ice_vfio_pci_migration_file *migf = ice_vdev->resuming_migf;
+ int ret;
+ ret = ice_vfio_pci_load_state_init(ice_vdev);
+ if (ret)
+ return ret;
ice_vfio_pci_load_regs(ice_vdev, &migf->mig_data.regs);
- return ice_migration_restore_devstate(ice_vdev->vf_handle,
- migf->mig_data.dev_state,
- SZ_128K,
- &ice_vdev->core_device.vdev);
+ ret = ice_migration_restore_devstate(ice_vdev->vf_handle,
+ migf->mig_data.dev_state,
+ SZ_128K,
+ ice_vfio_dma_rw, ice_vdev);
+ ice_vfio_pci_load_state_exit(ice_vdev);
+
+ return ret;
}
/**
@@ -744,6 +864,43 @@ static int ice_vfio_pci_core_init_dev(struct vfio_device *core_vdev)
return vfio_pci_core_init_dev(core_vdev);
}
+static int ice_vfio_pci_attach_ioas(struct vfio_device *core_vdev, u32 *pt_id)
+{
+ struct ice_vfio_pci_core_device *ice_vdev = container_of(core_vdev,
+ struct ice_vfio_pci_core_device, core_device.vdev);
+
+ ice_vdev->pt_id = *pt_id;
+ return vfio_iommufd_physical_attach_ioas(core_vdev, pt_id);
+}
+
+static int ice_vfio_pci_bind(struct vfio_device *core_vdev,
+ struct iommufd_ctx *ictx, u32 *out_device_id)
+{
+ struct ice_vfio_pci_core_device *ice_vdev = container_of(core_vdev,
+ struct ice_vfio_pci_core_device, core_device.vdev);
+ int ret;
+
+ ice_vdev->ictx = ictx;
+ iommufd_ctx_get(ictx);
+
+ ret = vfio_iommufd_physical_bind(core_vdev, ictx, out_device_id);
+ if (ret)
+ iommufd_ctx_put(ictx);
+
+ return ret;
+}
+
+static void ice_vfio_pci_unbind(struct vfio_device *core_vdev)
+{
+ struct ice_vfio_pci_core_device *ice_vdev = container_of(core_vdev,
+ struct ice_vfio_pci_core_device, core_device.vdev);
+
+ vfio_iommufd_physical_unbind(core_vdev);
+
+ iommufd_ctx_put(ice_vdev->ictx);
+ ice_vdev->ictx = NULL;
+}
+
static const struct vfio_device_ops ice_vfio_pci_ops = {
.name = "ice-vfio-pci",
.init = ice_vfio_pci_core_init_dev,
@@ -757,9 +914,9 @@ static const struct vfio_device_ops ice_vfio_pci_ops = {
.mmap = vfio_pci_core_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
- .bind_iommufd = vfio_iommufd_physical_bind,
- .unbind_iommufd = vfio_iommufd_physical_unbind,
- .attach_ioas = vfio_iommufd_physical_attach_ioas,
+ .bind_iommufd = ice_vfio_pci_bind,
+ .unbind_iommufd = ice_vfio_pci_unbind,
+ .attach_ioas = ice_vfio_pci_attach_ioas,
};
/**
diff --git a/include/linux/net/intel/ice_migration.h b/include/linux/net/intel/ice_migration.h
index 45c3469df55d..f97ed6940afd 100644
--- a/include/linux/net/intel/ice_migration.h
+++ b/include/linux/net/intel/ice_migration.h
@@ -7,6 +7,8 @@
#if IS_ENABLED(CONFIG_ICE_VFIO_PCI)
+typedef int (*dma_rw_handler_t)(void *data, dma_addr_t iova, void *buf,
+ size_t len, bool write);
#define IAVF_QRX_TAIL_MAX 256
#define QTX_HEAD_RESTORE_DELAY_MAX 100
#define QTX_HEAD_RESTORE_DELAY_SLEEP_US_MIN 10
@@ -19,7 +21,7 @@ void ice_migration_uninit_vf(void *opaque);
int ice_migration_suspend_vf(void *opaque, bool mig_dst);
int ice_migration_save_devstate(void *opaque, u8 *buf, u64 buf_sz);
int ice_migration_restore_devstate(void *opaque, const u8 *buf, u64 buf_sz,
- struct vfio_device *vdev);
+ dma_rw_handler_t handler, void *data);
#else
static inline void *ice_migration_get_vf(struct pci_dev *vf_pdev)
--
2.25.1