* [PATCH v5 0/5] bugfix some driver issues
@ 2025-03-13 7:20 Longfang Liu
2025-03-13 7:20 ` [PATCH v5 1/5] hisi_acc_vfio_pci: fix XQE dma address error Longfang Liu
` (4 more replies)
0 siblings, 5 replies; 10+ messages in thread
From: Longfang Liu @ 2025-03-13 7:20 UTC (permalink / raw)
To: alex.williamson, jgg, shameerali.kolothum.thodi, jonathan.cameron
Cc: kvm, linux-kernel, linuxarm, liulongfang
As the test scenarios for the live migration function have become
more and more extensive, some previously undiscovered driver
issues were found.
This patchset fixes them.
Change v4 -> v5
Update version matching strategy
Change v3 -> v4
Modify version matching scheme
Change v2 -> v3
Modify the magic digital field segment
Change v1 -> v2
Add fixes line for patch comment
Longfang Liu (5):
hisi_acc_vfio_pci: fix XQE dma address error
hisi_acc_vfio_pci: add eq and aeq interruption restore
hisi_acc_vfio_pci: bugfix cache write-back issue
hisi_acc_vfio_pci: bugfix the problem of uninstalling driver
hisi_acc_vfio_pci: bugfix live migration function without VF device
driver
.../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 97 +++++++++++++++----
.../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 14 ++-
2 files changed, 89 insertions(+), 22 deletions(-)
--
2.24.0
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v5 1/5] hisi_acc_vfio_pci: fix XQE dma address error
2025-03-13 7:20 [PATCH v5 0/5] bugfix some driver issues Longfang Liu
@ 2025-03-13 7:20 ` Longfang Liu
2025-03-14 8:01 ` Shameerali Kolothum Thodi
2025-03-13 7:20 ` [PATCH v5 2/5] hisi_acc_vfio_pci: add eq and aeq interruption restore Longfang Liu
` (3 subsequent siblings)
4 siblings, 1 reply; 10+ messages in thread
From: Longfang Liu @ 2025-03-13 7:20 UTC (permalink / raw)
To: alex.williamson, jgg, shameerali.kolothum.thodi, jonathan.cameron
Cc: kvm, linux-kernel, linuxarm, liulongfang
The dma addresses of EQE and AEQE are wrong after migration, which
results in failure of the guest kernel-mode encryption services.
Comparing the definition of hardware registers, we found that
there was an error when the data read from the register was
combined into an address. Therefore, the address combination
sequence needs to be corrected.
Even after fixing the above problem, we still have an issue
where a Guest can be migrated from an old kernel to a
new kernel, which may result in wrong data.
In order to ensure that the address is correct after migration,
if an old magic number is detected, the dma address needs to be
updated.
Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration")
Signed-off-by: Longfang Liu <liulongfang@huawei.com>
---
.../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 41 ++++++++++++++++---
.../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 14 ++++++-
2 files changed, 47 insertions(+), 8 deletions(-)
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 451c639299eb..304dbdfa0e95 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -350,6 +350,32 @@ static int vf_qm_func_stop(struct hisi_qm *qm)
return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
}
+static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device *dev)
+{
+ switch (vf_data->acc_magic) {
+ case ACC_DEV_MAGIC_V2:
+ if (vf_data->major_ver != ACC_DRV_MAJOR_VER) {
+ dev_info(dev, "migration driver version<%u.%u> not match!\n",
+ vf_data->major_ver, vf_data->minor_ver);
+ return -EINVAL;
+ }
+ break;
+ case ACC_DEV_MAGIC_V1:
+ /* Correct dma address */
+ vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
+ vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
+ vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
+ vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
+ vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
+ vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
struct hisi_acc_vf_migration_file *migf)
{
@@ -363,7 +389,8 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done)
return 0;
- if (vf_data->acc_magic != ACC_DEV_MAGIC) {
+ ret = vf_qm_version_check(vf_data, dev);
+ if (ret) {
dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
return -EINVAL;
}
@@ -418,7 +445,9 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
int vf_id = hisi_acc_vdev->vf_id;
int ret;
- vf_data->acc_magic = ACC_DEV_MAGIC;
+ vf_data->acc_magic = ACC_DEV_MAGIC_V2;
+ vf_data->major_ver = ACC_DRV_MAJOR_VER;
+ vf_data->minor_ver = ACC_DRV_MINOR_VER;
/* Save device id */
vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
@@ -496,12 +525,12 @@ static int vf_qm_read_data(struct hisi_qm *vf_qm, struct acc_vf_data *vf_data)
return -EINVAL;
/* Every reg is 32 bit, the dma address is 64 bit. */
- vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
+ vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
- vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
- vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
+ vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
+ vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
- vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
+ vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
/* Through SQC_BT/CQC_BT to get sqc and cqc address */
ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
index 245d7537b2bc..91002ceeebc1 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
@@ -39,6 +39,9 @@
#define QM_REG_ADDR_OFFSET 0x0004
#define QM_XQC_ADDR_OFFSET 32U
+#define QM_XQC_ADDR_LOW 0x1
+#define QM_XQC_ADDR_HIGH 0x2
+
#define QM_VF_AEQ_INT_MASK 0x0004
#define QM_VF_EQ_INT_MASK 0x000c
#define QM_IFC_INT_SOURCE_V 0x0020
@@ -50,10 +53,15 @@
#define QM_EQC_DW0 0X8000
#define QM_AEQC_DW0 0X8020
+#define ACC_DRV_MAJOR_VER 1
+#define ACC_DRV_MINOR_VER 0
+
+#define ACC_DEV_MAGIC_V1 0XCDCDCDCDFEEDAACC
+#define ACC_DEV_MAGIC_V2 0xAACCFEEDDECADEDE
+
struct acc_vf_data {
#define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state)
/* QM match information */
-#define ACC_DEV_MAGIC 0XCDCDCDCDFEEDAACC
u64 acc_magic;
u32 qp_num;
u32 dev_id;
@@ -61,7 +69,9 @@ struct acc_vf_data {
u32 qp_base;
u32 vf_qm_state;
/* QM reserved match information */
- u32 qm_rsv_state[3];
+ u16 major_ver;
+ u16 minor_ver;
+ u32 qm_rsv_state[2];
/* QM RW regs */
u32 aeq_int_mask;
--
2.24.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v5 2/5] hisi_acc_vfio_pci: add eq and aeq interruption restore
2025-03-13 7:20 [PATCH v5 0/5] bugfix some driver issues Longfang Liu
2025-03-13 7:20 ` [PATCH v5 1/5] hisi_acc_vfio_pci: fix XQE dma address error Longfang Liu
@ 2025-03-13 7:20 ` Longfang Liu
2025-03-13 7:20 ` [PATCH v5 3/5] hisi_acc_vfio_pci: bugfix cache write-back issue Longfang Liu
` (2 subsequent siblings)
4 siblings, 0 replies; 10+ messages in thread
From: Longfang Liu @ 2025-03-13 7:20 UTC (permalink / raw)
To: alex.williamson, jgg, shameerali.kolothum.thodi, jonathan.cameron
Cc: kvm, linux-kernel, linuxarm, liulongfang
In order to ensure that the task packets of the accelerator
device are not lost during the migration process, it is necessary
to send an EQ and AEQ command to the device after the live migration
is completed and to update the completion position of the task queue.
Let the device recheck the completed task data; if there are
uncollected packets, the device resends a task completion interrupt
to the software.
Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration")
Signed-off-by: Longfang Liu <liulongfang@huawei.com>
Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 304dbdfa0e95..80217aea5475 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -470,6 +470,19 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
return 0;
}
+static void vf_qm_xeqc_save(struct hisi_qm *qm,
+ struct hisi_acc_vf_migration_file *migf)
+{
+ struct acc_vf_data *vf_data = &migf->vf_data;
+ u16 eq_head, aeq_head;
+
+ eq_head = vf_data->qm_eqc_dw[0] & 0xFFFF;
+ qm_db(qm, 0, QM_DOORBELL_CMD_EQ, eq_head, 0);
+
+ aeq_head = vf_data->qm_aeqc_dw[0] & 0xFFFF;
+ qm_db(qm, 0, QM_DOORBELL_CMD_AEQ, aeq_head, 0);
+}
+
static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
struct hisi_acc_vf_migration_file *migf)
{
@@ -578,6 +591,9 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
return -EINVAL;
migf->total_length = sizeof(struct acc_vf_data);
+ /* Save eqc and aeqc interrupt information */
+ vf_qm_xeqc_save(vf_qm, migf);
+
return 0;
}
--
2.24.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v5 3/5] hisi_acc_vfio_pci: bugfix cache write-back issue
2025-03-13 7:20 [PATCH v5 0/5] bugfix some driver issues Longfang Liu
2025-03-13 7:20 ` [PATCH v5 1/5] hisi_acc_vfio_pci: fix XQE dma address error Longfang Liu
2025-03-13 7:20 ` [PATCH v5 2/5] hisi_acc_vfio_pci: add eq and aeq interruption restore Longfang Liu
@ 2025-03-13 7:20 ` Longfang Liu
2025-03-13 7:20 ` [PATCH v5 4/5] hisi_acc_vfio_pci: bugfix the problem of uninstalling driver Longfang Liu
2025-03-13 7:20 ` [PATCH v5 5/5] hisi_acc_vfio_pci: bugfix live migration function without VF device driver Longfang Liu
4 siblings, 0 replies; 10+ messages in thread
From: Longfang Liu @ 2025-03-13 7:20 UTC (permalink / raw)
To: alex.williamson, jgg, shameerali.kolothum.thodi, jonathan.cameron
Cc: kvm, linux-kernel, linuxarm, liulongfang
At present, cache write-back is placed in the device data
copy stage after stopping the device operation.
Writing back the cache at this stage results in the written-back
data being empty.
In order to ensure that the cache data is written back
successfully, the cache write-back needs to be moved into the
stop device stage.
Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration")
Signed-off-by: Longfang Liu <liulongfang@huawei.com>
Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 80217aea5475..d96446f499ed 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -566,7 +566,6 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
{
struct acc_vf_data *vf_data = &migf->vf_data;
struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
- struct device *dev = &vf_qm->pdev->dev;
int ret;
if (unlikely(qm_wait_dev_not_ready(vf_qm))) {
@@ -580,12 +579,6 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
vf_data->vf_qm_state = QM_READY;
hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
- ret = vf_qm_cache_wb(vf_qm);
- if (ret) {
- dev_err(dev, "failed to writeback QM Cache!\n");
- return ret;
- }
-
ret = vf_qm_read_data(vf_qm, vf_data);
if (ret)
return -EINVAL;
@@ -1012,6 +1005,13 @@ static int hisi_acc_vf_stop_device(struct hisi_acc_vf_core_device *hisi_acc_vdev
dev_err(dev, "failed to check QM INT state!\n");
return ret;
}
+
+ ret = vf_qm_cache_wb(vf_qm);
+ if (ret) {
+ dev_err(dev, "failed to writeback QM cache!\n");
+ return ret;
+ }
+
return 0;
}
--
2.24.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v5 4/5] hisi_acc_vfio_pci: bugfix the problem of uninstalling driver
2025-03-13 7:20 [PATCH v5 0/5] bugfix some driver issues Longfang Liu
` (2 preceding siblings ...)
2025-03-13 7:20 ` [PATCH v5 3/5] hisi_acc_vfio_pci: bugfix cache write-back issue Longfang Liu
@ 2025-03-13 7:20 ` Longfang Liu
2025-03-13 7:20 ` [PATCH v5 5/5] hisi_acc_vfio_pci: bugfix live migration function without VF device driver Longfang Liu
4 siblings, 0 replies; 10+ messages in thread
From: Longfang Liu @ 2025-03-13 7:20 UTC (permalink / raw)
To: alex.williamson, jgg, shameerali.kolothum.thodi, jonathan.cameron
Cc: kvm, linux-kernel, linuxarm, liulongfang
In a live migration scenario, if the number of VFs at the
destination is greater than at the source, the recovery operation
will fail, and qemu will not be able to complete the process and
will exit after shutting down the device FD.
Because the fd is closed abnormally, the reference count of the
live migration driver is left in an abnormal state, and the driver
can no longer be unloaded normally.
Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration")
Signed-off-by: Longfang Liu <liulongfang@huawei.com>
Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index d96446f499ed..cadc82419dca 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -1508,6 +1508,7 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev)
struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+ hisi_acc_vf_disable_fds(hisi_acc_vdev);
mutex_lock(&hisi_acc_vdev->open_mutex);
hisi_acc_vdev->dev_opened = false;
iounmap(vf_qm->io_base);
--
2.24.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v5 5/5] hisi_acc_vfio_pci: bugfix live migration function without VF device driver
2025-03-13 7:20 [PATCH v5 0/5] bugfix some driver issues Longfang Liu
` (3 preceding siblings ...)
2025-03-13 7:20 ` [PATCH v5 4/5] hisi_acc_vfio_pci: bugfix the problem of uninstalling driver Longfang Liu
@ 2025-03-13 7:20 ` Longfang Liu
2025-03-14 8:11 ` Shameerali Kolothum Thodi
4 siblings, 1 reply; 10+ messages in thread
From: Longfang Liu @ 2025-03-13 7:20 UTC (permalink / raw)
To: alex.williamson, jgg, shameerali.kolothum.thodi, jonathan.cameron
Cc: kvm, linux-kernel, linuxarm, liulongfang
If the VF device driver is not loaded in the Guest OS and we attempt to
perform device data migration, the address of the migrated data will
be NULL.
The live migration recovery operation on the destination side will
access a null address value, which will cause access errors.
Therefore, live migration of VMs without added VF device drivers
does not require device data migration.
In addition, when the queue address data obtained by the destination
is empty, device queue recovery processing will not be performed.
Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration")
Signed-off-by: Longfang Liu <liulongfang@huawei.com>
---
.../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 25 +++++++++++++------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index cadc82419dca..44fa2d16bbcc 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -426,13 +426,6 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
return -EINVAL;
}
- ret = qm_write_regs(vf_qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
- if (ret) {
- dev_err(dev, "failed to write QM_VF_STATE\n");
- return ret;
- }
-
- hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
hisi_acc_vdev->match_done = true;
return 0;
}
@@ -498,6 +491,13 @@ static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
if (migf->total_length < sizeof(struct acc_vf_data))
return -EINVAL;
+ ret = qm_write_regs(qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_VF_STATE\n");
+ return -EINVAL;
+ }
+ hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+
qm->eqe_dma = vf_data->eqe_dma;
qm->aeqe_dma = vf_data->aeqe_dma;
qm->sqc_dma = vf_data->sqc_dma;
@@ -506,6 +506,12 @@ static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
qm->qp_base = vf_data->qp_base;
qm->qp_num = vf_data->qp_num;
+ if (!vf_data->eqe_dma || !vf_data->aeqe_dma ||
+ !vf_data->sqc_dma || !vf_data->cqc_dma) {
+ dev_err(dev, "resume dma addr is NULL!\n");
+ return -EINVAL;
+ }
+
ret = qm_set_regs(qm, vf_data);
if (ret) {
dev_err(dev, "set VF regs failed\n");
@@ -726,8 +732,12 @@ static int hisi_acc_vf_load_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
{
struct device *dev = &hisi_acc_vdev->vf_dev->dev;
struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->resuming_migf;
+ struct acc_vf_data *vf_data = &migf->vf_data;
int ret;
+ if (vf_data->vf_qm_state != QM_READY)
+ return 0;
+
/* Recover data to VF */
ret = vf_qm_load_data(hisi_acc_vdev, migf);
if (ret) {
@@ -1531,6 +1541,7 @@ static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev)
hisi_acc_vdev->vf_id = pci_iov_vf_id(pdev) + 1;
hisi_acc_vdev->pf_qm = pf_qm;
hisi_acc_vdev->vf_dev = pdev;
+ hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
mutex_init(&hisi_acc_vdev->state_mutex);
mutex_init(&hisi_acc_vdev->open_mutex);
--
2.24.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* RE: [PATCH v5 1/5] hisi_acc_vfio_pci: fix XQE dma address error
2025-03-13 7:20 ` [PATCH v5 1/5] hisi_acc_vfio_pci: fix XQE dma address error Longfang Liu
@ 2025-03-14 8:01 ` Shameerali Kolothum Thodi
2025-03-17 8:21 ` liulongfang
0 siblings, 1 reply; 10+ messages in thread
From: Shameerali Kolothum Thodi @ 2025-03-14 8:01 UTC (permalink / raw)
To: liulongfang, alex.williamson@redhat.com, jgg@nvidia.com,
Jonathan Cameron
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
linuxarm@openeuler.org
> -----Original Message-----
> From: liulongfang <liulongfang@huawei.com>
> Sent: Thursday, March 13, 2025 7:20 AM
> To: alex.williamson@redhat.com; jgg@nvidia.com; Shameerali Kolothum
> Thodi <shameerali.kolothum.thodi@huawei.com>; Jonathan Cameron
> <jonathan.cameron@huawei.com>
> Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org;
> linuxarm@openeuler.org; liulongfang <liulongfang@huawei.com>
> Subject: [PATCH v5 1/5] hisi_acc_vfio_pci: fix XQE dma address error
>
> The dma addresses of EQE and AEQE are wrong after migration and
> results in guest kernel-mode encryption services failure.
> Comparing the definition of hardware registers, we found that
> there was an error when the data read from the register was
> combined into an address. Therefore, the address combination
> sequence needs to be corrected.
>
> Even after fixing the above problem, we still have an issue
> where the Guest from an old kernel can get migrated to
> new kernel and may result in wrong data.
>
> In order to ensure that the address is correct after migration,
> if an old magic number is detected, the dma address needs to be
> updated.
>
> Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live
> migration")
> Signed-off-by: Longfang Liu <liulongfang@huawei.com>
LGTM,
Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Thanks,
Shameer
> ---
> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 41 ++++++++++++++++---
> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 14 ++++++-
> 2 files changed, 47 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> index 451c639299eb..304dbdfa0e95 100644
> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> @@ -350,6 +350,32 @@ static int vf_qm_func_stop(struct hisi_qm *qm)
> return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
> }
>
> +static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device
> *dev)
> +{
> + switch (vf_data->acc_magic) {
> + case ACC_DEV_MAGIC_V2:
> + if (vf_data->major_ver != ACC_DRV_MAJOR_VER) {
> + dev_info(dev, "migration driver version<%u.%u> not
> match!\n",
> + vf_data->major_ver, vf_data->minor_ver);
> + return -EINVAL;
> + }
> + break;
> + case ACC_DEV_MAGIC_V1:
> + /* Correct dma address */
> + vf_data->eqe_dma = vf_data-
> >qm_eqc_dw[QM_XQC_ADDR_HIGH];
> + vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
> + vf_data->eqe_dma |= vf_data-
> >qm_eqc_dw[QM_XQC_ADDR_LOW];
> + vf_data->aeqe_dma = vf_data-
> >qm_aeqc_dw[QM_XQC_ADDR_HIGH];
> + vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
> + vf_data->aeqe_dma |= vf_data-
> >qm_aeqc_dw[QM_XQC_ADDR_LOW];
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> static int vf_qm_check_match(struct hisi_acc_vf_core_device
> *hisi_acc_vdev,
> struct hisi_acc_vf_migration_file *migf)
> {
> @@ -363,7 +389,8 @@ static int vf_qm_check_match(struct
> hisi_acc_vf_core_device *hisi_acc_vdev,
> if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev-
> >match_done)
> return 0;
>
> - if (vf_data->acc_magic != ACC_DEV_MAGIC) {
> + ret = vf_qm_version_check(vf_data, dev);
> + if (ret) {
> dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
> return -EINVAL;
> }
> @@ -418,7 +445,9 @@ static int vf_qm_get_match_data(struct
> hisi_acc_vf_core_device *hisi_acc_vdev,
> int vf_id = hisi_acc_vdev->vf_id;
> int ret;
>
> - vf_data->acc_magic = ACC_DEV_MAGIC;
> + vf_data->acc_magic = ACC_DEV_MAGIC_V2;
> + vf_data->major_ver = ACC_DRV_MAJOR_VER;
> + vf_data->minor_ver = ACC_DRV_MINOR_VER;
> /* Save device id */
> vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
>
> @@ -496,12 +525,12 @@ static int vf_qm_read_data(struct hisi_qm
> *vf_qm, struct acc_vf_data *vf_data)
> return -EINVAL;
>
> /* Every reg is 32 bit, the dma address is 64 bit. */
> - vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
> + vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
> vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
> - vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
> - vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
> + vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
> + vf_data->aeqe_dma = vf_data-
> >qm_aeqc_dw[QM_XQC_ADDR_HIGH];
> vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
> - vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
> + vf_data->aeqe_dma |= vf_data-
> >qm_aeqc_dw[QM_XQC_ADDR_LOW];
>
> /* Through SQC_BT/CQC_BT to get sqc and cqc address */
> ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> index 245d7537b2bc..91002ceeebc1 100644
> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> @@ -39,6 +39,9 @@
> #define QM_REG_ADDR_OFFSET 0x0004
>
> #define QM_XQC_ADDR_OFFSET 32U
> +#define QM_XQC_ADDR_LOW 0x1
> +#define QM_XQC_ADDR_HIGH 0x2
> +
> #define QM_VF_AEQ_INT_MASK 0x0004
> #define QM_VF_EQ_INT_MASK 0x000c
> #define QM_IFC_INT_SOURCE_V 0x0020
> @@ -50,10 +53,15 @@
> #define QM_EQC_DW0 0X8000
> #define QM_AEQC_DW0 0X8020
>
> +#define ACC_DRV_MAJOR_VER 1
> +#define ACC_DRV_MINOR_VER 0
> +
> +#define ACC_DEV_MAGIC_V1 0XCDCDCDCDFEEDAACC
> +#define ACC_DEV_MAGIC_V2 0xAACCFEEDDECADEDE
> +
> struct acc_vf_data {
> #define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state)
> /* QM match information */
> -#define ACC_DEV_MAGIC 0XCDCDCDCDFEEDAACC
> u64 acc_magic;
> u32 qp_num;
> u32 dev_id;
> @@ -61,7 +69,9 @@ struct acc_vf_data {
> u32 qp_base;
> u32 vf_qm_state;
> /* QM reserved match information */
> - u32 qm_rsv_state[3];
> + u16 major_ver;
> + u16 minor_ver;
> + u32 qm_rsv_state[2];
>
> /* QM RW regs */
> u32 aeq_int_mask;
> --
> 2.24.0
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH v5 5/5] hisi_acc_vfio_pci: bugfix live migration function without VF device driver
2025-03-13 7:20 ` [PATCH v5 5/5] hisi_acc_vfio_pci: bugfix live migration function without VF device driver Longfang Liu
@ 2025-03-14 8:11 ` Shameerali Kolothum Thodi
2025-03-18 6:20 ` liulongfang
0 siblings, 1 reply; 10+ messages in thread
From: Shameerali Kolothum Thodi @ 2025-03-14 8:11 UTC (permalink / raw)
To: liulongfang, alex.williamson@redhat.com, jgg@nvidia.com,
Jonathan Cameron
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
linuxarm@openeuler.org
> -----Original Message-----
> From: liulongfang <liulongfang@huawei.com>
> Sent: Thursday, March 13, 2025 7:20 AM
> To: alex.williamson@redhat.com; jgg@nvidia.com; Shameerali Kolothum
> Thodi <shameerali.kolothum.thodi@huawei.com>; Jonathan Cameron
> <jonathan.cameron@huawei.com>
> Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org;
> linuxarm@openeuler.org; liulongfang <liulongfang@huawei.com>
> Subject: [PATCH v5 5/5] hisi_acc_vfio_pci: bugfix live migration function
> without VF device driver
>
> If the VF device driver is not loaded in the Guest OS and we attempt to
> perform device data migration, the address of the migrated data will
> be NULL.
> The live migration recovery operation on the destination side will
> access a null address value, which will cause access errors.
>
> Therefore, live migration of VMs without added VF device drivers
> does not require device data migration.
> In addition, when the queue address data obtained by the destination
> is empty, device queue recovery processing will not be performed.
>
> Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live
> migration")
> Signed-off-by: Longfang Liu <liulongfang@huawei.com>
> ---
> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 25 +++++++++++++------
> 1 file changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> index cadc82419dca..44fa2d16bbcc 100644
> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> @@ -426,13 +426,6 @@ static int vf_qm_check_match(struct
> hisi_acc_vf_core_device *hisi_acc_vdev,
> return -EINVAL;
> }
>
> - ret = qm_write_regs(vf_qm, QM_VF_STATE, &vf_data->vf_qm_state,
> 1);
> - if (ret) {
> - dev_err(dev, "failed to write QM_VF_STATE\n");
> - return ret;
> - }
> -
> - hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
> hisi_acc_vdev->match_done = true;
> return 0;
> }
> @@ -498,6 +491,13 @@ static int vf_qm_load_data(struct
> hisi_acc_vf_core_device *hisi_acc_vdev,
> if (migf->total_length < sizeof(struct acc_vf_data))
> return -EINVAL;
>
> + ret = qm_write_regs(qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
> + if (ret) {
> + dev_err(dev, "failed to write QM_VF_STATE\n");
> + return -EINVAL;
> + }
> + hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
> +
> qm->eqe_dma = vf_data->eqe_dma;
> qm->aeqe_dma = vf_data->aeqe_dma;
> qm->sqc_dma = vf_data->sqc_dma;
> @@ -506,6 +506,12 @@ static int vf_qm_load_data(struct
> hisi_acc_vf_core_device *hisi_acc_vdev,
> qm->qp_base = vf_data->qp_base;
> qm->qp_num = vf_data->qp_num;
>
> + if (!vf_data->eqe_dma || !vf_data->aeqe_dma ||
> + !vf_data->sqc_dma || !vf_data->cqc_dma) {
> + dev_err(dev, "resume dma addr is NULL!\n");
> + return -EINVAL;
> + }
> +
> ret = qm_set_regs(qm, vf_data);
> if (ret) {
> dev_err(dev, "set VF regs failed\n");
> @@ -726,8 +732,12 @@ static int hisi_acc_vf_load_state(struct
> hisi_acc_vf_core_device *hisi_acc_vdev)
> {
> struct device *dev = &hisi_acc_vdev->vf_dev->dev;
> struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev-
> >resuming_migf;
> + struct acc_vf_data *vf_data = &migf->vf_data;
> int ret;
>
> + if (vf_data->vf_qm_state != QM_READY)
> + return 0;
I don't think we need to check the above. In vf_qm_state_save(),
if vf_qm_state != QM_READY, we set
migf->total_length = QM_MATCH_SIZE.
Hence it will return 0 in the below vf_qm_load_data() anyway.
With that corrected,
Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Thanks,
Shameer
> +
> /* Recover data to VF */
> ret = vf_qm_load_data(hisi_acc_vdev, migf);
> if (ret) {
> @@ -1531,6 +1541,7 @@ static int hisi_acc_vfio_pci_migrn_init_dev(struct
> vfio_device *core_vdev)
> hisi_acc_vdev->vf_id = pci_iov_vf_id(pdev) + 1;
> hisi_acc_vdev->pf_qm = pf_qm;
> hisi_acc_vdev->vf_dev = pdev;
> + hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
> mutex_init(&hisi_acc_vdev->state_mutex);
> mutex_init(&hisi_acc_vdev->open_mutex);
>
> --
> 2.24.0
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 1/5] hisi_acc_vfio_pci: fix XQE dma address error
2025-03-14 8:01 ` Shameerali Kolothum Thodi
@ 2025-03-17 8:21 ` liulongfang
0 siblings, 0 replies; 10+ messages in thread
From: liulongfang @ 2025-03-17 8:21 UTC (permalink / raw)
To: Shameerali Kolothum Thodi, alex.williamson@redhat.com,
jgg@nvidia.com, Jonathan Cameron
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
linuxarm@openeuler.org
On 2025/3/14 16:01, Shameerali Kolothum Thodi wrote:
>
>
>> -----Original Message-----
>> From: liulongfang <liulongfang@huawei.com>
>> Sent: Thursday, March 13, 2025 7:20 AM
>> To: alex.williamson@redhat.com; jgg@nvidia.com; Shameerali Kolothum
>> Thodi <shameerali.kolothum.thodi@huawei.com>; Jonathan Cameron
>> <jonathan.cameron@huawei.com>
>> Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org;
>> linuxarm@openeuler.org; liulongfang <liulongfang@huawei.com>
>> Subject: [PATCH v5 1/5] hisi_acc_vfio_pci: fix XQE dma address error
>>
>> The dma addresses of EQE and AEQE are wrong after migration, which
>> results in the failure of guest kernel-mode encryption services.
>> Comparing the definition of hardware registers, we found that
>> there was an error when the data read from the register was
>> combined into an address. Therefore, the address combination
>> sequence needs to be corrected.
>>
>> Even after fixing the above problem, we still have an issue
>> where the Guest from an old kernel can get migrated to
>> new kernel and may result in wrong data.
>>
>> In order to ensure that the address is correct after migration,
>> if an old magic number is detected, the dma address needs to be
>> updated.
>>
>> Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live
>> migration")
>> Signed-off-by: Longfang Liu <liulongfang@huawei.com>
>
> LGTM,
>
> Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
>
> Thanks,
> Shameer
>
Thanks.
Longfang.
>> ---
>> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 41 ++++++++++++++++---
>> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 14 ++++++-
>> 2 files changed, 47 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> index 451c639299eb..304dbdfa0e95 100644
>> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> @@ -350,6 +350,32 @@ static int vf_qm_func_stop(struct hisi_qm *qm)
>> return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
>> }
>>
>> +static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device
>> *dev)
>> +{
>> + switch (vf_data->acc_magic) {
>> + case ACC_DEV_MAGIC_V2:
>> + if (vf_data->major_ver != ACC_DRV_MAJOR_VER) {
>> + dev_info(dev, "migration driver version<%u.%u> not
>> match!\n",
>> + vf_data->major_ver, vf_data->minor_ver);
>> + return -EINVAL;
>> + }
>> + break;
>> + case ACC_DEV_MAGIC_V1:
>> + /* Correct dma address */
>> + vf_data->eqe_dma = vf_data-
>>> qm_eqc_dw[QM_XQC_ADDR_HIGH];
>> + vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
>> + vf_data->eqe_dma |= vf_data-
>>> qm_eqc_dw[QM_XQC_ADDR_LOW];
>> + vf_data->aeqe_dma = vf_data-
>>> qm_aeqc_dw[QM_XQC_ADDR_HIGH];
>> + vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
>> + vf_data->aeqe_dma |= vf_data-
>>> qm_aeqc_dw[QM_XQC_ADDR_LOW];
>> + break;
>> + default:
>> + return -EINVAL;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static int vf_qm_check_match(struct hisi_acc_vf_core_device
>> *hisi_acc_vdev,
>> struct hisi_acc_vf_migration_file *migf)
>> {
>> @@ -363,7 +389,8 @@ static int vf_qm_check_match(struct
>> hisi_acc_vf_core_device *hisi_acc_vdev,
>> if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev-
>>> match_done)
>> return 0;
>>
>> - if (vf_data->acc_magic != ACC_DEV_MAGIC) {
>> + ret = vf_qm_version_check(vf_data, dev);
>> + if (ret) {
>> dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
>> return -EINVAL;
>> }
>> @@ -418,7 +445,9 @@ static int vf_qm_get_match_data(struct
>> hisi_acc_vf_core_device *hisi_acc_vdev,
>> int vf_id = hisi_acc_vdev->vf_id;
>> int ret;
>>
>> - vf_data->acc_magic = ACC_DEV_MAGIC;
>> + vf_data->acc_magic = ACC_DEV_MAGIC_V2;
>> + vf_data->major_ver = ACC_DRV_MAJOR_VER;
>> + vf_data->minor_ver = ACC_DRV_MINOR_VER;
>> /* Save device id */
>> vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
>>
>> @@ -496,12 +525,12 @@ static int vf_qm_read_data(struct hisi_qm
>> *vf_qm, struct acc_vf_data *vf_data)
>> return -EINVAL;
>>
>> /* Every reg is 32 bit, the dma address is 64 bit. */
>> - vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
>> + vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
>> vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
>> - vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
>> - vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
>> + vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
>> + vf_data->aeqe_dma = vf_data-
>>> qm_aeqc_dw[QM_XQC_ADDR_HIGH];
>> vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
>> - vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
>> + vf_data->aeqe_dma |= vf_data-
>>> qm_aeqc_dw[QM_XQC_ADDR_LOW];
>>
>> /* Through SQC_BT/CQC_BT to get sqc and cqc address */
>> ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
>> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
>> b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
>> index 245d7537b2bc..91002ceeebc1 100644
>> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
>> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
>> @@ -39,6 +39,9 @@
>> #define QM_REG_ADDR_OFFSET 0x0004
>>
>> #define QM_XQC_ADDR_OFFSET 32U
>> +#define QM_XQC_ADDR_LOW 0x1
>> +#define QM_XQC_ADDR_HIGH 0x2
>> +
>> #define QM_VF_AEQ_INT_MASK 0x0004
>> #define QM_VF_EQ_INT_MASK 0x000c
>> #define QM_IFC_INT_SOURCE_V 0x0020
>> @@ -50,10 +53,15 @@
>> #define QM_EQC_DW0 0X8000
>> #define QM_AEQC_DW0 0X8020
>>
>> +#define ACC_DRV_MAJOR_VER 1
>> +#define ACC_DRV_MINOR_VER 0
>> +
>> +#define ACC_DEV_MAGIC_V1 0XCDCDCDCDFEEDAACC
>> +#define ACC_DEV_MAGIC_V2 0xAACCFEEDDECADEDE
>> +
>> struct acc_vf_data {
>> #define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state)
>> /* QM match information */
>> -#define ACC_DEV_MAGIC 0XCDCDCDCDFEEDAACC
>> u64 acc_magic;
>> u32 qp_num;
>> u32 dev_id;
>> @@ -61,7 +69,9 @@ struct acc_vf_data {
>> u32 qp_base;
>> u32 vf_qm_state;
>> /* QM reserved match information */
>> - u32 qm_rsv_state[3];
>> + u16 major_ver;
>> + u16 minor_ver;
>> + u32 qm_rsv_state[2];
>>
>> /* QM RW regs */
>> u32 aeq_int_mask;
>> --
>> 2.24.0
>
> .
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 5/5] hisi_acc_vfio_pci: bugfix live migration function without VF device driver
2025-03-14 8:11 ` Shameerali Kolothum Thodi
@ 2025-03-18 6:20 ` liulongfang
0 siblings, 0 replies; 10+ messages in thread
From: liulongfang @ 2025-03-18 6:20 UTC (permalink / raw)
To: Shameerali Kolothum Thodi, alex.williamson@redhat.com,
jgg@nvidia.com, Jonathan Cameron
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
linuxarm@openeuler.org
On 2025/3/14 16:11, Shameerali Kolothum Thodi wrote:
>
>
>> -----Original Message-----
>> From: liulongfang <liulongfang@huawei.com>
>> Sent: Thursday, March 13, 2025 7:20 AM
>> To: alex.williamson@redhat.com; jgg@nvidia.com; Shameerali Kolothum
>> Thodi <shameerali.kolothum.thodi@huawei.com>; Jonathan Cameron
>> <jonathan.cameron@huawei.com>
>> Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org;
>> linuxarm@openeuler.org; liulongfang <liulongfang@huawei.com>
>> Subject: [PATCH v5 5/5] hisi_acc_vfio_pci: bugfix live migration function
>> without VF device driver
>>
>> If the VF device driver is not loaded in the Guest OS and we attempt to
>> perform device data migration, the address of the migrated data will
>> be NULL.
>> The live migration recovery operation on the destination side will
>> access a null address value, which will cause access errors.
>>
>> Therefore, live migration of VMs without added VF device drivers
>> does not require device data migration.
>> In addition, when the queue address data obtained by the destination
>> is empty, device queue recovery processing will not be performed.
>>
>> Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live
>> migration")
>> Signed-off-by: Longfang Liu <liulongfang@huawei.com>
>> ---
>> .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 25 +++++++++++++------
>> 1 file changed, 18 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> index cadc82419dca..44fa2d16bbcc 100644
>> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> @@ -426,13 +426,6 @@ static int vf_qm_check_match(struct
>> hisi_acc_vf_core_device *hisi_acc_vdev,
>> return -EINVAL;
>> }
>>
>> - ret = qm_write_regs(vf_qm, QM_VF_STATE, &vf_data->vf_qm_state,
>> 1);
>> - if (ret) {
>> - dev_err(dev, "failed to write QM_VF_STATE\n");
>> - return ret;
>> - }
>> -
>> - hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
>> hisi_acc_vdev->match_done = true;
>> return 0;
>> }
>> @@ -498,6 +491,13 @@ static int vf_qm_load_data(struct
>> hisi_acc_vf_core_device *hisi_acc_vdev,
>> if (migf->total_length < sizeof(struct acc_vf_data))
>> return -EINVAL;
>>
>> + ret = qm_write_regs(qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
>> + if (ret) {
>> + dev_err(dev, "failed to write QM_VF_STATE\n");
>> + return -EINVAL;
>> + }
>> + hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
>> +
>> qm->eqe_dma = vf_data->eqe_dma;
>> qm->aeqe_dma = vf_data->aeqe_dma;
>> qm->sqc_dma = vf_data->sqc_dma;
>> @@ -506,6 +506,12 @@ static int vf_qm_load_data(struct
>> hisi_acc_vf_core_device *hisi_acc_vdev,
>> qm->qp_base = vf_data->qp_base;
>> qm->qp_num = vf_data->qp_num;
>>
>> + if (!vf_data->eqe_dma || !vf_data->aeqe_dma ||
>> + !vf_data->sqc_dma || !vf_data->cqc_dma) {
>> + dev_err(dev, "resume dma addr is NULL!\n");
>> + return -EINVAL;
>> + }
>> +
>> ret = qm_set_regs(qm, vf_data);
>> if (ret) {
>> dev_err(dev, "set VF regs failed\n");
>> @@ -726,8 +732,12 @@ static int hisi_acc_vf_load_state(struct
>> hisi_acc_vf_core_device *hisi_acc_vdev)
>> {
>> struct device *dev = &hisi_acc_vdev->vf_dev->dev;
>> struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev-
>>> resuming_migf;
>> + struct acc_vf_data *vf_data = &migf->vf_data;
>> int ret;
>>
>> + if (vf_data->vf_qm_state != QM_READY)
>> + return 0;
>
> I don't think we need the above check. In vf_qm_state_save(),
> if vf_qm_state != QM_READY, we set
> migf->total_length = QM_MATCH_SIZE.
>
> Hence vf_qm_load_data() below will return 0 anyway.
>
After removing this check, the live migration function still works normally
without loading the VM driver.
I will remove this check in the next version.
Thanks.
Longfang.
> With that corrected,
>
> Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
>
> Thanks,
> Shameer
>
>> +
>> /* Recover data to VF */
>> ret = vf_qm_load_data(hisi_acc_vdev, migf);
>> if (ret) {
>> @@ -1531,6 +1541,7 @@ static int hisi_acc_vfio_pci_migrn_init_dev(struct
>> vfio_device *core_vdev)
>> hisi_acc_vdev->vf_id = pci_iov_vf_id(pdev) + 1;
>> hisi_acc_vdev->pf_qm = pf_qm;
>> hisi_acc_vdev->vf_dev = pdev;
>> + hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
>> mutex_init(&hisi_acc_vdev->state_mutex);
>> mutex_init(&hisi_acc_vdev->open_mutex);
>>
>> --
>> 2.24.0
>
> .
>
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2025-03-18 6:20 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-03-13 7:20 [PATCH v5 0/5] bugfix some driver issues Longfang Liu
2025-03-13 7:20 ` [PATCH v5 1/5] hisi_acc_vfio_pci: fix XQE dma address error Longfang Liu
2025-03-14 8:01 ` Shameerali Kolothum Thodi
2025-03-17 8:21 ` liulongfang
2025-03-13 7:20 ` [PATCH v5 2/5] hisi_acc_vfio_pci: add eq and aeq interruption restore Longfang Liu
2025-03-13 7:20 ` [PATCH v5 3/5] hisi_acc_vfio_pci: bugfix cache write-back issue Longfang Liu
2025-03-13 7:20 ` [PATCH v5 4/5] hisi_acc_vfio_pci: bugfix the problem of uninstalling driver Longfang Liu
2025-03-13 7:20 ` [PATCH v5 5/5] hisi_acc_vfio_pci: bugfix live migration function without VF device driver Longfang Liu
2025-03-14 8:11 ` Shameerali Kolothum Thodi
2025-03-18 6:20 ` liulongfang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox