* [PATCH for-9.2 v11 01/11] hw/pci: Rename has_power to enabled
2024-08-02 5:17 [PATCH for-9.2 v11 00/11] hw/pci: SR-IOV related fixes and improvements Akihiko Odaki
@ 2024-08-02 5:17 ` Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 02/11] hw/ppc/spapr_pci: Do not create DT for disabled PCI device Akihiko Odaki
` (9 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-02 5:17 UTC (permalink / raw)
To: Philippe Mathieu-Daudé, Michael S. Tsirkin, Marcel Apfelbaum,
Alex Williamson, Cédric Le Goater, Paolo Bonzini,
Daniel P. Berrangé, Eduardo Habkost, Sriram Yagnaraman,
Jason Wang, Keith Busch, Klaus Jensen, Markus Armbruster
Cc: qemu-devel, qemu-block, Akihiko Odaki
The renamed state will not only represent powering state of PFs, but
also represent SR-IOV VF enablement in the future.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
include/hw/pci/pci.h | 7 ++++++-
include/hw/pci/pci_device.h | 2 +-
hw/pci/pci.c | 14 +++++++-------
hw/pci/pci_host.c | 4 ++--
4 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index eb26cac81098..fe04b4fafd04 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -678,6 +678,11 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev)
}
MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
-void pci_set_power(PCIDevice *pci_dev, bool state);
+void pci_set_enabled(PCIDevice *pci_dev, bool state);
+
+static inline void pci_set_power(PCIDevice *pci_dev, bool state)
+{
+ pci_set_enabled(pci_dev, state);
+}
#endif
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index 15694f248948..f38fb3111954 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -57,7 +57,7 @@ typedef struct PCIReqIDCache PCIReqIDCache;
struct PCIDevice {
DeviceState qdev;
bool partially_hotplugged;
- bool has_power;
+ bool enabled;
/* PCI config space */
uint8_t *config;
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index fab86d056721..b532888e8f6c 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1525,7 +1525,7 @@ static void pci_update_mappings(PCIDevice *d)
continue;
new_addr = pci_bar_address(d, i, r->type, r->size);
- if (!d->has_power) {
+ if (!d->enabled) {
new_addr = PCI_BAR_UNMAPPED;
}
@@ -1613,7 +1613,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
pci_update_irq_disabled(d, was_irq_disabled);
memory_region_set_enabled(&d->bus_master_enable_region,
(pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->has_power);
+ & PCI_COMMAND_MASTER) && d->enabled);
}
msi_write_config(d, addr, val_in, l);
@@ -2884,18 +2884,18 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
return msg;
}
-void pci_set_power(PCIDevice *d, bool state)
+void pci_set_enabled(PCIDevice *d, bool state)
{
- if (d->has_power == state) {
+ if (d->enabled == state) {
return;
}
- d->has_power = state;
+ d->enabled = state;
pci_update_mappings(d);
memory_region_set_enabled(&d->bus_master_enable_region,
(pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->has_power);
- if (!d->has_power) {
+ & PCI_COMMAND_MASTER) && d->enabled);
+ if (!d->enabled) {
pci_device_reset(d);
}
}
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index dfe6fe618401..0d82727cc9dd 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -86,7 +86,7 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
* allowing direct removal of unexposed functions.
*/
if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
- !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
+ !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) {
return;
}
@@ -111,7 +111,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
* allowing direct removal of unexposed functions.
*/
if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
- !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
+ !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) {
return ~0x0;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH for-9.2 v11 02/11] hw/ppc/spapr_pci: Do not create DT for disabled PCI device
2024-08-02 5:17 [PATCH for-9.2 v11 00/11] hw/pci: SR-IOV related fixes and improvements Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 01/11] hw/pci: Rename has_power to enabled Akihiko Odaki
@ 2024-08-02 5:17 ` Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 03/11] hw/ppc/spapr_pci: Do not reject VFs created after a PF Akihiko Odaki
` (8 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-02 5:17 UTC (permalink / raw)
To: Philippe Mathieu-Daudé, Michael S. Tsirkin, Marcel Apfelbaum,
Alex Williamson, Cédric Le Goater, Paolo Bonzini,
Daniel P. Berrangé, Eduardo Habkost, Sriram Yagnaraman,
Jason Wang, Keith Busch, Klaus Jensen, Markus Armbruster
Cc: qemu-devel, qemu-block, Akihiko Odaki
Disabled means it is a disabled SR-IOV VF or it is powered off, and
hidden from the guest.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
hw/ppc/spapr_pci.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 7cf9904c3546..f63182a03c41 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1296,6 +1296,10 @@ static void spapr_dt_pci_device_cb(PCIBus *bus, PCIDevice *pdev,
return;
}
+ if (!pdev->enabled) {
+ return;
+ }
+
err = spapr_dt_pci_device(p->sphb, pdev, p->fdt, p->offset);
if (err < 0) {
p->err = err;
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH for-9.2 v11 03/11] hw/ppc/spapr_pci: Do not reject VFs created after a PF
2024-08-02 5:17 [PATCH for-9.2 v11 00/11] hw/pci: SR-IOV related fixes and improvements Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 01/11] hw/pci: Rename has_power to enabled Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 02/11] hw/ppc/spapr_pci: Do not create DT for disabled PCI device Akihiko Odaki
@ 2024-08-02 5:17 ` Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 04/11] pcie_sriov: Do not manually unrealize Akihiko Odaki
` (7 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-02 5:17 UTC (permalink / raw)
To: Philippe Mathieu-Daudé, Michael S. Tsirkin, Marcel Apfelbaum,
Alex Williamson, Cédric Le Goater, Paolo Bonzini,
Daniel P. Berrangé, Eduardo Habkost, Sriram Yagnaraman,
Jason Wang, Keith Busch, Klaus Jensen, Markus Armbruster
Cc: qemu-devel, qemu-block, Akihiko Odaki
A PF may automatically create VFs and the PF may be function 0.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
hw/ppc/spapr_pci.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index f63182a03c41..ed4454bbf79e 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1573,7 +1573,9 @@ static void spapr_pci_pre_plug(HotplugHandler *plug_handler,
* hotplug, we do not allow functions to be hotplugged to a
* slot that already has function 0 present
*/
- if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] &&
+ if (plugged_dev->hotplugged &&
+ !pci_is_vf(pdev) &&
+ bus->devices[PCI_DEVFN(slotnr, 0)] &&
PCI_FUNC(pdev->devfn) != 0) {
error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
" additional functions can no longer be exposed to guest.",
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH for-9.2 v11 04/11] pcie_sriov: Do not manually unrealize
2024-08-02 5:17 [PATCH for-9.2 v11 00/11] hw/pci: SR-IOV related fixes and improvements Akihiko Odaki
` (2 preceding siblings ...)
2024-08-02 5:17 ` [PATCH for-9.2 v11 03/11] hw/ppc/spapr_pci: Do not reject VFs created after a PF Akihiko Odaki
@ 2024-08-02 5:17 ` Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 05/11] pcie_sriov: Ensure VF function number does not overflow Akihiko Odaki
` (6 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-02 5:17 UTC (permalink / raw)
To: Philippe Mathieu-Daudé, Michael S. Tsirkin, Marcel Apfelbaum,
Alex Williamson, Cédric Le Goater, Paolo Bonzini,
Daniel P. Berrangé, Eduardo Habkost, Sriram Yagnaraman,
Jason Wang, Keith Busch, Klaus Jensen, Markus Armbruster
Cc: qemu-devel, qemu-block, Akihiko Odaki
A device gets automatically unrealized when being unparented.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
hw/pci/pcie_sriov.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index e9b23221d713..499becd5273f 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -204,11 +204,7 @@ static void unregister_vfs(PCIDevice *dev)
trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn), num_vfs);
for (i = 0; i < num_vfs; i++) {
- Error *err = NULL;
PCIDevice *vf = dev->exp.sriov_pf.vf[i];
- if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) {
- error_reportf_err(err, "Failed to unplug: ");
- }
object_unparent(OBJECT(vf));
object_unref(OBJECT(vf));
}
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH for-9.2 v11 05/11] pcie_sriov: Ensure VF function number does not overflow
2024-08-02 5:17 [PATCH for-9.2 v11 00/11] hw/pci: SR-IOV related fixes and improvements Akihiko Odaki
` (3 preceding siblings ...)
2024-08-02 5:17 ` [PATCH for-9.2 v11 04/11] pcie_sriov: Do not manually unrealize Akihiko Odaki
@ 2024-08-02 5:17 ` Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 06/11] pcie_sriov: Reuse SR-IOV VF device instances Akihiko Odaki
` (5 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-02 5:17 UTC (permalink / raw)
To: Philippe Mathieu-Daudé, Michael S. Tsirkin, Marcel Apfelbaum,
Alex Williamson, Cédric Le Goater, Paolo Bonzini,
Daniel P. Berrangé, Eduardo Habkost, Sriram Yagnaraman,
Jason Wang, Keith Busch, Klaus Jensen, Markus Armbruster
Cc: qemu-devel, qemu-block, Akihiko Odaki
pci_new() aborts when creating a VF with a function number equals to or
is greater than PCI_DEVFN_MAX.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
docs/pcie_sriov.txt | 8 +++++---
include/hw/pci/pcie_sriov.h | 5 +++--
hw/net/igb.c | 13 ++++++++++---
hw/nvme/ctrl.c | 24 ++++++++++++++++--------
hw/pci/pcie_sriov.c | 19 +++++++++++++++++--
5 files changed, 51 insertions(+), 18 deletions(-)
diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt
index a47aad0bfab0..ab2142807f79 100644
--- a/docs/pcie_sriov.txt
+++ b/docs/pcie_sriov.txt
@@ -52,9 +52,11 @@ setting up a BAR for a VF.
...
/* Add and initialize the SR/IOV capability */
- pcie_sriov_pf_init(d, 0x200, "your_virtual_dev",
- vf_devid, initial_vfs, total_vfs,
- fun_offset, stride);
+ if (!pcie_sriov_pf_init(d, 0x200, "your_virtual_dev",
+ vf_devid, initial_vfs, total_vfs,
+ fun_offset, stride, errp)) {
+ return;
+ }
/* Set up individual VF BARs (parameters as for normal BARs) */
pcie_sriov_pf_init_vf_bar( ... )
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
index 450cbef6c201..aa704e8f9d9f 100644
--- a/include/hw/pci/pcie_sriov.h
+++ b/include/hw/pci/pcie_sriov.h
@@ -27,10 +27,11 @@ typedef struct PCIESriovVF {
uint16_t vf_number; /* Logical VF number of this function */
} PCIESriovVF;
-void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
const char *vfname, uint16_t vf_dev_id,
uint16_t init_vfs, uint16_t total_vfs,
- uint16_t vf_offset, uint16_t vf_stride);
+ uint16_t vf_offset, uint16_t vf_stride,
+ Error **errp);
void pcie_sriov_pf_exit(PCIDevice *dev);
/* Set up a VF bar in the SR/IOV bar area */
diff --git a/hw/net/igb.c b/hw/net/igb.c
index b92bba402e0d..b6ca2f1b8aee 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -446,9 +446,16 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
pcie_ari_init(pci_dev, 0x150);
- pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF,
- IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
- IGB_VF_OFFSET, IGB_VF_STRIDE);
+ if (!pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET,
+ TYPE_IGBVF, IGB_82576_VF_DEV_ID,
+ IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
+ IGB_VF_OFFSET, IGB_VF_STRIDE,
+ errp)) {
+ pcie_cap_exit(pci_dev);
+ igb_cleanup_msix(s);
+ msi_uninit(pci_dev);
+ return;
+ }
pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MMIO_BAR_IDX,
PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH,
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index c6d4f61a47f9..e86ea2e7ce57 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -8271,7 +8271,8 @@ out:
return pow2ceil(bar_size);
}
-static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
+static bool nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset,
+ Error **errp)
{
uint16_t vf_dev_id = n->params.use_intel_id ?
PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
@@ -8280,12 +8281,17 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
le16_to_cpu(cap->vifrsm),
NULL, NULL);
- pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
- n->params.sriov_max_vfs, n->params.sriov_max_vfs,
- NVME_VF_OFFSET, NVME_VF_STRIDE);
+ if (!pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
+ n->params.sriov_max_vfs, n->params.sriov_max_vfs,
+ NVME_VF_OFFSET, NVME_VF_STRIDE,
+ errp)) {
+ return false;
+ }
pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size);
+
+ return true;
}
static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
@@ -8410,6 +8416,12 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
return false;
}
+ if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs &&
+ !nvme_init_sriov(n, pci_dev, 0x120, errp)) {
+ msix_uninit(pci_dev, &n->bar0, &n->bar0);
+ return false;
+ }
+
nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
pcie_cap_deverr_init(pci_dev);
@@ -8439,10 +8451,6 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
nvme_init_pmr(n, pci_dev);
}
- if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
- nvme_init_sriov(n, pci_dev, 0x120);
- }
-
return true;
}
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index 499becd5273f..f0bde0d3fc79 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -24,14 +24,27 @@ static PCIDevice *register_vf(PCIDevice *pf, int devfn,
const char *name, uint16_t vf_num);
static void unregister_vfs(PCIDevice *dev);
-void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
const char *vfname, uint16_t vf_dev_id,
uint16_t init_vfs, uint16_t total_vfs,
- uint16_t vf_offset, uint16_t vf_stride)
+ uint16_t vf_offset, uint16_t vf_stride,
+ Error **errp)
{
uint8_t *cfg = dev->config + offset;
uint8_t *wmask;
+ if (total_vfs) {
+ uint16_t ari_cap = pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI);
+ uint16_t first_vf_devfn = dev->devfn + vf_offset;
+ uint16_t last_vf_devfn = first_vf_devfn + vf_stride * (total_vfs - 1);
+
+ if ((!ari_cap && PCI_SLOT(dev->devfn) != PCI_SLOT(last_vf_devfn)) ||
+ last_vf_devfn >= PCI_DEVFN_MAX) {
+ error_setg(errp, "VF function number overflows");
+ return false;
+ }
+ }
+
pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
offset, PCI_EXT_CAP_SRIOV_SIZEOF);
dev->exp.sriov_cap = offset;
@@ -69,6 +82,8 @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553);
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
+
+ return true;
}
void pcie_sriov_pf_exit(PCIDevice *dev)
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH for-9.2 v11 06/11] pcie_sriov: Reuse SR-IOV VF device instances
2024-08-02 5:17 [PATCH for-9.2 v11 00/11] hw/pci: SR-IOV related fixes and improvements Akihiko Odaki
` (4 preceding siblings ...)
2024-08-02 5:17 ` [PATCH for-9.2 v11 05/11] pcie_sriov: Ensure VF function number does not overflow Akihiko Odaki
@ 2024-08-02 5:17 ` Akihiko Odaki
2024-08-02 16:54 ` Michael S. Tsirkin
2024-08-02 5:17 ` [PATCH for-9.2 v11 07/11] pcie_sriov: Release VFs failed to realize Akihiko Odaki
` (4 subsequent siblings)
10 siblings, 1 reply; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-02 5:17 UTC (permalink / raw)
To: Philippe Mathieu-Daudé, Michael S. Tsirkin, Marcel Apfelbaum,
Alex Williamson, Cédric Le Goater, Paolo Bonzini,
Daniel P. Berrangé, Eduardo Habkost, Sriram Yagnaraman,
Jason Wang, Keith Busch, Klaus Jensen, Markus Armbruster
Cc: qemu-devel, qemu-block, Akihiko Odaki
Disable SR-IOV VF devices by reusing code to power down PCI devices
instead of removing them when the guest requests to disable VFs. This
allows to realize devices and report VF realization errors at PF
realization time.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
include/hw/pci/pci.h | 5 ---
include/hw/pci/pci_device.h | 15 +++++++
include/hw/pci/pcie_sriov.h | 1 -
hw/pci/pci.c | 2 +-
hw/pci/pcie_sriov.c | 95 +++++++++++++++++++--------------------------
5 files changed, 56 insertions(+), 62 deletions(-)
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index fe04b4fafd04..14a869eeaa71 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -680,9 +680,4 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev)
MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
void pci_set_enabled(PCIDevice *pci_dev, bool state);
-static inline void pci_set_power(PCIDevice *pci_dev, bool state)
-{
- pci_set_enabled(pci_dev, state);
-}
-
#endif
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index f38fb3111954..1ff3ce94e25b 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -212,6 +212,21 @@ static inline uint16_t pci_get_bdf(PCIDevice *dev)
return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(dev)), dev->devfn);
}
+static inline void pci_set_power(PCIDevice *pci_dev, bool state)
+{
+ /*
+ * Don't change the enabled state of VFs when powering on/off the device.
+ *
+ * When powering on, VFs must not be enabled immediately but they must
+ * wait until the guest configures SR-IOV.
+ * When powering off, their corresponding PFs will be reset and disable
+ * VFs.
+ */
+ if (!pci_is_vf(pci_dev)) {
+ pci_set_enabled(pci_dev, state);
+ }
+}
+
uint16_t pci_requester_id(PCIDevice *dev);
/* DMA access functions */
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
index aa704e8f9d9f..70649236c18a 100644
--- a/include/hw/pci/pcie_sriov.h
+++ b/include/hw/pci/pcie_sriov.h
@@ -18,7 +18,6 @@
typedef struct PCIESriovPF {
uint16_t num_vfs; /* Number of virtual functions created */
uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
- const char *vfname; /* Reference to the device type used for the VFs */
PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
} PCIESriovPF;
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index b532888e8f6c..5c0050e1786a 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2895,7 +2895,7 @@ void pci_set_enabled(PCIDevice *d, bool state)
memory_region_set_enabled(&d->bus_master_enable_region,
(pci_get_word(d->config + PCI_COMMAND)
& PCI_COMMAND_MASTER) && d->enabled);
- if (!d->enabled) {
+ if (d->qdev.realized) {
pci_device_reset(d);
}
}
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index f0bde0d3fc79..faadb0d2ea85 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -20,9 +20,16 @@
#include "qapi/error.h"
#include "trace.h"
-static PCIDevice *register_vf(PCIDevice *pf, int devfn,
- const char *name, uint16_t vf_num);
-static void unregister_vfs(PCIDevice *dev);
+static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
+{
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ PCIDevice *vf = dev->exp.sriov_pf.vf[i];
+ object_unparent(OBJECT(vf));
+ object_unref(OBJECT(vf));
+ }
+ g_free(dev->exp.sriov_pf.vf);
+ dev->exp.sriov_pf.vf = NULL;
+}
bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
const char *vfname, uint16_t vf_dev_id,
@@ -30,6 +37,8 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
uint16_t vf_offset, uint16_t vf_stride,
Error **errp)
{
+ BusState *bus = qdev_get_parent_bus(&dev->qdev);
+ int32_t devfn = dev->devfn + vf_offset;
uint8_t *cfg = dev->config + offset;
uint8_t *wmask;
@@ -49,7 +58,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
offset, PCI_EXT_CAP_SRIOV_SIZEOF);
dev->exp.sriov_cap = offset;
dev->exp.sriov_pf.num_vfs = 0;
- dev->exp.sriov_pf.vfname = g_strdup(vfname);
dev->exp.sriov_pf.vf = NULL;
pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
@@ -83,14 +91,34 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
+ dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
+
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ PCIDevice *vf = pci_new(devfn, vfname);
+ vf->exp.sriov_vf.pf = dev;
+ vf->exp.sriov_vf.vf_number = i;
+
+ if (!qdev_realize(&vf->qdev, bus, errp)) {
+ unparent_vfs(dev, i);
+ return false;
+ }
+
+ /* set vid/did according to sr/iov spec - they are not used */
+ pci_config_set_vendor_id(vf->config, 0xffff);
+ pci_config_set_device_id(vf->config, 0xffff);
+
+ dev->exp.sriov_pf.vf[i] = vf;
+ devfn += vf_stride;
+ }
+
return true;
}
void pcie_sriov_pf_exit(PCIDevice *dev)
{
- unregister_vfs(dev);
- g_free((char *)dev->exp.sriov_pf.vfname);
- dev->exp.sriov_pf.vfname = NULL;
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+
+ unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
}
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
@@ -156,38 +184,11 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
}
}
-static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
- uint16_t vf_num)
-{
- PCIDevice *dev = pci_new(devfn, name);
- dev->exp.sriov_vf.pf = pf;
- dev->exp.sriov_vf.vf_number = vf_num;
- PCIBus *bus = pci_get_bus(pf);
- Error *local_err = NULL;
-
- qdev_realize(&dev->qdev, &bus->qbus, &local_err);
- if (local_err) {
- error_report_err(local_err);
- return NULL;
- }
-
- /* set vid/did according to sr/iov spec - they are not used */
- pci_config_set_vendor_id(dev->config, 0xffff);
- pci_config_set_device_id(dev->config, 0xffff);
-
- return dev;
-}
-
static void register_vfs(PCIDevice *dev)
{
uint16_t num_vfs;
uint16_t i;
uint16_t sriov_cap = dev->exp.sriov_cap;
- uint16_t vf_offset =
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
- uint16_t vf_stride =
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
- int32_t devfn = dev->devfn + vf_offset;
assert(sriov_cap > 0);
num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
@@ -195,18 +196,10 @@ static void register_vfs(PCIDevice *dev)
return;
}
- dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
-
trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn), num_vfs);
for (i = 0; i < num_vfs; i++) {
- dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
- dev->exp.sriov_pf.vfname, i);
- if (!dev->exp.sriov_pf.vf[i]) {
- num_vfs = i;
- break;
- }
- devfn += vf_stride;
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
}
dev->exp.sriov_pf.num_vfs = num_vfs;
}
@@ -219,12 +212,8 @@ static void unregister_vfs(PCIDevice *dev)
trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn), num_vfs);
for (i = 0; i < num_vfs; i++) {
- PCIDevice *vf = dev->exp.sriov_pf.vf[i];
- object_unparent(OBJECT(vf));
- object_unref(OBJECT(vf));
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
}
- g_free(dev->exp.sriov_pf.vf);
- dev->exp.sriov_pf.vf = NULL;
dev->exp.sriov_pf.num_vfs = 0;
}
@@ -246,14 +235,10 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
PCI_FUNC(dev->devfn), off, val, len);
if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
- if (dev->exp.sriov_pf.num_vfs) {
- if (!(val & PCI_SRIOV_CTRL_VFE)) {
- unregister_vfs(dev);
- }
+ if (val & PCI_SRIOV_CTRL_VFE) {
+ register_vfs(dev);
} else {
- if (val & PCI_SRIOV_CTRL_VFE) {
- register_vfs(dev);
- }
+ unregister_vfs(dev);
}
}
}
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH for-9.2 v11 06/11] pcie_sriov: Reuse SR-IOV VF device instances
2024-08-02 5:17 ` [PATCH for-9.2 v11 06/11] pcie_sriov: Reuse SR-IOV VF device instances Akihiko Odaki
@ 2024-08-02 16:54 ` Michael S. Tsirkin
2024-08-04 6:55 ` Akihiko Odaki
0 siblings, 1 reply; 20+ messages in thread
From: Michael S. Tsirkin @ 2024-08-02 16:54 UTC (permalink / raw)
To: Akihiko Odaki
Cc: Philippe Mathieu-Daudé, Marcel Apfelbaum, Alex Williamson,
Cédric Le Goater, Paolo Bonzini, Daniel P. Berrangé,
Eduardo Habkost, Sriram Yagnaraman, Jason Wang, Keith Busch,
Klaus Jensen, Markus Armbruster, qemu-devel, qemu-block
On Fri, Aug 02, 2024 at 02:17:56PM +0900, Akihiko Odaki wrote:
> Disable SR-IOV VF devices by reusing code to power down PCI devices
> instead of removing them when the guest requests to disable VFs. This
> allows to realize devices and report VF realization errors at PF
> realization time.
What kind of errors do you have in mind?
> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> ---
> include/hw/pci/pci.h | 5 ---
> include/hw/pci/pci_device.h | 15 +++++++
> include/hw/pci/pcie_sriov.h | 1 -
> hw/pci/pci.c | 2 +-
> hw/pci/pcie_sriov.c | 95 +++++++++++++++++++--------------------------
> 5 files changed, 56 insertions(+), 62 deletions(-)
>
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index fe04b4fafd04..14a869eeaa71 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -680,9 +680,4 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev)
> MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
> void pci_set_enabled(PCIDevice *pci_dev, bool state);
>
> -static inline void pci_set_power(PCIDevice *pci_dev, bool state)
> -{
> - pci_set_enabled(pci_dev, state);
> -}
> -
> #endif
> diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
> index f38fb3111954..1ff3ce94e25b 100644
> --- a/include/hw/pci/pci_device.h
> +++ b/include/hw/pci/pci_device.h
> @@ -212,6 +212,21 @@ static inline uint16_t pci_get_bdf(PCIDevice *dev)
> return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(dev)), dev->devfn);
> }
>
> +static inline void pci_set_power(PCIDevice *pci_dev, bool state)
> +{
> + /*
> + * Don't change the enabled state of VFs when powering on/off the device.
> + *
> + * When powering on, VFs must not be enabled immediately but they must
> + * wait until the guest configures SR-IOV.
> + * When powering off, their corresponding PFs will be reset and disable
> + * VFs.
> + */
> + if (!pci_is_vf(pci_dev)) {
> + pci_set_enabled(pci_dev, state);
> + }
> +}
> +
> uint16_t pci_requester_id(PCIDevice *dev);
>
> /* DMA access functions */
> diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
> index aa704e8f9d9f..70649236c18a 100644
> --- a/include/hw/pci/pcie_sriov.h
> +++ b/include/hw/pci/pcie_sriov.h
> @@ -18,7 +18,6 @@
> typedef struct PCIESriovPF {
> uint16_t num_vfs; /* Number of virtual functions created */
> uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
> - const char *vfname; /* Reference to the device type used for the VFs */
> PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
> } PCIESriovPF;
>
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index b532888e8f6c..5c0050e1786a 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -2895,7 +2895,7 @@ void pci_set_enabled(PCIDevice *d, bool state)
> memory_region_set_enabled(&d->bus_master_enable_region,
> (pci_get_word(d->config + PCI_COMMAND)
> & PCI_COMMAND_MASTER) && d->enabled);
> - if (!d->enabled) {
> + if (d->qdev.realized) {
> pci_device_reset(d);
> }
> }
> diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
> index f0bde0d3fc79..faadb0d2ea85 100644
> --- a/hw/pci/pcie_sriov.c
> +++ b/hw/pci/pcie_sriov.c
> @@ -20,9 +20,16 @@
> #include "qapi/error.h"
> #include "trace.h"
>
> -static PCIDevice *register_vf(PCIDevice *pf, int devfn,
> - const char *name, uint16_t vf_num);
> -static void unregister_vfs(PCIDevice *dev);
> +static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
> +{
> + for (uint16_t i = 0; i < total_vfs; i++) {
> + PCIDevice *vf = dev->exp.sriov_pf.vf[i];
> + object_unparent(OBJECT(vf));
> + object_unref(OBJECT(vf));
> + }
> + g_free(dev->exp.sriov_pf.vf);
> + dev->exp.sriov_pf.vf = NULL;
> +}
>
> bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
> const char *vfname, uint16_t vf_dev_id,
> @@ -30,6 +37,8 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
> uint16_t vf_offset, uint16_t vf_stride,
> Error **errp)
> {
> + BusState *bus = qdev_get_parent_bus(&dev->qdev);
> + int32_t devfn = dev->devfn + vf_offset;
> uint8_t *cfg = dev->config + offset;
> uint8_t *wmask;
>
> @@ -49,7 +58,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
> offset, PCI_EXT_CAP_SRIOV_SIZEOF);
> dev->exp.sriov_cap = offset;
> dev->exp.sriov_pf.num_vfs = 0;
> - dev->exp.sriov_pf.vfname = g_strdup(vfname);
> dev->exp.sriov_pf.vf = NULL;
>
> pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
> @@ -83,14 +91,34 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
>
> qdev_prop_set_bit(&dev->qdev, "multifunction", true);
>
> + dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
> +
> + for (uint16_t i = 0; i < total_vfs; i++) {
> + PCIDevice *vf = pci_new(devfn, vfname);
> + vf->exp.sriov_vf.pf = dev;
> + vf->exp.sriov_vf.vf_number = i;
> +
> + if (!qdev_realize(&vf->qdev, bus, errp)) {
> + unparent_vfs(dev, i);
> + return false;
> + }
> +
> + /* set vid/did according to sr/iov spec - they are not used */
> + pci_config_set_vendor_id(vf->config, 0xffff);
> + pci_config_set_device_id(vf->config, 0xffff);
> +
> + dev->exp.sriov_pf.vf[i] = vf;
> + devfn += vf_stride;
> + }
> +
> return true;
> }
>
> void pcie_sriov_pf_exit(PCIDevice *dev)
> {
> - unregister_vfs(dev);
> - g_free((char *)dev->exp.sriov_pf.vfname);
> - dev->exp.sriov_pf.vfname = NULL;
> + uint8_t *cfg = dev->config + dev->exp.sriov_cap;
> +
> + unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
> }
>
> void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
> @@ -156,38 +184,11 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
> }
> }
>
> -static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
> - uint16_t vf_num)
> -{
> - PCIDevice *dev = pci_new(devfn, name);
> - dev->exp.sriov_vf.pf = pf;
> - dev->exp.sriov_vf.vf_number = vf_num;
> - PCIBus *bus = pci_get_bus(pf);
> - Error *local_err = NULL;
> -
> - qdev_realize(&dev->qdev, &bus->qbus, &local_err);
> - if (local_err) {
> - error_report_err(local_err);
> - return NULL;
> - }
> -
> - /* set vid/did according to sr/iov spec - they are not used */
> - pci_config_set_vendor_id(dev->config, 0xffff);
> - pci_config_set_device_id(dev->config, 0xffff);
> -
> - return dev;
> -}
> -
> static void register_vfs(PCIDevice *dev)
> {
> uint16_t num_vfs;
> uint16_t i;
> uint16_t sriov_cap = dev->exp.sriov_cap;
> - uint16_t vf_offset =
> - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
> - uint16_t vf_stride =
> - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
> - int32_t devfn = dev->devfn + vf_offset;
>
> assert(sriov_cap > 0);
> num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
> @@ -195,18 +196,10 @@ static void register_vfs(PCIDevice *dev)
> return;
> }
>
> - dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
> -
> trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
> PCI_FUNC(dev->devfn), num_vfs);
> for (i = 0; i < num_vfs; i++) {
> - dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
> - dev->exp.sriov_pf.vfname, i);
> - if (!dev->exp.sriov_pf.vf[i]) {
> - num_vfs = i;
> - break;
> - }
> - devfn += vf_stride;
> + pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
> }
> dev->exp.sriov_pf.num_vfs = num_vfs;
> }
> @@ -219,12 +212,8 @@ static void unregister_vfs(PCIDevice *dev)
> trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
> PCI_FUNC(dev->devfn), num_vfs);
> for (i = 0; i < num_vfs; i++) {
> - PCIDevice *vf = dev->exp.sriov_pf.vf[i];
> - object_unparent(OBJECT(vf));
> - object_unref(OBJECT(vf));
> + pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
> }
> - g_free(dev->exp.sriov_pf.vf);
> - dev->exp.sriov_pf.vf = NULL;
> dev->exp.sriov_pf.num_vfs = 0;
> }
>
> @@ -246,14 +235,10 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
> PCI_FUNC(dev->devfn), off, val, len);
>
> if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
> - if (dev->exp.sriov_pf.num_vfs) {
> - if (!(val & PCI_SRIOV_CTRL_VFE)) {
> - unregister_vfs(dev);
> - }
> + if (val & PCI_SRIOV_CTRL_VFE) {
> + register_vfs(dev);
> } else {
> - if (val & PCI_SRIOV_CTRL_VFE) {
> - register_vfs(dev);
> - }
> + unregister_vfs(dev);
> }
> }
> }
>
> --
> 2.45.2
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH for-9.2 v11 06/11] pcie_sriov: Reuse SR-IOV VF device instances
2024-08-02 16:54 ` Michael S. Tsirkin
@ 2024-08-04 6:55 ` Akihiko Odaki
0 siblings, 0 replies; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-04 6:55 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Philippe Mathieu-Daudé, Marcel Apfelbaum, Alex Williamson,
Cédric Le Goater, Paolo Bonzini, Daniel P. Berrangé,
Eduardo Habkost, Sriram Yagnaraman, Jason Wang, Keith Busch,
Klaus Jensen, Markus Armbruster, qemu-devel, qemu-block
On 2024/08/03 1:54, Michael S. Tsirkin wrote:
> On Fri, Aug 02, 2024 at 02:17:56PM +0900, Akihiko Odaki wrote:
>> Disable SR-IOV VF devices by reusing code to power down PCI devices
>> instead of removing them when the guest requests to disable VFs. This
>> allows to realize devices and report VF realization errors at PF
>> realization time.
>
> What kind of errors do you have in mind?
The type of errors common for any SR-IOV device would be the conflict of
PCI function numbers; all function numbers that will be used by VFs
should be reserved when realizing the PF. With the SR-IOV support of
virtio-net-pci I'm adding, a user may specify invalid netdev for VFs.
Regards,
Akihiko Odaki
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH for-9.2 v11 07/11] pcie_sriov: Release VFs failed to realize
2024-08-02 5:17 [PATCH for-9.2 v11 00/11] hw/pci: SR-IOV related fixes and improvements Akihiko Odaki
` (5 preceding siblings ...)
2024-08-02 5:17 ` [PATCH for-9.2 v11 06/11] pcie_sriov: Reuse SR-IOV VF device instances Akihiko Odaki
@ 2024-08-02 5:17 ` Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 08/11] pcie_sriov: Remove num_vfs from PCIESriovPF Akihiko Odaki
` (3 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-02 5:17 UTC (permalink / raw)
To: Philippe Mathieu-Daudé, Michael S. Tsirkin, Marcel Apfelbaum,
Alex Williamson, Cédric Le Goater, Paolo Bonzini,
Daniel P. Berrangé, Eduardo Habkost, Sriram Yagnaraman,
Jason Wang, Keith Busch, Klaus Jensen, Markus Armbruster
Cc: qemu-devel, qemu-block, Akihiko Odaki
Release VFs failed to realize just as we do in unregister_vfs().
Fixes: 7c0fa8dff811 ("pcie: Add support for Single Root I/O Virtualization (SR/IOV)")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
hw/pci/pcie_sriov.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index faadb0d2ea85..9bd7f8acc3f4 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -99,6 +99,8 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
vf->exp.sriov_vf.vf_number = i;
if (!qdev_realize(&vf->qdev, bus, errp)) {
+ object_unparent(OBJECT(vf));
+ object_unref(vf);
unparent_vfs(dev, i);
return false;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH for-9.2 v11 08/11] pcie_sriov: Remove num_vfs from PCIESriovPF
2024-08-02 5:17 [PATCH for-9.2 v11 00/11] hw/pci: SR-IOV related fixes and improvements Akihiko Odaki
` (6 preceding siblings ...)
2024-08-02 5:17 ` [PATCH for-9.2 v11 07/11] pcie_sriov: Release VFs failed to realize Akihiko Odaki
@ 2024-08-02 5:17 ` Akihiko Odaki
2024-08-02 12:58 ` Michael S. Tsirkin
2024-08-02 5:17 ` [PATCH for-9.2 v11 09/11] pcie_sriov: Register VFs after migration Akihiko Odaki
` (2 subsequent siblings)
10 siblings, 1 reply; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-02 5:17 UTC (permalink / raw)
To: Philippe Mathieu-Daudé, Michael S. Tsirkin, Marcel Apfelbaum,
Alex Williamson, Cédric Le Goater, Paolo Bonzini,
Daniel P. Berrangé, Eduardo Habkost, Sriram Yagnaraman,
Jason Wang, Keith Busch, Klaus Jensen, Markus Armbruster
Cc: qemu-devel, qemu-block, Akihiko Odaki
num_vfs is not migrated so use PCI_SRIOV_CTRL_VFE and PCI_SRIOV_NUM_VF
instead.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
include/hw/pci/pcie_sriov.h | 1 -
hw/pci/pcie_sriov.c | 28 ++++++++++++++++++++--------
hw/pci/trace-events | 2 +-
3 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
index 70649236c18a..5148c5b77dd1 100644
--- a/include/hw/pci/pcie_sriov.h
+++ b/include/hw/pci/pcie_sriov.h
@@ -16,7 +16,6 @@
#include "hw/pci/pci.h"
typedef struct PCIESriovPF {
- uint16_t num_vfs; /* Number of virtual functions created */
uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
} PCIESriovPF;
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index 9bd7f8acc3f4..fae6acea4acb 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -57,7 +57,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
offset, PCI_EXT_CAP_SRIOV_SIZEOF);
dev->exp.sriov_cap = offset;
- dev->exp.sriov_pf.num_vfs = 0;
dev->exp.sriov_pf.vf = NULL;
pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
@@ -186,6 +185,12 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
}
}
+static void clear_ctrl_vfe(PCIDevice *dev)
+{
+ uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL;
+ pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE);
+}
+
static void register_vfs(PCIDevice *dev)
{
uint16_t num_vfs;
@@ -195,6 +200,7 @@ static void register_vfs(PCIDevice *dev)
assert(sriov_cap > 0);
num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
+ clear_ctrl_vfe(dev);
return;
}
@@ -203,20 +209,18 @@ static void register_vfs(PCIDevice *dev)
for (i = 0; i < num_vfs; i++) {
pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
}
- dev->exp.sriov_pf.num_vfs = num_vfs;
}
static void unregister_vfs(PCIDevice *dev)
{
- uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
uint16_t i;
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), num_vfs);
- for (i = 0; i < num_vfs; i++) {
+ PCI_FUNC(dev->devfn));
+ for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
}
- dev->exp.sriov_pf.num_vfs = 0;
}
void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
@@ -242,6 +246,9 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
} else {
unregister_vfs(dev);
}
+ } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) {
+ clear_ctrl_vfe(dev);
+ unregister_vfs(dev);
}
}
@@ -304,7 +311,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev)
PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
{
assert(!pci_is_vf(dev));
- if (n < dev->exp.sriov_pf.num_vfs) {
+ if (n < pcie_sriov_num_vfs(dev)) {
return dev->exp.sriov_pf.vf[n];
}
return NULL;
@@ -312,5 +319,10 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
uint16_t pcie_sriov_num_vfs(PCIDevice *dev)
{
- return dev->exp.sriov_pf.num_vfs;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+ uint8_t *cfg = dev->config + sriov_cap;
+
+ return sriov_cap &&
+ (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ?
+ pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0;
}
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index 19643aa8c6b0..e98f575a9d19 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -14,7 +14,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask
# hw/pci/pcie_sriov.c
sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs"
-sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs"
+sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs"
sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d"
# pcie.c
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH for-9.2 v11 08/11] pcie_sriov: Remove num_vfs from PCIESriovPF
2024-08-02 5:17 ` [PATCH for-9.2 v11 08/11] pcie_sriov: Remove num_vfs from PCIESriovPF Akihiko Odaki
@ 2024-08-02 12:58 ` Michael S. Tsirkin
2024-08-02 15:38 ` Akihiko Odaki
0 siblings, 1 reply; 20+ messages in thread
From: Michael S. Tsirkin @ 2024-08-02 12:58 UTC (permalink / raw)
To: Akihiko Odaki
Cc: Philippe Mathieu-Daudé, Marcel Apfelbaum, Alex Williamson,
Cédric Le Goater, Paolo Bonzini, Daniel P. Berrangé,
Eduardo Habkost, Sriram Yagnaraman, Jason Wang, Keith Busch,
Klaus Jensen, Markus Armbruster, qemu-devel, qemu-block
On Fri, Aug 02, 2024 at 02:17:58PM +0900, Akihiko Odaki wrote:
> num_vfs is not migrated so use PCI_SRIOV_CTRL_VFE and PCI_SRIOV_NUM_VF
> instead.
>
> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> ---
> include/hw/pci/pcie_sriov.h | 1 -
> hw/pci/pcie_sriov.c | 28 ++++++++++++++++++++--------
> hw/pci/trace-events | 2 +-
> 3 files changed, 21 insertions(+), 10 deletions(-)
>
> diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
> index 70649236c18a..5148c5b77dd1 100644
> --- a/include/hw/pci/pcie_sriov.h
> +++ b/include/hw/pci/pcie_sriov.h
> @@ -16,7 +16,6 @@
> #include "hw/pci/pci.h"
>
> typedef struct PCIESriovPF {
> - uint16_t num_vfs; /* Number of virtual functions created */
> uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
> PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
> } PCIESriovPF;
> diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
> index 9bd7f8acc3f4..fae6acea4acb 100644
> --- a/hw/pci/pcie_sriov.c
> +++ b/hw/pci/pcie_sriov.c
> @@ -57,7 +57,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
> pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
> offset, PCI_EXT_CAP_SRIOV_SIZEOF);
> dev->exp.sriov_cap = offset;
> - dev->exp.sriov_pf.num_vfs = 0;
> dev->exp.sriov_pf.vf = NULL;
>
> pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
> @@ -186,6 +185,12 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
> }
> }
>
> +static void clear_ctrl_vfe(PCIDevice *dev)
> +{
> + uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL;
space here, after definition
> + pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE);
> +}
> +
Pls use pci_word_test_and_clear_mask
> static void register_vfs(PCIDevice *dev)
> {
> uint16_t num_vfs;
> @@ -195,6 +200,7 @@ static void register_vfs(PCIDevice *dev)
> assert(sriov_cap > 0);
> num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
> if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
> + clear_ctrl_vfe(dev);
> return;
> }
>
> @@ -203,20 +209,18 @@ static void register_vfs(PCIDevice *dev)
> for (i = 0; i < num_vfs; i++) {
> pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
> }
> - dev->exp.sriov_pf.num_vfs = num_vfs;
> }
>
> static void unregister_vfs(PCIDevice *dev)
> {
> - uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
> uint16_t i;
> + uint8_t *cfg = dev->config + dev->exp.sriov_cap;
>
> trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
> - PCI_FUNC(dev->devfn), num_vfs);
> - for (i = 0; i < num_vfs; i++) {
> + PCI_FUNC(dev->devfn));
> + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
Why PCI_SRIOV_TOTAL_VF not PCI_SRIOV_NUM_VF/pcie_sriov_num_vfs?
> pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
> }
> - dev->exp.sriov_pf.num_vfs = 0;
> }
>
> void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
> @@ -242,6 +246,9 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
> } else {
> unregister_vfs(dev);
> }
> + } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) {
> + clear_ctrl_vfe(dev);
> + unregister_vfs(dev);
So any write into PCI_SRIOV_NUM_VF automatically clears VFE?
Yes writing into PCI_SRIOV_NUM_VF should not happen when VFE
is set, but spec does not say we need to clear it automatically.
Why come up with random rules? just don't special case it,
whatever happens, let it happen.
And what does this change have to do with getting rid of
num_vfs?
> }
> }
>
> @@ -304,7 +311,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev)
> PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
> {
> assert(!pci_is_vf(dev));
> - if (n < dev->exp.sriov_pf.num_vfs) {
> + if (n < pcie_sriov_num_vfs(dev)) {
> return dev->exp.sriov_pf.vf[n];
> }
> return NULL;
> @@ -312,5 +319,10 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
>
> uint16_t pcie_sriov_num_vfs(PCIDevice *dev)
> {
> - return dev->exp.sriov_pf.num_vfs;
> + uint16_t sriov_cap = dev->exp.sriov_cap;
> + uint8_t *cfg = dev->config + sriov_cap;
> +
> + return sriov_cap &&
> + (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ?
> + pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0;
> }
> diff --git a/hw/pci/trace-events b/hw/pci/trace-events
> index 19643aa8c6b0..e98f575a9d19 100644
> --- a/hw/pci/trace-events
> +++ b/hw/pci/trace-events
> @@ -14,7 +14,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask
>
> # hw/pci/pcie_sriov.c
> sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs"
> -sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs"
> +sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs"
> sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d"
>
> # pcie.c
>
> --
> 2.45.2
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH for-9.2 v11 08/11] pcie_sriov: Remove num_vfs from PCIESriovPF
2024-08-02 12:58 ` Michael S. Tsirkin
@ 2024-08-02 15:38 ` Akihiko Odaki
2024-08-02 16:52 ` Michael S. Tsirkin
0 siblings, 1 reply; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-02 15:38 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Philippe Mathieu-Daudé, Marcel Apfelbaum, Alex Williamson,
Cédric Le Goater, Paolo Bonzini, Daniel P. Berrangé,
Eduardo Habkost, Sriram Yagnaraman, Jason Wang, Keith Busch,
Klaus Jensen, Markus Armbruster, qemu-devel, qemu-block
On 2024/08/02 21:58, Michael S. Tsirkin wrote:
> On Fri, Aug 02, 2024 at 02:17:58PM +0900, Akihiko Odaki wrote:
>> num_vfs is not migrated so use PCI_SRIOV_CTRL_VFE and PCI_SRIOV_NUM_VF
>> instead.
>>
>> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
>> ---
>> include/hw/pci/pcie_sriov.h | 1 -
>> hw/pci/pcie_sriov.c | 28 ++++++++++++++++++++--------
>> hw/pci/trace-events | 2 +-
>> 3 files changed, 21 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
>> index 70649236c18a..5148c5b77dd1 100644
>> --- a/include/hw/pci/pcie_sriov.h
>> +++ b/include/hw/pci/pcie_sriov.h
>> @@ -16,7 +16,6 @@
>> #include "hw/pci/pci.h"
>>
>> typedef struct PCIESriovPF {
>> - uint16_t num_vfs; /* Number of virtual functions created */
>> uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
>> PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
>> } PCIESriovPF;
>> diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
>> index 9bd7f8acc3f4..fae6acea4acb 100644
>> --- a/hw/pci/pcie_sriov.c
>> +++ b/hw/pci/pcie_sriov.c
>> @@ -57,7 +57,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
>> pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
>> offset, PCI_EXT_CAP_SRIOV_SIZEOF);
>> dev->exp.sriov_cap = offset;
>> - dev->exp.sriov_pf.num_vfs = 0;
>> dev->exp.sriov_pf.vf = NULL;
>>
>> pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
>> @@ -186,6 +185,12 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
>> }
>> }
>>
>> +static void clear_ctrl_vfe(PCIDevice *dev)
>> +{
>> + uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL;
>
> space here, after definition
>
>> + pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE);
>> +}
>> +
>
> Pls use pci_word_test_and_clear_mask
That sounds good. I'll do so with the next version.
>
>
>> static void register_vfs(PCIDevice *dev)
>> {
>> uint16_t num_vfs;
>> @@ -195,6 +200,7 @@ static void register_vfs(PCIDevice *dev)
>> assert(sriov_cap > 0);
>> num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
>> if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
>> + clear_ctrl_vfe(dev);
>> return;
>> }
>>
>> @@ -203,20 +209,18 @@ static void register_vfs(PCIDevice *dev)
>> for (i = 0; i < num_vfs; i++) {
>> pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
>> }
>> - dev->exp.sriov_pf.num_vfs = num_vfs;
>> }
>>
>> static void unregister_vfs(PCIDevice *dev)
>> {
>> - uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
>> uint16_t i;
>> + uint8_t *cfg = dev->config + dev->exp.sriov_cap;
>>
>> trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
>> - PCI_FUNC(dev->devfn), num_vfs);
>> - for (i = 0; i < num_vfs; i++) {
>> + PCI_FUNC(dev->devfn));
>> + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
>
> Why PCI_SRIOV_TOTAL_VF not PCI_SRIOV_NUM_VF/pcie_sriov_num_vfs?
Because PCI_SRIOV_NUM_VF is overwritten when unregister_vfs() is called.
>
>
>> pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
>> }
>> - dev->exp.sriov_pf.num_vfs = 0;
>> }
>>
>> void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
>> @@ -242,6 +246,9 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
>> } else {
>> unregister_vfs(dev);
>> }
>> + } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) {
>> + clear_ctrl_vfe(dev);
>> + unregister_vfs(dev);
>
> So any write into PCI_SRIOV_NUM_VF automatically clears VFE?
>
> Yes writing into PCI_SRIOV_NUM_VF should not happen when VFE
> is set, but spec does not say we need to clear it automatically.
> Why come up with random rules? just don't special case it,
> whatever happens, let it happen.
>
> And what does this change have to do with getting rid of
> num_vfs?
Keeping VFs working requires to know the number of VFs, but we do no
longer know it because PCI_SRIOV_NUM_VF is overwritten. This disables
all VFs instead of trying to keep VFs alive.
Regards,
Akihiko Odaki
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH for-9.2 v11 08/11] pcie_sriov: Remove num_vfs from PCIESriovPF
2024-08-02 15:38 ` Akihiko Odaki
@ 2024-08-02 16:52 ` Michael S. Tsirkin
2024-08-04 9:11 ` Akihiko Odaki
0 siblings, 1 reply; 20+ messages in thread
From: Michael S. Tsirkin @ 2024-08-02 16:52 UTC (permalink / raw)
To: Akihiko Odaki
Cc: Philippe Mathieu-Daudé, Marcel Apfelbaum, Alex Williamson,
Cédric Le Goater, Paolo Bonzini, Daniel P. Berrangé,
Eduardo Habkost, Sriram Yagnaraman, Jason Wang, Keith Busch,
Klaus Jensen, Markus Armbruster, qemu-devel, qemu-block
On Sat, Aug 03, 2024 at 12:38:10AM +0900, Akihiko Odaki wrote:
> On 2024/08/02 21:58, Michael S. Tsirkin wrote:
> > On Fri, Aug 02, 2024 at 02:17:58PM +0900, Akihiko Odaki wrote:
> > > num_vfs is not migrated so use PCI_SRIOV_CTRL_VFE and PCI_SRIOV_NUM_VF
> > > instead.
> > >
> > > Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> > > ---
> > > include/hw/pci/pcie_sriov.h | 1 -
> > > hw/pci/pcie_sriov.c | 28 ++++++++++++++++++++--------
> > > hw/pci/trace-events | 2 +-
> > > 3 files changed, 21 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
> > > index 70649236c18a..5148c5b77dd1 100644
> > > --- a/include/hw/pci/pcie_sriov.h
> > > +++ b/include/hw/pci/pcie_sriov.h
> > > @@ -16,7 +16,6 @@
> > > #include "hw/pci/pci.h"
> > > typedef struct PCIESriovPF {
> > > - uint16_t num_vfs; /* Number of virtual functions created */
> > > uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
> > > PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
> > > } PCIESriovPF;
> > > diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
> > > index 9bd7f8acc3f4..fae6acea4acb 100644
> > > --- a/hw/pci/pcie_sriov.c
> > > +++ b/hw/pci/pcie_sriov.c
> > > @@ -57,7 +57,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
> > > pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
> > > offset, PCI_EXT_CAP_SRIOV_SIZEOF);
> > > dev->exp.sriov_cap = offset;
> > > - dev->exp.sriov_pf.num_vfs = 0;
> > > dev->exp.sriov_pf.vf = NULL;
> > > pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
> > > @@ -186,6 +185,12 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
> > > }
> > > }
> > > +static void clear_ctrl_vfe(PCIDevice *dev)
> > > +{
> > > + uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL;
> >
> > space here, after definition
> >
> > > + pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE);
> > > +}
> > > +
> >
> > Pls use pci_word_test_and_clear_mask
>
> That sounds good. I'll do so with the next version.
>
> >
> >
> > > static void register_vfs(PCIDevice *dev)
> > > {
> > > uint16_t num_vfs;
> > > @@ -195,6 +200,7 @@ static void register_vfs(PCIDevice *dev)
> > > assert(sriov_cap > 0);
> > > num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
> > > if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
> > > + clear_ctrl_vfe(dev);
> > > return;
> > > }
> > > @@ -203,20 +209,18 @@ static void register_vfs(PCIDevice *dev)
> > > for (i = 0; i < num_vfs; i++) {
> > > pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
> > > }
> > > - dev->exp.sriov_pf.num_vfs = num_vfs;
> > > }
> > > static void unregister_vfs(PCIDevice *dev)
> > > {
> > > - uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
> > > uint16_t i;
> > > + uint8_t *cfg = dev->config + dev->exp.sriov_cap;
> > > trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
> > > - PCI_FUNC(dev->devfn), num_vfs);
> > > - for (i = 0; i < num_vfs; i++) {
> > > + PCI_FUNC(dev->devfn));
> > > + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
> >
> > Why PCI_SRIOV_TOTAL_VF not PCI_SRIOV_NUM_VF/pcie_sriov_num_vfs?
>
> Because PCI_SRIOV_NUM_VF is overwritten when unregister_vfs() is called.
maybe this function should get the range of VFs to unregister, then.
> >
> >
> > > pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
> > > }
> > > - dev->exp.sriov_pf.num_vfs = 0;
> > > }
> > > void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
> > > @@ -242,6 +246,9 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
> > > } else {
> > > unregister_vfs(dev);
> > > }
> > > + } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) {
> > > + clear_ctrl_vfe(dev);
> > > + unregister_vfs(dev);
> >
> > So any write into PCI_SRIOV_NUM_VF automatically clears VFE?
> >
> > Yes writing into PCI_SRIOV_NUM_VF should not happen when VFE
> > is set, but spec does not say we need to clear it automatically.
> > Why come up with random rules? just don't special case it,
> > whatever happens, let it happen.
> >
> > And what does this change have to do with getting rid of
> > num_vfs?
>
> Keeping VFs working requires to know the number of VFs, but we do no longer
> know it because PCI_SRIOV_NUM_VF is overwritten. This disables all VFs
> instead of trying to keep VFs alive.
>
> Regards,
> Akihiko Odaki
However, we then get into a situation where VFE is set but
PCI_SRIOV_NUM_VF no longer reflects the # of registered VFs.
Given you removed num_vfs which was exactly
the # of registered VFs, it is hard to say if that will lead to
confusion now or later.
--
MST
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH for-9.2 v11 08/11] pcie_sriov: Remove num_vfs from PCIESriovPF
2024-08-02 16:52 ` Michael S. Tsirkin
@ 2024-08-04 9:11 ` Akihiko Odaki
0 siblings, 0 replies; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-04 9:11 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Philippe Mathieu-Daudé, Marcel Apfelbaum, Alex Williamson,
Cédric Le Goater, Paolo Bonzini, Daniel P. Berrangé,
Eduardo Habkost, Sriram Yagnaraman, Jason Wang, Keith Busch,
Klaus Jensen, Markus Armbruster, qemu-devel, qemu-block
On 2024/08/03 1:52, Michael S. Tsirkin wrote:
> On Sat, Aug 03, 2024 at 12:38:10AM +0900, Akihiko Odaki wrote:
>> On 2024/08/02 21:58, Michael S. Tsirkin wrote:
>>> On Fri, Aug 02, 2024 at 02:17:58PM +0900, Akihiko Odaki wrote:
>>>> num_vfs is not migrated so use PCI_SRIOV_CTRL_VFE and PCI_SRIOV_NUM_VF
>>>> instead.
>>>>
>>>> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
>>>> ---
>>>> include/hw/pci/pcie_sriov.h | 1 -
>>>> hw/pci/pcie_sriov.c | 28 ++++++++++++++++++++--------
>>>> hw/pci/trace-events | 2 +-
>>>> 3 files changed, 21 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
>>>> index 70649236c18a..5148c5b77dd1 100644
>>>> --- a/include/hw/pci/pcie_sriov.h
>>>> +++ b/include/hw/pci/pcie_sriov.h
>>>> @@ -16,7 +16,6 @@
>>>> #include "hw/pci/pci.h"
>>>> typedef struct PCIESriovPF {
>>>> - uint16_t num_vfs; /* Number of virtual functions created */
>>>> uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
>>>> PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
>>>> } PCIESriovPF;
>>>> diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
>>>> index 9bd7f8acc3f4..fae6acea4acb 100644
>>>> --- a/hw/pci/pcie_sriov.c
>>>> +++ b/hw/pci/pcie_sriov.c
>>>> @@ -57,7 +57,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
>>>> pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
>>>> offset, PCI_EXT_CAP_SRIOV_SIZEOF);
>>>> dev->exp.sriov_cap = offset;
>>>> - dev->exp.sriov_pf.num_vfs = 0;
>>>> dev->exp.sriov_pf.vf = NULL;
>>>> pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
>>>> @@ -186,6 +185,12 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
>>>> }
>>>> }
>>>> +static void clear_ctrl_vfe(PCIDevice *dev)
>>>> +{
>>>> + uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL;
>>>
>>> space here, after definition
>>>
>>>> + pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE);
>>>> +}
>>>> +
>>>
>>> Pls use pci_word_test_and_clear_mask
>>
>> That sounds good. I'll do so with the next version.
>>
>>>
>>>
>>>> static void register_vfs(PCIDevice *dev)
>>>> {
>>>> uint16_t num_vfs;
>>>> @@ -195,6 +200,7 @@ static void register_vfs(PCIDevice *dev)
>>>> assert(sriov_cap > 0);
>>>> num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
>>>> if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
>>>> + clear_ctrl_vfe(dev);
>>>> return;
>>>> }
>>>> @@ -203,20 +209,18 @@ static void register_vfs(PCIDevice *dev)
>>>> for (i = 0; i < num_vfs; i++) {
>>>> pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
>>>> }
>>>> - dev->exp.sriov_pf.num_vfs = num_vfs;
>>>> }
>>>> static void unregister_vfs(PCIDevice *dev)
>>>> {
>>>> - uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
>>>> uint16_t i;
>>>> + uint8_t *cfg = dev->config + dev->exp.sriov_cap;
>>>> trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
>>>> - PCI_FUNC(dev->devfn), num_vfs);
>>>> - for (i = 0; i < num_vfs; i++) {
>>>> + PCI_FUNC(dev->devfn));
>>>> + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
>>>
>>> Why PCI_SRIOV_TOTAL_VF not PCI_SRIOV_NUM_VF/pcie_sriov_num_vfs?
>>
>> Because PCI_SRIOV_NUM_VF is overwritten when unregister_vfs() is called.
>
>
> maybe this function should get the range of VFs to unregister, then.
PCI_SRIOV_TOTAL_VF gives always a valid value, and it is the only value
known to valid when PCI_SRIOV_CTRL gets written with a value clearing
PCI_SRIOV_CTRL_VFE.
>
>>>
>>>
>>>> pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
>>>> }
>>>> - dev->exp.sriov_pf.num_vfs = 0;
>>>> }
>>>> void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
>>>> @@ -242,6 +246,9 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
>>>> } else {
>>>> unregister_vfs(dev);
>>>> }
>>>> + } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) {
>>>> + clear_ctrl_vfe(dev);
>>>> + unregister_vfs(dev);
>>>
>>> So any write into PCI_SRIOV_NUM_VF automatically clears VFE?
>>>
>>> Yes writing into PCI_SRIOV_NUM_VF should not happen when VFE
>>> is set, but spec does not say we need to clear it automatically.
>>> Why come up with random rules? just don't special case it,
>>> whatever happens, let it happen.
>>>
>>> And what does this change have to do with getting rid of
>>> num_vfs?
>>
>> Keeping VFs working requires to know the number of VFs, but we do no longer
>> know it because PCI_SRIOV_NUM_VF is overwritten. This disables all VFs
>> instead of trying to keep VFs alive.
>>
>> Regards,
>> Akihiko Odaki
>
> However, we then get into a situation where VFE is set but
> PCI_SRIOV_NUM_VF no longer reflects the # of registered VFs.
> Given you removed num_vfs which was exactly
> the # of registered VFs, it is hard to say if that will lead to
> confusion now or later.
I masked writes to PCI_SRIOV_NUM_VF when PCI_SRIOV_CTRL_VFE is set in v12.
Regards,
Akihiko Odaki
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH for-9.2 v11 09/11] pcie_sriov: Register VFs after migration
2024-08-02 5:17 [PATCH for-9.2 v11 00/11] hw/pci: SR-IOV related fixes and improvements Akihiko Odaki
` (7 preceding siblings ...)
2024-08-02 5:17 ` [PATCH for-9.2 v11 08/11] pcie_sriov: Remove num_vfs from PCIESriovPF Akihiko Odaki
@ 2024-08-02 5:17 ` Akihiko Odaki
2024-08-02 5:18 ` [PATCH for-9.2 v11 10/11] hw/pci: Use -1 as the default value for rombar Akihiko Odaki
2024-08-02 5:18 ` [PATCH for-9.2 v11 11/11] hw/qdev: Remove opts member Akihiko Odaki
10 siblings, 0 replies; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-02 5:17 UTC (permalink / raw)
To: Philippe Mathieu-Daudé, Michael S. Tsirkin, Marcel Apfelbaum,
Alex Williamson, Cédric Le Goater, Paolo Bonzini,
Daniel P. Berrangé, Eduardo Habkost, Sriram Yagnaraman,
Jason Wang, Keith Busch, Klaus Jensen, Markus Armbruster
Cc: qemu-devel, qemu-block, Akihiko Odaki
pcie_sriov doesn't have code to restore its state after migration, but
igb, which uses pcie_sriov, naively claimed its migration capability.
Add code to register VFs after migration and fix igb migration.
Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
include/hw/pci/pcie_sriov.h | 2 ++
hw/pci/pci.c | 7 +++++++
hw/pci/pcie_sriov.c | 7 +++++++
3 files changed, 16 insertions(+)
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
index 5148c5b77dd1..c5d2d318d330 100644
--- a/include/hw/pci/pcie_sriov.h
+++ b/include/hw/pci/pcie_sriov.h
@@ -57,6 +57,8 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize);
void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
uint32_t val, int len);
+void pcie_sriov_pf_post_load(PCIDevice *dev);
+
/* Reset SR/IOV */
void pcie_sriov_pf_reset(PCIDevice *dev);
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 5c0050e1786a..4c7be5295110 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -733,10 +733,17 @@ static bool migrate_is_not_pcie(void *opaque, int version_id)
return !pci_is_express((PCIDevice *)opaque);
}
+static int pci_post_load(void *opaque, int version_id)
+{
+ pcie_sriov_pf_post_load(opaque);
+ return 0;
+}
+
const VMStateDescription vmstate_pci_device = {
.name = "PCIDevice",
.version_id = 2,
.minimum_version_id = 1,
+ .post_load = pci_post_load,
.fields = (const VMStateField[]) {
VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice),
VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice,
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index fae6acea4acb..56523ab4e833 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -252,6 +252,13 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
}
}
+void pcie_sriov_pf_post_load(PCIDevice *dev)
+{
+ if (dev->exp.sriov_cap) {
+ register_vfs(dev);
+ }
+}
+
/* Reset SR/IOV */
void pcie_sriov_pf_reset(PCIDevice *dev)
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH for-9.2 v11 10/11] hw/pci: Use -1 as the default value for rombar
2024-08-02 5:17 [PATCH for-9.2 v11 00/11] hw/pci: SR-IOV related fixes and improvements Akihiko Odaki
` (8 preceding siblings ...)
2024-08-02 5:17 ` [PATCH for-9.2 v11 09/11] pcie_sriov: Register VFs after migration Akihiko Odaki
@ 2024-08-02 5:18 ` Akihiko Odaki
2024-08-02 10:54 ` Markus Armbruster
2024-08-02 5:18 ` [PATCH for-9.2 v11 11/11] hw/qdev: Remove opts member Akihiko Odaki
10 siblings, 1 reply; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-02 5:18 UTC (permalink / raw)
To: Philippe Mathieu-Daudé, Michael S. Tsirkin, Marcel Apfelbaum,
Alex Williamson, Cédric Le Goater, Paolo Bonzini,
Daniel P. Berrangé, Eduardo Habkost, Sriram Yagnaraman,
Jason Wang, Keith Busch, Klaus Jensen, Markus Armbruster
Cc: qemu-devel, qemu-block, Akihiko Odaki
vfio_pci_size_rom() distinguishes whether rombar is explicitly set to 1
by checking dev->opts, bypassing the QOM property infrastructure.
Use -1 as the default value for rombar to tell if the user explicitly
set it to 1. The property is also converted from unsigned to signed.
-1 is signed so it is safe to give it a new meaning. The values in
[2 ^ 31, 2 ^ 32) will be invalid, but nobody should have typed these
values by chance.
Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
include/hw/pci/pci_device.h | 2 +-
hw/pci/pci.c | 2 +-
hw/vfio/pci.c | 5 ++---
3 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index 1ff3ce94e25b..8fa845beee5e 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -148,7 +148,7 @@ struct PCIDevice {
uint32_t romsize;
bool has_rom;
MemoryRegion rom;
- uint32_t rom_bar;
+ int32_t rom_bar;
/* INTx routing notifier */
PCIINTxRoutingNotifier intx_routing_notifier;
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 4c7be5295110..d2eaf0c51dde 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -71,7 +71,7 @@ static Property pci_props[] = {
DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
DEFINE_PROP_STRING("romfile", PCIDevice, romfile),
DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, UINT32_MAX),
- DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1),
+ DEFINE_PROP_INT32("rombar", PCIDevice, rom_bar, -1),
DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present,
QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false),
DEFINE_PROP_BIT("x-pcie-lnksta-dllla", PCIDevice, cap_present,
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 2407720c3530..dc53837eac73 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1012,7 +1012,6 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
{
uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
- DeviceState *dev = DEVICE(vdev);
char *name;
int fd = vdev->vbasedev.fd;
@@ -1046,12 +1045,12 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
}
if (vfio_opt_rom_in_denylist(vdev)) {
- if (dev->opts && qdict_haskey(dev->opts, "rombar")) {
+ if (vdev->pdev.rom_bar > 0) {
warn_report("Device at %s is known to cause system instability"
" issues during option rom execution",
vdev->vbasedev.name);
error_printf("Proceeding anyway since user specified"
- " non zero value for rombar\n");
+ " positive value for rombar\n");
} else {
warn_report("Rom loading for device at %s has been disabled"
" due to system instability issues",
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH for-9.2 v11 10/11] hw/pci: Use -1 as the default value for rombar
2024-08-02 5:18 ` [PATCH for-9.2 v11 10/11] hw/pci: Use -1 as the default value for rombar Akihiko Odaki
@ 2024-08-02 10:54 ` Markus Armbruster
2024-08-04 6:27 ` Akihiko Odaki
0 siblings, 1 reply; 20+ messages in thread
From: Markus Armbruster @ 2024-08-02 10:54 UTC (permalink / raw)
To: Akihiko Odaki
Cc: Philippe Mathieu-Daudé, Michael S. Tsirkin, Marcel Apfelbaum,
Alex Williamson, Cédric Le Goater, Paolo Bonzini,
Daniel P. Berrangé, Eduardo Habkost, Sriram Yagnaraman,
Jason Wang, Keith Busch, Klaus Jensen, qemu-devel, qemu-block
Akihiko Odaki <akihiko.odaki@daynix.com> writes:
> vfio_pci_size_rom() distinguishes whether rombar is explicitly set to 1
> by checking dev->opts, bypassing the QOM property infrastructure.
>
> Use -1 as the default value for rombar to tell if the user explicitly
> set it to 1. The property is also converted from unsigned to signed.
> -1 is signed so it is safe to give it a new meaning. The values in
> [2 ^ 31, 2 ^ 32) will be invalid, but nobody should have typed these
> values by chance.
s/will be/become invalid/
Should we document the change somewhere? I'm not sure. Opinions?
> Suggested-by: Markus Armbruster <armbru@redhat.com>
> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> ---
> include/hw/pci/pci_device.h | 2 +-
> hw/pci/pci.c | 2 +-
> hw/vfio/pci.c | 5 ++---
> 3 files changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
> index 1ff3ce94e25b..8fa845beee5e 100644
> --- a/include/hw/pci/pci_device.h
> +++ b/include/hw/pci/pci_device.h
> @@ -148,7 +148,7 @@ struct PCIDevice {
> uint32_t romsize;
> bool has_rom;
> MemoryRegion rom;
> - uint32_t rom_bar;
> + int32_t rom_bar;
>
> /* INTx routing notifier */
> PCIINTxRoutingNotifier intx_routing_notifier;
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 4c7be5295110..d2eaf0c51dde 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -71,7 +71,7 @@ static Property pci_props[] = {
> DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
> DEFINE_PROP_STRING("romfile", PCIDevice, romfile),
> DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, UINT32_MAX),
> - DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1),
> + DEFINE_PROP_INT32("rombar", PCIDevice, rom_bar, -1),
> DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present,
> QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false),
> DEFINE_PROP_BIT("x-pcie-lnksta-dllla", PCIDevice, cap_present,
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 2407720c3530..dc53837eac73 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1012,7 +1012,6 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
> {
> uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
> off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
> - DeviceState *dev = DEVICE(vdev);
> char *name;
> int fd = vdev->vbasedev.fd;
>
> @@ -1046,12 +1045,12 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
> }
>
> if (vfio_opt_rom_in_denylist(vdev)) {
> - if (dev->opts && qdict_haskey(dev->opts, "rombar")) {
> + if (vdev->pdev.rom_bar > 0) {
> warn_report("Device at %s is known to cause system instability"
> " issues during option rom execution",
> vdev->vbasedev.name);
> error_printf("Proceeding anyway since user specified"
> - " non zero value for rombar\n");
> + " positive value for rombar\n");
> } else {
> warn_report("Rom loading for device at %s has been disabled"
> " due to system instability issues",
Preferably with the commit message tweak:
Reviewed-by: Markus Armbruster <armbru@redhat.com>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH for-9.2 v11 10/11] hw/pci: Use -1 as the default value for rombar
2024-08-02 10:54 ` Markus Armbruster
@ 2024-08-04 6:27 ` Akihiko Odaki
0 siblings, 0 replies; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-04 6:27 UTC (permalink / raw)
To: Markus Armbruster
Cc: Philippe Mathieu-Daudé, Michael S. Tsirkin, Marcel Apfelbaum,
Alex Williamson, Cédric Le Goater, Paolo Bonzini,
Daniel P. Berrangé, Eduardo Habkost, Sriram Yagnaraman,
Jason Wang, Keith Busch, Klaus Jensen, qemu-devel, qemu-block
On 2024/08/02 19:54, Markus Armbruster wrote:
> Akihiko Odaki <akihiko.odaki@daynix.com> writes:
>
>> vfio_pci_size_rom() distinguishes whether rombar is explicitly set to 1
>> by checking dev->opts, bypassing the QOM property infrastructure.
>>
>> Use -1 as the default value for rombar to tell if the user explicitly
>> set it to 1. The property is also converted from unsigned to signed.
>> -1 is signed so it is safe to give it a new meaning. The values in
>> [2 ^ 31, 2 ^ 32) will be invalid, but nobody should have typed these
>> values by chance.
>
> s/will be/become invalid/
I'll make this change in the next version.
>
> Should we document the change somewhere? I'm not sure. Opinions?
I think it is fine whether it is documented or not. As the commit
message says, I expect nobody will be impacted with this change. But
documenting this change will hurt nobody either.
Regards,
Akihiko Odaki
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH for-9.2 v11 11/11] hw/qdev: Remove opts member
2024-08-02 5:17 [PATCH for-9.2 v11 00/11] hw/pci: SR-IOV related fixes and improvements Akihiko Odaki
` (9 preceding siblings ...)
2024-08-02 5:18 ` [PATCH for-9.2 v11 10/11] hw/pci: Use -1 as the default value for rombar Akihiko Odaki
@ 2024-08-02 5:18 ` Akihiko Odaki
10 siblings, 0 replies; 20+ messages in thread
From: Akihiko Odaki @ 2024-08-02 5:18 UTC (permalink / raw)
To: Philippe Mathieu-Daudé, Michael S. Tsirkin, Marcel Apfelbaum,
Alex Williamson, Cédric Le Goater, Paolo Bonzini,
Daniel P. Berrangé, Eduardo Habkost, Sriram Yagnaraman,
Jason Wang, Keith Busch, Klaus Jensen, Markus Armbruster
Cc: qemu-devel, qemu-block, Akihiko Odaki
It is no longer used.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
---
include/hw/qdev-core.h | 4 ----
hw/core/qdev.c | 1 -
system/qdev-monitor.c | 12 +++++++-----
3 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 77bfcbdf732a..a3757e6769f8 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -237,10 +237,6 @@ struct DeviceState {
* @pending_deleted_expires_ms: optional timeout for deletion events
*/
int64_t pending_deleted_expires_ms;
- /**
- * @opts: QDict of options for the device
- */
- QDict *opts;
/**
* @hotplugged: was device added after PHASE_MACHINE_READY?
*/
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index f3a996f57dee..2fc84699d432 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -706,7 +706,6 @@ static void device_finalize(Object *obj)
dev->canonical_path = NULL;
}
- qobject_unref(dev->opts);
g_free(dev->id);
}
diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
index 6af6ef7d667f..3551989d5153 100644
--- a/system/qdev-monitor.c
+++ b/system/qdev-monitor.c
@@ -624,6 +624,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts,
char *id;
DeviceState *dev = NULL;
BusState *bus = NULL;
+ QDict *properties;
driver = qdict_get_try_str(opts, "driver");
if (!driver) {
@@ -705,13 +706,14 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts,
}
/* set properties */
- dev->opts = qdict_clone_shallow(opts);
- qdict_del(dev->opts, "driver");
- qdict_del(dev->opts, "bus");
- qdict_del(dev->opts, "id");
+ properties = qdict_clone_shallow(opts);
+ qdict_del(properties, "driver");
+ qdict_del(properties, "bus");
+ qdict_del(properties, "id");
- object_set_properties_from_keyval(&dev->parent_obj, dev->opts, from_json,
+ object_set_properties_from_keyval(&dev->parent_obj, properties, from_json,
errp);
+ qobject_unref(properties);
if (*errp) {
goto err_del_dev;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread