From: "Michael S. Tsirkin" <mst@redhat.com>
To: qemu-devel@nongnu.org
Cc: Peter Maydell <peter.maydell@linaro.org>,
Akihiko Odaki <akihiko.odaki@daynix.com>,
Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Subject: [PULL 10/41] pcie_sriov: Reuse SR-IOV VF device instances
Date: Fri, 21 Feb 2025 07:23:04 -0500 [thread overview]
Message-ID: <cab1398a60eb0cb2d2d1998c9b46aaa5e0bf3ee8.1740140520.git.mst@redhat.com> (raw)
In-Reply-To: <cover.1740140520.git.mst@redhat.com>
From: Akihiko Odaki <akihiko.odaki@daynix.com>
Disable SR-IOV VF devices by reusing code to power down PCI devices
instead of removing them when the guest requests to disable VFs. This
allows VF devices to be realized, and VF realization errors to be
reported, at PF realization time.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Message-Id: <20250116-reuse-v20-8-7cb370606368@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
include/hw/pci/pcie_sriov.h | 1 -
hw/pci/pci.c | 14 +++++-
hw/pci/pcie_sriov.c | 94 +++++++++++++++----------------------
3 files changed, 51 insertions(+), 58 deletions(-)
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
index aa704e8f9d..70649236c1 100644
--- a/include/hw/pci/pcie_sriov.h
+++ b/include/hw/pci/pcie_sriov.h
@@ -18,7 +18,6 @@
typedef struct PCIESriovPF {
uint16_t num_vfs; /* Number of virtual functions created */
uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
- const char *vfname; /* Reference to the device type used for the VFs */
PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
} PCIESriovPF;
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 2afa423925..3e29b30d55 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2963,7 +2963,17 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
void pci_set_power(PCIDevice *d, bool state)
{
- pci_set_enabled(d, state);
+ /*
+ * Don't change the enabled state of VFs when powering on/off the device.
+ *
+ * When powering on, VFs must not be enabled immediately but they must
+ * wait until the guest configures SR-IOV.
+ * When powering off, their corresponding PFs will be reset and will
+ * disable the VFs.
+ */
+ if (!pci_is_vf(d)) {
+ pci_set_enabled(d, state);
+ }
}
void pci_set_enabled(PCIDevice *d, bool state)
@@ -2977,7 +2987,7 @@ void pci_set_enabled(PCIDevice *d, bool state)
memory_region_set_enabled(&d->bus_master_enable_region,
(pci_get_word(d->config + PCI_COMMAND)
& PCI_COMMAND_MASTER) && d->enabled);
- if (!d->enabled) {
+ if (qdev_is_realized(&d->qdev)) {
pci_device_reset(d);
}
}
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index 91c64c988e..f1993bc553 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -20,9 +20,16 @@
#include "qapi/error.h"
#include "trace.h"
-static PCIDevice *register_vf(PCIDevice *pf, int devfn,
- const char *name, uint16_t vf_num);
-static void unregister_vfs(PCIDevice *dev);
+static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
+{
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ PCIDevice *vf = dev->exp.sriov_pf.vf[i];
+ object_unparent(OBJECT(vf));
+ object_unref(OBJECT(vf));
+ }
+ g_free(dev->exp.sriov_pf.vf);
+ dev->exp.sriov_pf.vf = NULL;
+}
bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
const char *vfname, uint16_t vf_dev_id,
@@ -30,6 +37,7 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
uint16_t vf_offset, uint16_t vf_stride,
Error **errp)
{
+ BusState *bus = qdev_get_parent_bus(&dev->qdev);
int32_t devfn = dev->devfn + vf_offset;
uint8_t *cfg = dev->config + offset;
uint8_t *wmask;
@@ -44,7 +52,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
offset, PCI_EXT_CAP_SRIOV_SIZEOF);
dev->exp.sriov_cap = offset;
dev->exp.sriov_pf.num_vfs = 0;
- dev->exp.sriov_pf.vfname = g_strdup(vfname);
dev->exp.sriov_pf.vf = NULL;
pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
@@ -78,14 +85,34 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
+ dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
+
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ PCIDevice *vf = pci_new(devfn, vfname);
+ vf->exp.sriov_vf.pf = dev;
+ vf->exp.sriov_vf.vf_number = i;
+
+ if (!qdev_realize(&vf->qdev, bus, errp)) {
+ unparent_vfs(dev, i);
+ return false;
+ }
+
+ /* set vid/did according to sr/iov spec - they are not used */
+ pci_config_set_vendor_id(vf->config, 0xffff);
+ pci_config_set_device_id(vf->config, 0xffff);
+
+ dev->exp.sriov_pf.vf[i] = vf;
+ devfn += vf_stride;
+ }
+
return true;
}
void pcie_sriov_pf_exit(PCIDevice *dev)
{
- unregister_vfs(dev);
- g_free((char *)dev->exp.sriov_pf.vfname);
- dev->exp.sriov_pf.vfname = NULL;
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+
+ unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
}
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
@@ -151,38 +178,11 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
}
}
-static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
- uint16_t vf_num)
-{
- PCIDevice *dev = pci_new(devfn, name);
- dev->exp.sriov_vf.pf = pf;
- dev->exp.sriov_vf.vf_number = vf_num;
- PCIBus *bus = pci_get_bus(pf);
- Error *local_err = NULL;
-
- qdev_realize(&dev->qdev, &bus->qbus, &local_err);
- if (local_err) {
- error_report_err(local_err);
- return NULL;
- }
-
- /* set vid/did according to sr/iov spec - they are not used */
- pci_config_set_vendor_id(dev->config, 0xffff);
- pci_config_set_device_id(dev->config, 0xffff);
-
- return dev;
-}
-
static void register_vfs(PCIDevice *dev)
{
uint16_t num_vfs;
uint16_t i;
uint16_t sriov_cap = dev->exp.sriov_cap;
- uint16_t vf_offset =
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
- uint16_t vf_stride =
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
- int32_t devfn = dev->devfn + vf_offset;
assert(sriov_cap > 0);
num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
@@ -190,18 +190,10 @@ static void register_vfs(PCIDevice *dev)
return;
}
- dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
-
trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn), num_vfs);
for (i = 0; i < num_vfs; i++) {
- dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
- dev->exp.sriov_pf.vfname, i);
- if (!dev->exp.sriov_pf.vf[i]) {
- num_vfs = i;
- break;
- }
- devfn += vf_stride;
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
}
dev->exp.sriov_pf.num_vfs = num_vfs;
}
@@ -214,12 +206,8 @@ static void unregister_vfs(PCIDevice *dev)
trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn), num_vfs);
for (i = 0; i < num_vfs; i++) {
- PCIDevice *vf = dev->exp.sriov_pf.vf[i];
- object_unparent(OBJECT(vf));
- object_unref(OBJECT(vf));
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
}
- g_free(dev->exp.sriov_pf.vf);
- dev->exp.sriov_pf.vf = NULL;
dev->exp.sriov_pf.num_vfs = 0;
}
@@ -241,14 +229,10 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
PCI_FUNC(dev->devfn), off, val, len);
if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
- if (dev->exp.sriov_pf.num_vfs) {
- if (!(val & PCI_SRIOV_CTRL_VFE)) {
- unregister_vfs(dev);
- }
+ if (val & PCI_SRIOV_CTRL_VFE) {
+ register_vfs(dev);
} else {
- if (val & PCI_SRIOV_CTRL_VFE) {
- register_vfs(dev);
- }
+ unregister_vfs(dev);
}
}
}
--
MST
next prev parent reply other threads:[~2025-02-21 12:23 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-21 12:22 [PULL 00/41] virtio,pc,pci: features, fixes, cleanups Michael S. Tsirkin
2025-02-21 12:22 ` [PULL 01/41] docs/about: Change notes on x86 machine type deprecation into a general one Michael S. Tsirkin
2025-02-21 12:22 ` [PULL 02/41] hw/net: Fix NULL dereference with software RSS Michael S. Tsirkin
2025-02-27 9:51 ` Michael Tokarev
2025-02-21 12:22 ` [PULL 03/41] hw/ppc/spapr_pci: Do not create DT for disabled PCI device Michael S. Tsirkin
2025-02-21 12:22 ` [PULL 04/41] hw/ppc/spapr_pci: Do not reject VFs created after a PF Michael S. Tsirkin
2025-02-21 12:22 ` [PULL 05/41] s390x/pci: Avoid creating zpci for VFs Michael S. Tsirkin
2025-02-21 12:22 ` [PULL 06/41] s390x/pci: Allow plugging SR-IOV devices Michael S. Tsirkin
2025-02-21 12:22 ` [PULL 07/41] s390x/pci: Check for multifunction after device realization Michael S. Tsirkin
2025-02-21 12:22 ` [PULL 08/41] pcie_sriov: Do not manually unrealize Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 09/41] pcie_sriov: Ensure VF addr does not overflow Michael S. Tsirkin
2025-02-21 12:23 ` Michael S. Tsirkin [this message]
2025-02-21 12:23 ` [PULL 11/41] pcie_sriov: Release VFs failed to realize Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 12/41] pcie_sriov: Remove num_vfs from PCIESriovPF Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 13/41] pcie_sriov: Register VFs after migration Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 14/41] qtest/libqos/pci: Do not write to PBA memory Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 15/41] hw/pci/msix: Warn on PBA writes Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 16/41] hw/pci: Assert a bar is not registered multiple times Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 17/41] hw/i386/pc: Fix crash that occurs when introspecting TYPE_PC_MACHINE machines Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 18/41] hw/i386/microvm: Fix crash that occurs when introspecting the microvm machine Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 19/41] tests/qtest/vhost-user-test: Use modern virtio for vhost-user tests Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 20/41] hw/cxl: Introduce CXL_T3_MSIX_VECTOR enumeration Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 21/41] hw/mem/cxl_type3: Add paired msix_uninit_exclusive_bar() call Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 22/41] hw/mem/cxl_type3: Fix special_ops memory leak on msix_init_exclusive_bar() failure Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 23/41] hw/mem/cxl_type3: Ensure errp is set on realization failure Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 24/41] mem/cxl_type3: support 3, 6, 12 and 16 interleave ways Michael S. Tsirkin
2025-02-21 12:23 ` [PULL 25/41] hw/virtio: reset virtio balloon stats on machine reset Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 26/41] amd_iommu: Use correct DTE field for interrupt passthrough Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 27/41] amd_iommu: Use correct bitmask to set capability BAR Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 28/41] vhost-iova-tree: Implement an IOVA-only tree Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 29/41] vhost-iova-tree, svq: Implement GPA->IOVA & partial IOVA->HVA trees Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 30/41] vhost-iova-tree: Update documentation Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 31/41] cryptodev/vhost: allocate CryptoDevBackendVhost using g_mem0() Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 32/41] MAINTAINERS: add more files to `vhost` Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 33/41] vdpa: Fix endian bugs in shadow virtqueue Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 34/41] hw/virtio/virtio-nsm: Respond with correct length Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 35/41] net: vhost-user: add QAPI events to report connection state Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 36/41] vhost-user-snd: correct the calculation of config_size Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 37/41] hw/virtio/virtio-iommu: Migrate to 3-phase reset Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 38/41] hw/i386/intel-iommu: " Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 39/41] hw/arm/smmuv3: Move reset to exit phase Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 40/41] hw/vfio/common: Add a trace point in vfio_reset_handler Michael S. Tsirkin
2025-02-21 12:24 ` [PULL 41/41] docs/devel/reset: Document reset expectations for DMA and IOMMU Michael S. Tsirkin
2025-02-21 23:17 ` [PULL 00/41] virtio,pc,pci: features, fixes, cleanups Stefan Hajnoczi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=cab1398a60eb0cb2d2d1998c9b46aaa5e0bf3ee8.1740140520.git.mst@redhat.com \
--to=mst@redhat.com \
--cc=akihiko.odaki@daynix.com \
--cc=marcel.apfelbaum@gmail.com \
--cc=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).