From: Akihiko Odaki <akihiko.odaki@daynix.com>
To: "Michael S. Tsirkin" <mst@redhat.com>,
"Marcel Apfelbaum" <marcel.apfelbaum@gmail.com>,
"Alex Williamson" <alex.williamson@redhat.com>,
"Cédric Le Goater" <clg@redhat.com>,
"Paolo Bonzini" <pbonzini@redhat.com>,
"Daniel P. Berrangé" <berrange@redhat.com>,
"Eduardo Habkost" <eduardo@habkost.net>,
"Jason Wang" <jasowang@redhat.com>,
"Sriram Yagnaraman" <sriram.yagnaraman@est.tech>,
"Keith Busch" <kbusch@kernel.org>,
"Klaus Jensen" <its@irrelevant.dk>
Cc: qemu-devel@nongnu.org, qemu-block@nongnu.org,
Yui Washizu <yui.washidu@gmail.com>,
Akihiko Odaki <akihiko.odaki@daynix.com>
Subject: [PATCH RFC v2 06/12] pcie_sriov: Reuse SR-IOV VF device instances
Date: Sun, 10 Dec 2023 13:05:49 +0900 [thread overview]
Message-ID: <20231210-sriov-v2-6-b959e8a6dfaf@daynix.com> (raw)
In-Reply-To: <20231210-sriov-v2-0-b959e8a6dfaf@daynix.com>
Disable SR-IOV VF devices by reusing code to power down PCI devices
instead of removing them when the guest requests to disable VFs. This
allows to realize devices and report VF realization errors at PF
realization time.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
docs/pcie_sriov.txt | 8 ++--
include/hw/pci/pci.h | 2 +-
include/hw/pci/pci_device.h | 2 +-
include/hw/pci/pcie_sriov.h | 6 +--
hw/net/igb.c | 3 +-
hw/nvme/ctrl.c | 3 +-
hw/pci/pci.c | 18 ++++----
hw/pci/pci_host.c | 4 +-
hw/pci/pcie.c | 4 +-
hw/pci/pcie_sriov.c | 105 +++++++++++++++++++++-----------------------
10 files changed, 79 insertions(+), 76 deletions(-)
diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt
index a47aad0bfa..ab2142807f 100644
--- a/docs/pcie_sriov.txt
+++ b/docs/pcie_sriov.txt
@@ -52,9 +52,11 @@ setting up a BAR for a VF.
...
/* Add and initialize the SR/IOV capability */
- pcie_sriov_pf_init(d, 0x200, "your_virtual_dev",
- vf_devid, initial_vfs, total_vfs,
- fun_offset, stride);
+ if (!pcie_sriov_pf_init(d, 0x200, "your_virtual_dev",
+ vf_devid, initial_vfs, total_vfs,
+ fun_offset, stride, errp)) {
+ return;
+ }
/* Set up individual VF BARs (parameters as for normal BARs) */
pcie_sriov_pf_init_vf_bar( ... )
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index fa6313aabc..fae83b9b72 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -643,6 +643,6 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev)
}
MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
-void pci_set_power(PCIDevice *pci_dev, bool state);
+void pci_set_enabled(PCIDevice *pci_dev, bool state);
#endif
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index 54fa0676ab..f5aba8ae26 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -56,7 +56,7 @@ typedef struct PCIReqIDCache PCIReqIDCache;
struct PCIDevice {
DeviceState qdev;
bool partially_hotplugged;
- bool has_power;
+ bool is_enabled;
/* PCI config space */
uint8_t *config;
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
index 095fb0c9ed..d9a39dacca 100644
--- a/include/hw/pci/pcie_sriov.h
+++ b/include/hw/pci/pcie_sriov.h
@@ -18,7 +18,6 @@
struct PCIESriovPF {
uint16_t num_vfs; /* Number of virtual functions created */
uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
- const char *vfname; /* Reference to the device type used for the VFs */
PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
};
@@ -27,10 +26,11 @@ struct PCIESriovVF {
uint16_t vf_number; /* Logical VF number of this function */
};
-void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
const char *vfname, uint16_t vf_dev_id,
uint16_t init_vfs, uint16_t total_vfs,
- uint16_t vf_offset, uint16_t vf_stride);
+ uint16_t vf_offset, uint16_t vf_stride,
+ Error **errp);
void pcie_sriov_pf_exit(PCIDevice *dev);
/* Set up a VF bar in the SR/IOV bar area */
diff --git a/hw/net/igb.c b/hw/net/igb.c
index 8089acfea4..326e334a8d 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -449,7 +449,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF,
IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
- IGB_VF_OFFSET, IGB_VF_STRIDE);
+ IGB_VF_OFFSET, IGB_VF_STRIDE,
+ &error_abort);
pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MMIO_BAR_IDX,
PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH,
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index f026245d1e..ea006e6175 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -8042,7 +8042,8 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
n->params.sriov_max_vfs, n->params.sriov_max_vfs,
- NVME_VF_OFFSET, NVME_VF_STRIDE);
+ NVME_VF_OFFSET, NVME_VF_STRIDE,
+ &error_abort);
pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size);
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 5a93cc1681..eb351844ee 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1516,7 +1516,7 @@ static void pci_update_mappings(PCIDevice *d)
continue;
new_addr = pci_bar_address(d, i, r->type, r->size);
- if (!d->has_power) {
+ if (!d->is_enabled) {
new_addr = PCI_BAR_UNMAPPED;
}
@@ -1604,7 +1604,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
pci_update_irq_disabled(d, was_irq_disabled);
memory_region_set_enabled(&d->bus_master_enable_region,
(pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->has_power);
+ & PCI_COMMAND_MASTER) && d->is_enabled);
}
msi_write_config(d, addr, val_in, l);
@@ -2180,7 +2180,9 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
return;
}
- pci_set_power(pci_dev, true);
+ if (!pci_is_vf(pci_dev)) {
+ pci_set_enabled(pci_dev, true);
+ }
pci_dev->msi_trigger = pci_msi_trigger;
}
@@ -2844,18 +2846,18 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
return msg;
}
-void pci_set_power(PCIDevice *d, bool state)
+void pci_set_enabled(PCIDevice *d, bool state)
{
- if (d->has_power == state) {
+ if (d->is_enabled == state) {
return;
}
- d->has_power = state;
+ d->is_enabled = state;
pci_update_mappings(d);
memory_region_set_enabled(&d->bus_master_enable_region,
(pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->has_power);
- if (!d->has_power) {
+ & PCI_COMMAND_MASTER) && d->is_enabled);
+ if (!d->is_enabled) {
pci_device_reset(d);
}
}
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index a18aa0a8d4..1f3030108c 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -86,7 +86,7 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
* allowing direct removal of unexposed functions.
*/
if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
- !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
+ !pci_dev->is_enabled || is_pci_dev_ejected(pci_dev)) {
return;
}
@@ -111,7 +111,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
* allowing direct removal of unexposed functions.
*/
if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
- !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
+ !pci_dev->is_enabled || is_pci_dev_ejected(pci_dev)) {
return ~0x0;
}
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 6db0cf69cd..f34c157e1f 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -394,7 +394,9 @@ static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque)
{
bool *power = opaque;
- pci_set_power(dev, *power);
+ if (!pci_is_vf(dev)) {
+ pci_set_enabled(dev, *power);
+ }
}
static void pcie_cap_update_power(PCIDevice *hotplug_dev)
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index 5ef8950940..5fc146efc4 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -20,15 +20,29 @@
#include "qapi/error.h"
#include "trace.h"
-static PCIDevice *register_vf(PCIDevice *pf, int devfn,
- const char *name, uint16_t vf_num);
-static void unregister_vfs(PCIDevice *dev);
+static void unrealize_vfs(PCIDevice *dev, uint16_t total_vfs)
+{
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ Error *err = NULL;
+ PCIDevice *vf = dev->exp.sriov_pf.vf[i];
+ if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) {
+ error_reportf_err(err, "Failed to unplug: ");
+ }
+ object_unparent(OBJECT(vf));
+ object_unref(OBJECT(vf));
+ }
+ g_free(dev->exp.sriov_pf.vf);
+ dev->exp.sriov_pf.vf = NULL;
+}
-void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
const char *vfname, uint16_t vf_dev_id,
uint16_t init_vfs, uint16_t total_vfs,
- uint16_t vf_offset, uint16_t vf_stride)
+ uint16_t vf_offset, uint16_t vf_stride,
+ Error **errp)
{
+ BusState *bus = qdev_get_parent_bus(&dev->qdev);
+ int32_t devfn = dev->devfn + vf_offset;
uint8_t *cfg = dev->config + offset;
uint8_t *wmask;
@@ -36,7 +50,6 @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
offset, PCI_EXT_CAP_SRIOV_SIZEOF);
dev->exp.sriov_cap = offset;
dev->exp.sriov_pf.num_vfs = 0;
- dev->exp.sriov_pf.vfname = g_strdup(vfname);
dev->exp.sriov_pf.vf = NULL;
pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
@@ -69,13 +82,36 @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553);
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
+
+ dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
+ assert(dev->exp.sriov_pf.vf);
+
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ PCIDevice *vf = pci_new(devfn, vfname);
+ vf->exp.sriov_vf.pf = dev;
+ vf->exp.sriov_vf.vf_number = i;
+
+ if (!qdev_realize(&vf->qdev, bus, errp)) {
+ unrealize_vfs(dev, i);
+ return false;
+ }
+
+ /* set vid/did according to sr/iov spec - they are not used */
+ pci_config_set_vendor_id(vf->config, 0xffff);
+ pci_config_set_device_id(vf->config, 0xffff);
+
+ dev->exp.sriov_pf.vf[i] = vf;
+ devfn += vf_stride;
+ }
+
+ return true;
}
void pcie_sriov_pf_exit(PCIDevice *dev)
{
- unregister_vfs(dev);
- g_free((char *)dev->exp.sriov_pf.vfname);
- dev->exp.sriov_pf.vfname = NULL;
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+
+ unrealize_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
}
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
@@ -141,55 +177,22 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
}
}
-static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
- uint16_t vf_num)
-{
- PCIDevice *dev = pci_new(devfn, name);
- dev->exp.sriov_vf.pf = pf;
- dev->exp.sriov_vf.vf_number = vf_num;
- PCIBus *bus = pci_get_bus(pf);
- Error *local_err = NULL;
-
- qdev_realize(&dev->qdev, &bus->qbus, &local_err);
- if (local_err) {
- error_report_err(local_err);
- return NULL;
- }
-
- /* set vid/did according to sr/iov spec - they are not used */
- pci_config_set_vendor_id(dev->config, 0xffff);
- pci_config_set_device_id(dev->config, 0xffff);
-
- return dev;
-}
-
static void register_vfs(PCIDevice *dev)
{
uint16_t num_vfs;
uint16_t i;
uint16_t sriov_cap = dev->exp.sriov_cap;
- uint16_t vf_offset =
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
- uint16_t vf_stride =
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
- int32_t devfn = dev->devfn + vf_offset;
assert(sriov_cap > 0);
num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
-
- dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
- assert(dev->exp.sriov_pf.vf);
+ if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
+ return;
+ }
trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn), num_vfs);
for (i = 0; i < num_vfs; i++) {
- dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
- dev->exp.sriov_pf.vfname, i);
- if (!dev->exp.sriov_pf.vf[i]) {
- num_vfs = i;
- break;
- }
- devfn += vf_stride;
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
}
dev->exp.sriov_pf.num_vfs = num_vfs;
}
@@ -202,16 +205,8 @@ static void unregister_vfs(PCIDevice *dev)
trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn), num_vfs);
for (i = 0; i < num_vfs; i++) {
- Error *err = NULL;
- PCIDevice *vf = dev->exp.sriov_pf.vf[i];
- if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) {
- error_reportf_err(err, "Failed to unplug: ");
- }
- object_unparent(OBJECT(vf));
- object_unref(OBJECT(vf));
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
}
- g_free(dev->exp.sriov_pf.vf);
- dev->exp.sriov_pf.vf = NULL;
dev->exp.sriov_pf.num_vfs = 0;
pci_set_word(dev->config + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0);
}
--
2.43.0
next prev parent reply other threads:[~2023-12-10 4:10 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-12-10 4:05 [PATCH RFC v2 00/12] virtio-net: add support for SR-IOV emulation Akihiko Odaki
2023-12-10 4:05 ` [PATCH RFC v2 01/12] hw/pci: Initialize PCI multifunction after realization Akihiko Odaki
2023-12-12 9:59 ` Philippe Mathieu-Daudé
2023-12-10 4:05 ` [PATCH RFC v2 02/12] hw/pci: Determine if rombar is explicitly enabled Akihiko Odaki
2023-12-10 4:05 ` [PATCH RFC v2 03/12] hw/pci: Do not add ROM BAR for SR-IOV VF Akihiko Odaki
2023-12-10 4:05 ` [PATCH RFC v2 04/12] vfio: Avoid inspecting option QDict for rombar Akihiko Odaki
2023-12-10 4:05 ` [PATCH RFC v2 05/12] hw/qdev: Remove opts member Akihiko Odaki
2023-12-12 10:04 ` Philippe Mathieu-Daudé
2023-12-12 11:15 ` Akihiko Odaki
2023-12-10 4:05 ` Akihiko Odaki [this message]
2023-12-10 4:05 ` [PATCH RFC v2 07/12] pcie_sriov: Release VFs failed to realize Akihiko Odaki
2023-12-10 4:05 ` [PATCH RFC v2 08/12] pcie_sriov: Ensure PF and VF are mutually exclusive Akihiko Odaki
2023-12-10 4:05 ` [PATCH RFC v2 09/12] pcie_sriov: Check PCI Express for SR-IOV PF Akihiko Odaki
2023-12-10 4:05 ` [PATCH RFC v2 10/12] pcie_sriov: Allow user to create SR-IOV device Akihiko Odaki
2023-12-10 4:05 ` [PATCH RFC v2 11/12] virtio-pci: Implement SR-IOV PF Akihiko Odaki
2023-12-10 4:05 ` [PATCH RFC v2 12/12] virtio-net: Implement SR-IOV VF Akihiko Odaki
2023-12-11 2:52 ` [PATCH RFC v2 00/12] virtio-net: add support for SR-IOV emulation Jason Wang
2023-12-11 5:28 ` Akihiko Odaki
2023-12-11 7:26 ` Jason Wang
2023-12-11 8:29 ` Akihiko Odaki
2023-12-12 4:12 ` Jason Wang
2023-12-12 9:34 ` Akihiko Odaki
2023-12-19 8:37 ` Yui Washizu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231210-sriov-v2-6-b959e8a6dfaf@daynix.com \
--to=akihiko.odaki@daynix.com \
--cc=alex.williamson@redhat.com \
--cc=berrange@redhat.com \
--cc=clg@redhat.com \
--cc=eduardo@habkost.net \
--cc=its@irrelevant.dk \
--cc=jasowang@redhat.com \
--cc=kbusch@kernel.org \
--cc=marcel.apfelbaum@gmail.com \
--cc=mst@redhat.com \
--cc=pbonzini@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=sriram.yagnaraman@est.tech \
--cc=yui.washidu@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).