From: "Michael S. Tsirkin" <mst@redhat.com>
To: Akihiko Odaki <akihiko.odaki@daynix.com>
Cc: "Philippe Mathieu-Daudé" <philmd@linaro.org>,
"Marcel Apfelbaum" <marcel.apfelbaum@gmail.com>,
"Alex Williamson" <alex.williamson@redhat.com>,
"Cédric Le Goater" <clg@redhat.com>,
"Paolo Bonzini" <pbonzini@redhat.com>,
"Daniel P. Berrangé" <berrange@redhat.com>,
"Eduardo Habkost" <eduardo@habkost.net>,
"Sriram Yagnaraman" <sriram.yagnaraman@ericsson.com>,
"Jason Wang" <jasowang@redhat.com>,
"Keith Busch" <kbusch@kernel.org>,
"Klaus Jensen" <its@irrelevant.dk>,
"Markus Armbruster" <armbru@redhat.com>,
qemu-devel@nongnu.org, qemu-block@nongnu.org
Subject: Re: [PATCH for-9.2 v11 06/11] pcie_sriov: Reuse SR-IOV VF device instances
Date: Fri, 2 Aug 2024 12:54:18 -0400 [thread overview]
Message-ID: <20240802125342-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <20240802-reuse-v11-6-fb83bb8c19fb@daynix.com>
On Fri, Aug 02, 2024 at 02:17:56PM +0900, Akihiko Odaki wrote:
> Disable SR-IOV VF devices by reusing code to power down PCI devices
> instead of removing them when the guest requests to disable VFs. This
> allows devices to be realized, and VF realization errors to be reported,
> at PF realization time.
What kind of errors do you have in mind?
> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> ---
> include/hw/pci/pci.h | 5 ---
> include/hw/pci/pci_device.h | 15 +++++++
> include/hw/pci/pcie_sriov.h | 1 -
> hw/pci/pci.c | 2 +-
> hw/pci/pcie_sriov.c | 95 +++++++++++++++++++--------------------------
> 5 files changed, 56 insertions(+), 62 deletions(-)
>
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index fe04b4fafd04..14a869eeaa71 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -680,9 +680,4 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev)
> MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
> void pci_set_enabled(PCIDevice *pci_dev, bool state);
>
> -static inline void pci_set_power(PCIDevice *pci_dev, bool state)
> -{
> - pci_set_enabled(pci_dev, state);
> -}
> -
> #endif
> diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
> index f38fb3111954..1ff3ce94e25b 100644
> --- a/include/hw/pci/pci_device.h
> +++ b/include/hw/pci/pci_device.h
> @@ -212,6 +212,21 @@ static inline uint16_t pci_get_bdf(PCIDevice *dev)
> return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(dev)), dev->devfn);
> }
>
> +static inline void pci_set_power(PCIDevice *pci_dev, bool state)
> +{
> + /*
> + * Don't change the enabled state of VFs when powering on/off the device.
> + *
> + * When powering on, VFs must not be enabled immediately but they must
> + * wait until the guest configures SR-IOV.
> + * When powering off, their corresponding PFs will be reset and disable
> + * VFs.
> + */
> + if (!pci_is_vf(pci_dev)) {
> + pci_set_enabled(pci_dev, state);
> + }
> +}
> +
> uint16_t pci_requester_id(PCIDevice *dev);
>
> /* DMA access functions */
> diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
> index aa704e8f9d9f..70649236c18a 100644
> --- a/include/hw/pci/pcie_sriov.h
> +++ b/include/hw/pci/pcie_sriov.h
> @@ -18,7 +18,6 @@
> typedef struct PCIESriovPF {
> uint16_t num_vfs; /* Number of virtual functions created */
> uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
> - const char *vfname; /* Reference to the device type used for the VFs */
> PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
> } PCIESriovPF;
>
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index b532888e8f6c..5c0050e1786a 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -2895,7 +2895,7 @@ void pci_set_enabled(PCIDevice *d, bool state)
> memory_region_set_enabled(&d->bus_master_enable_region,
> (pci_get_word(d->config + PCI_COMMAND)
> & PCI_COMMAND_MASTER) && d->enabled);
> - if (!d->enabled) {
> + if (d->qdev.realized) {
> pci_device_reset(d);
> }
> }
> diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
> index f0bde0d3fc79..faadb0d2ea85 100644
> --- a/hw/pci/pcie_sriov.c
> +++ b/hw/pci/pcie_sriov.c
> @@ -20,9 +20,16 @@
> #include "qapi/error.h"
> #include "trace.h"
>
> -static PCIDevice *register_vf(PCIDevice *pf, int devfn,
> - const char *name, uint16_t vf_num);
> -static void unregister_vfs(PCIDevice *dev);
> +static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
> +{
> + for (uint16_t i = 0; i < total_vfs; i++) {
> + PCIDevice *vf = dev->exp.sriov_pf.vf[i];
> + object_unparent(OBJECT(vf));
> + object_unref(OBJECT(vf));
> + }
> + g_free(dev->exp.sriov_pf.vf);
> + dev->exp.sriov_pf.vf = NULL;
> +}
>
> bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
> const char *vfname, uint16_t vf_dev_id,
> @@ -30,6 +37,8 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
> uint16_t vf_offset, uint16_t vf_stride,
> Error **errp)
> {
> + BusState *bus = qdev_get_parent_bus(&dev->qdev);
> + int32_t devfn = dev->devfn + vf_offset;
> uint8_t *cfg = dev->config + offset;
> uint8_t *wmask;
>
> @@ -49,7 +58,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
> offset, PCI_EXT_CAP_SRIOV_SIZEOF);
> dev->exp.sriov_cap = offset;
> dev->exp.sriov_pf.num_vfs = 0;
> - dev->exp.sriov_pf.vfname = g_strdup(vfname);
> dev->exp.sriov_pf.vf = NULL;
>
> pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
> @@ -83,14 +91,34 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
>
> qdev_prop_set_bit(&dev->qdev, "multifunction", true);
>
> + dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
> +
> + for (uint16_t i = 0; i < total_vfs; i++) {
> + PCIDevice *vf = pci_new(devfn, vfname);
> + vf->exp.sriov_vf.pf = dev;
> + vf->exp.sriov_vf.vf_number = i;
> +
> + if (!qdev_realize(&vf->qdev, bus, errp)) {
> + unparent_vfs(dev, i);
> + return false;
> + }
> +
> + /* set vid/did according to sr/iov spec - they are not used */
> + pci_config_set_vendor_id(vf->config, 0xffff);
> + pci_config_set_device_id(vf->config, 0xffff);
> +
> + dev->exp.sriov_pf.vf[i] = vf;
> + devfn += vf_stride;
> + }
> +
> return true;
> }
>
> void pcie_sriov_pf_exit(PCIDevice *dev)
> {
> - unregister_vfs(dev);
> - g_free((char *)dev->exp.sriov_pf.vfname);
> - dev->exp.sriov_pf.vfname = NULL;
> + uint8_t *cfg = dev->config + dev->exp.sriov_cap;
> +
> + unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
> }
>
> void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
> @@ -156,38 +184,11 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
> }
> }
>
> -static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
> - uint16_t vf_num)
> -{
> - PCIDevice *dev = pci_new(devfn, name);
> - dev->exp.sriov_vf.pf = pf;
> - dev->exp.sriov_vf.vf_number = vf_num;
> - PCIBus *bus = pci_get_bus(pf);
> - Error *local_err = NULL;
> -
> - qdev_realize(&dev->qdev, &bus->qbus, &local_err);
> - if (local_err) {
> - error_report_err(local_err);
> - return NULL;
> - }
> -
> - /* set vid/did according to sr/iov spec - they are not used */
> - pci_config_set_vendor_id(dev->config, 0xffff);
> - pci_config_set_device_id(dev->config, 0xffff);
> -
> - return dev;
> -}
> -
> static void register_vfs(PCIDevice *dev)
> {
> uint16_t num_vfs;
> uint16_t i;
> uint16_t sriov_cap = dev->exp.sriov_cap;
> - uint16_t vf_offset =
> - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
> - uint16_t vf_stride =
> - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
> - int32_t devfn = dev->devfn + vf_offset;
>
> assert(sriov_cap > 0);
> num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
> @@ -195,18 +196,10 @@ static void register_vfs(PCIDevice *dev)
> return;
> }
>
> - dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
> -
> trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
> PCI_FUNC(dev->devfn), num_vfs);
> for (i = 0; i < num_vfs; i++) {
> - dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
> - dev->exp.sriov_pf.vfname, i);
> - if (!dev->exp.sriov_pf.vf[i]) {
> - num_vfs = i;
> - break;
> - }
> - devfn += vf_stride;
> + pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
> }
> dev->exp.sriov_pf.num_vfs = num_vfs;
> }
> @@ -219,12 +212,8 @@ static void unregister_vfs(PCIDevice *dev)
> trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
> PCI_FUNC(dev->devfn), num_vfs);
> for (i = 0; i < num_vfs; i++) {
> - PCIDevice *vf = dev->exp.sriov_pf.vf[i];
> - object_unparent(OBJECT(vf));
> - object_unref(OBJECT(vf));
> + pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
> }
> - g_free(dev->exp.sriov_pf.vf);
> - dev->exp.sriov_pf.vf = NULL;
> dev->exp.sriov_pf.num_vfs = 0;
> }
>
> @@ -246,14 +235,10 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
> PCI_FUNC(dev->devfn), off, val, len);
>
> if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
> - if (dev->exp.sriov_pf.num_vfs) {
> - if (!(val & PCI_SRIOV_CTRL_VFE)) {
> - unregister_vfs(dev);
> - }
> + if (val & PCI_SRIOV_CTRL_VFE) {
> + register_vfs(dev);
> } else {
> - if (val & PCI_SRIOV_CTRL_VFE) {
> - register_vfs(dev);
> - }
> + unregister_vfs(dev);
> }
> }
> }
>
> --
> 2.45.2
next prev parent reply other threads:[~2024-08-02 16:54 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-02 5:17 [PATCH for-9.2 v11 00/11] hw/pci: SR-IOV related fixes and improvements Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 01/11] hw/pci: Rename has_power to enabled Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 02/11] hw/ppc/spapr_pci: Do not create DT for disabled PCI device Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 03/11] hw/ppc/spapr_pci: Do not reject VFs created after a PF Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 04/11] pcie_sriov: Do not manually unrealize Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 05/11] pcie_sriov: Ensure VF function number does not overflow Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 06/11] pcie_sriov: Reuse SR-IOV VF device instances Akihiko Odaki
2024-08-02 16:54 ` Michael S. Tsirkin [this message]
2024-08-04 6:55 ` Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 07/11] pcie_sriov: Release VFs failed to realize Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 08/11] pcie_sriov: Remove num_vfs from PCIESriovPF Akihiko Odaki
2024-08-02 12:58 ` Michael S. Tsirkin
2024-08-02 15:38 ` Akihiko Odaki
2024-08-02 16:52 ` Michael S. Tsirkin
2024-08-04 9:11 ` Akihiko Odaki
2024-08-02 5:17 ` [PATCH for-9.2 v11 09/11] pcie_sriov: Register VFs after migration Akihiko Odaki
2024-08-02 5:18 ` [PATCH for-9.2 v11 10/11] hw/pci: Use -1 as the default value for rombar Akihiko Odaki
2024-08-02 10:54 ` Markus Armbruster
2024-08-04 6:27 ` Akihiko Odaki
2024-08-02 5:18 ` [PATCH for-9.2 v11 11/11] hw/qdev: Remove opts member Akihiko Odaki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240802125342-mutt-send-email-mst@kernel.org \
--to=mst@redhat.com \
--cc=akihiko.odaki@daynix.com \
--cc=alex.williamson@redhat.com \
--cc=armbru@redhat.com \
--cc=berrange@redhat.com \
--cc=clg@redhat.com \
--cc=eduardo@habkost.net \
--cc=its@irrelevant.dk \
--cc=jasowang@redhat.com \
--cc=kbusch@kernel.org \
--cc=marcel.apfelbaum@gmail.com \
--cc=pbonzini@redhat.com \
--cc=philmd@linaro.org \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=sriram.yagnaraman@ericsson.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).