qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Cédric Le Goater" <clg@redhat.com>
To: qemu-devel@nongnu.org
Cc: "Alex Williamson" <alex.williamson@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	"Marcel Apfelbaum" <marcel.apfelbaum@gmail.com>,
	"Eric Auger" <eric.auger@redhat.com>,
	"Cédric Le Goater" <clg@redhat.com>
Subject: [PULL 03/42] hw/pci: Basic support for PCI power management
Date: Thu,  6 Mar 2025 15:13:39 +0100	[thread overview]
Message-ID: <20250306141419.2015340-4-clg@redhat.com> (raw)
In-Reply-To: <20250306141419.2015340-1-clg@redhat.com>

From: Alex Williamson <alex.williamson@redhat.com>

The memory and IO BARs for devices are only accessible in the D0 power
state.  In other power states the PCI spec defines that the device
responds to TLPs and messages with an Unsupported Request response.

To approximate this behavior, consider the BARs as unmapped when the
device is not in the D0 power state.  This makes the BARs inaccessible
and has the additional bonus for vfio-pci that we don't attempt to DMA
map BARs for devices in a non-D0 power state.

To support this, an interface is added for devices to register the PM
capability, which allows central tracking to enforce valid transitions
and unmap BARs in non-D0 states.

NB. We currently have device models (eepro100 and pcie_pci_bridge)
that register a PM capability but do not set wmask to enable writes to
the power state field.  In order to maintain migration compatibility,
this new helper does not manage the wmask to enable guest writes to
initiate a power state change.  The contents and write access of the
PM capability are still managed by the caller.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-2-alex.williamson@redhat.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
 include/hw/pci/pci.h        |  3 ++
 include/hw/pci/pci_device.h |  3 ++
 hw/pci/pci.c                | 93 ++++++++++++++++++++++++++++++++++++-
 hw/pci/trace-events         |  2 +
 4 files changed, 99 insertions(+), 2 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 4002bbeebde0968aec07146f045d0dcfabcda36e..c220cc844962416b6b3eaac4283548e66a00ca61 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -216,6 +216,8 @@ enum {
     QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
 #define QEMU_PCIE_EXT_TAG_BITNR 13
     QEMU_PCIE_EXT_TAG = (1 << QEMU_PCIE_EXT_TAG_BITNR),
+#define QEMU_PCI_CAP_PM_BITNR 14
+    QEMU_PCI_CAP_PM = (1 << QEMU_PCI_CAP_PM_BITNR),
 };
 
 typedef struct PCIINTxRoute {
@@ -676,5 +678,6 @@ static inline void pci_irq_deassert(PCIDevice *pci_dev)
 MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
 void pci_set_enabled(PCIDevice *pci_dev, bool state);
 void pci_set_power(PCIDevice *pci_dev, bool state);
+int pci_pm_init(PCIDevice *pci_dev, uint8_t offset, Error **errp);
 
 #endif
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index add208edfabdae8902598042bbcd4efbec0a80b8..345b12eaac1a192bbeace8ff5227bf04c5edbcc7 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -105,6 +105,9 @@ struct PCIDevice {
     /* Capability bits */
     uint32_t cap_present;
 
+    /* Offset of PM capability in config space */
+    uint8_t pm_cap;
+
     /* Offset of MSI-X capability in config space */
     uint8_t msix_cap;
 
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 1d42847ef04457e49af535b646c0beb0c543dedf..2ef7f617491498d6e6853e91e9fb7321037475f5 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -435,6 +435,84 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
                          attrs, NULL);
 }
 
+/*
+ * Register and track a PM capability.  If wmask is also enabled for the power
+ * state field of the pmcsr register, guest writes may change the device PM
+ * state.  BAR access is only enabled while the device is in the D0 state.
+ * Return the capability offset or negative error code.
+ */
+int pci_pm_init(PCIDevice *d, uint8_t offset, Error **errp)
+{
+    int cap = pci_add_capability(d, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF, errp);
+
+    if (cap < 0) {
+        return cap;
+    }
+
+    d->pm_cap = cap;
+    d->cap_present |= QEMU_PCI_CAP_PM;
+
+    return cap;
+}
+
+static uint8_t pci_pm_state(PCIDevice *d)
+{
+    uint16_t pmcsr;
+
+    if (!(d->cap_present & QEMU_PCI_CAP_PM)) {
+        return 0;
+    }
+
+    pmcsr = pci_get_word(d->config + d->pm_cap + PCI_PM_CTRL);
+
+    return pmcsr & PCI_PM_CTRL_STATE_MASK;
+}
+
+/*
+ * Update the PM capability state based on the new value stored in config
+ * space respective to the old, pre-write state provided.  If the new value
+ * is rejected (unsupported or invalid transition) restore the old value.
+ * Return the resulting PM state.
+ */
+static uint8_t pci_pm_update(PCIDevice *d, uint32_t addr, int l, uint8_t old)
+{
+    uint16_t pmc;
+    uint8_t new;
+
+    if (!(d->cap_present & QEMU_PCI_CAP_PM) ||
+        !range_covers_byte(addr, l, d->pm_cap + PCI_PM_CTRL)) {
+        return old;
+    }
+
+    new = pci_pm_state(d);
+    if (new == old) {
+        return old;
+    }
+
+    pmc = pci_get_word(d->config + d->pm_cap + PCI_PM_PMC);
+
+    /*
+     * Transitions to D1 & D2 are only allowed if supported.  Devices may
+     * only transition to higher D-states or to D0.
+     */
+    if ((!(pmc & PCI_PM_CAP_D1) && new == 1) ||
+        (!(pmc & PCI_PM_CAP_D2) && new == 2) ||
+        (old && new && new < old)) {
+        pci_word_test_and_clear_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+                                     PCI_PM_CTRL_STATE_MASK);
+        pci_word_test_and_set_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+                                   old);
+        trace_pci_pm_bad_transition(d->name, pci_dev_bus_num(d),
+                                    PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
+                                    old, new);
+        return old;
+    }
+
+    trace_pci_pm_transition(d->name, pci_dev_bus_num(d), PCI_SLOT(d->devfn),
+                            PCI_FUNC(d->devfn), old, new);
+    return new;
+}
+
 static void pci_reset_regions(PCIDevice *dev)
 {
     int r;
@@ -474,6 +552,11 @@ static void pci_do_device_reset(PCIDevice *dev)
                               pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) |
                               pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE));
     dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
+    /* Default PM state is D0 */
+    if (dev->cap_present & QEMU_PCI_CAP_PM) {
+        pci_word_test_and_clear_mask(dev->config + dev->pm_cap + PCI_PM_CTRL,
+                                     PCI_PM_CTRL_STATE_MASK);
+    }
     pci_reset_regions(dev);
     pci_update_mappings(dev);
 
@@ -1606,7 +1689,7 @@ static void pci_update_mappings(PCIDevice *d)
             continue;
 
         new_addr = pci_bar_address(d, i, r->type, r->size);
-        if (!d->enabled) {
+        if (!d->enabled || pci_pm_state(d)) {
             new_addr = PCI_BAR_UNMAPPED;
         }
 
@@ -1672,6 +1755,7 @@ uint32_t pci_default_read_config(PCIDevice *d,
 
 void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l)
 {
+    uint8_t new_pm_state, old_pm_state = pci_pm_state(d);
     int i, was_irq_disabled = pci_irq_disabled(d);
     uint32_t val = val_in;
 
@@ -1684,11 +1768,16 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
         d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
         d->config[addr + i] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */
     }
+
+    new_pm_state = pci_pm_update(d, addr, l, old_pm_state);
+
     if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) ||
         ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) ||
         ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) ||
-        range_covers_byte(addr, l, PCI_COMMAND))
+        range_covers_byte(addr, l, PCI_COMMAND) ||
+        !!new_pm_state != !!old_pm_state) {
         pci_update_mappings(d);
+    }
 
     if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
         pci_update_irq_disabled(d, was_irq_disabled);
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index e98f575a9d19ba9ce8d391fef1326aa317c322f9..6a9968962f4afcc2c0a33ced25794bea7282efbb 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -1,6 +1,8 @@
 # See docs/devel/tracing.rst for syntax documentation.
 
 # pci.c
+pci_pm_bad_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x REJECTED PM transition D%d->D%d"
+pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x PM transition D%d->D%d"
 pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
 pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
 pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s"
-- 
2.48.1



  parent reply	other threads:[~2025-03-06 14:15 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-06 14:13 [PULL 00/42] vfio queue Cédric Le Goater
2025-03-06 14:13 ` [PULL 01/42] vfio: Add property documentation Cédric Le Goater
2025-03-06 14:13 ` [PULL 02/42] vfio/ccw: Replace warn_once_pfch() with warn_report_once() Cédric Le Goater
2025-03-06 14:13 ` Cédric Le Goater [this message]
2025-03-06 14:13 ` [PULL 04/42] pci: Use PCI PM capability initializer Cédric Le Goater
2025-03-06 14:13 ` [PULL 05/42] vfio/pci: Delete local pm_cap Cédric Le Goater
2025-03-06 14:13 ` [PULL 06/42] pcie, virtio: Remove redundant pm_cap Cédric Le Goater
2025-03-06 14:13 ` [PULL 07/42] hw/vfio/pci: Re-order pre-reset Cédric Le Goater
2025-03-06 14:13 ` [PULL 08/42] MAINTAINERS: Add myself as vfio-igd maintainer Cédric Le Goater
2025-03-06 14:13 ` [PULL 09/42] vfio-platform: Deprecate all forms of vfio-platform devices Cédric Le Goater
2025-03-06 14:13 ` [PULL 10/42] migration: Clarify that {load, save}_cleanup handlers can run without setup Cédric Le Goater
2025-03-06 14:13 ` [PULL 11/42] thread-pool: Remove thread_pool_submit() function Cédric Le Goater
2025-03-06 14:13 ` [PULL 12/42] thread-pool: Rename AIO pool functions to *_aio() and data types to *Aio Cédric Le Goater
2025-03-06 14:13 ` [PULL 13/42] thread-pool: Implement generic (non-AIO) pool support Cédric Le Goater
2025-03-06 14:13 ` [PULL 14/42] migration: Add MIG_CMD_SWITCHOVER_START and its load handler Cédric Le Goater
2025-03-06 14:13 ` [PULL 15/42] migration: Add qemu_loadvm_load_state_buffer() and its handler Cédric Le Goater
2025-03-06 14:13 ` [PULL 16/42] migration: Always take BQL for migration_incoming_state_destroy() Cédric Le Goater
2025-03-06 14:13 ` [PULL 17/42] error: define g_autoptr() cleanup function for the Error type Cédric Le Goater
2025-03-06 14:13 ` [PULL 18/42] migration: Add thread pool of optional load threads Cédric Le Goater
2025-03-06 14:13 ` [PULL 19/42] migration/multifd: Split packet into header and RAM data Cédric Le Goater
2025-03-06 14:13 ` [PULL 20/42] migration/multifd: Device state transfer support - receive side Cédric Le Goater
2025-03-06 14:13 ` [PULL 21/42] migration/multifd: Make multifd_send() thread safe Cédric Le Goater
2025-03-06 14:13 ` [PULL 22/42] migration/multifd: Add an explicit MultiFDSendData destructor Cédric Le Goater
2025-03-06 14:13 ` [PULL 23/42] migration/multifd: Device state transfer support - send side Cédric Le Goater
2025-03-06 14:14 ` [PULL 24/42] migration/multifd: Make MultiFDSendData a struct Cédric Le Goater
2025-03-06 14:14 ` [PULL 25/42] migration/multifd: Add multifd_device_state_supported() Cédric Le Goater
2025-03-06 14:14 ` [PULL 26/42] migration: Add save_live_complete_precopy_thread handler Cédric Le Goater
2025-03-06 14:14 ` [PULL 27/42] vfio/migration: Add load_device_config_state_start trace event Cédric Le Goater
2025-03-06 14:14 ` [PULL 28/42] vfio/migration: Convert bytes_transferred counter to atomic Cédric Le Goater
2025-03-06 14:14 ` [PULL 29/42] vfio/migration: Add vfio_add_bytes_transferred() Cédric Le Goater
2025-03-06 14:14 ` [PULL 30/42] vfio/migration: Move migration channel flags to vfio-common.h header file Cédric Le Goater
2025-03-06 14:14 ` [PULL 31/42] vfio/migration: Multifd device state transfer support - basic types Cédric Le Goater
2025-03-06 14:14 ` [PULL 32/42] vfio/migration: Multifd device state transfer - add support checking function Cédric Le Goater
2025-03-06 14:14 ` [PULL 33/42] vfio/migration: Multifd setup/cleanup functions and associated VFIOMultifd Cédric Le Goater
2025-03-06 14:14 ` [PULL 34/42] vfio/migration: Setup and cleanup multifd transfer in these general methods Cédric Le Goater
2025-03-06 14:14 ` [PULL 35/42] vfio/migration: Multifd device state transfer support - received buffers queuing Cédric Le Goater
2025-03-06 14:14 ` [PULL 36/42] vfio/migration: Multifd device state transfer support - load thread Cédric Le Goater
2025-03-06 14:14 ` [PULL 37/42] migration/qemu-file: Define g_autoptr() cleanup function for QEMUFile Cédric Le Goater
2025-03-06 14:14 ` [PULL 38/42] vfio/migration: Multifd device state transfer support - config loading support Cédric Le Goater
2025-03-06 14:14 ` [PULL 39/42] vfio/migration: Multifd device state transfer support - send side Cédric Le Goater
2025-03-06 14:14 ` [PULL 40/42] vfio/migration: Add x-migration-multifd-transfer VFIO property Cédric Le Goater
2025-03-06 14:14 ` [PULL 41/42] vfio/migration: Make x-migration-multifd-transfer VFIO property mutable Cédric Le Goater
2025-03-06 14:14 ` [PULL 42/42] hw/core/machine: Add compat for x-migration-multifd-transfer VFIO property Cédric Le Goater
2025-03-07  7:18 ` [PULL 00/42] vfio queue Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250306141419.2015340-4-clg@redhat.com \
    --to=clg@redhat.com \
    --cc=alex.williamson@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).