From: Alex Williamson <alex.williamson@redhat.com>
To: qemu-devel@nongnu.org
Cc: Alex Williamson <alex.williamson@redhat.com>,
eric.auger.pro@gmail.com, eric.auger@redhat.com, clg@redhat.com,
zhenzhong.duan@intel.com, mst@redhat.com,
marcel.apfelbaum@gmail.com
Subject: [PATCH 1/5] hw/pci: Basic support for PCI power management
Date: Thu, 20 Feb 2025 15:48:54 -0700 [thread overview]
Message-ID: <20250220224918.2520417-2-alex.williamson@redhat.com> (raw)
In-Reply-To: <20250220224918.2520417-1-alex.williamson@redhat.com>
The memory and IO BARs for devices are only accessible in the D0
power state. In other power states the PCI spec defines that the
device should respond to TLPs and messages with an Unsupported
Request response. The closest we can come to emulating this
behavior is to consider the BARs as unmapped, removing them from
the address space.
Introduce an interface to register the PM capability such that
the device power state is considered relative to the BAR mapping
state.
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/pci/pci.c | 83 ++++++++++++++++++++++++++++++++++++-
hw/pci/trace-events | 2 +
include/hw/pci/pci.h | 3 ++
include/hw/pci/pci_device.h | 3 ++
4 files changed, 89 insertions(+), 2 deletions(-)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 2afa423925c5..4f2554dd63ac 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -435,6 +435,74 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
attrs, NULL);
}
+int pci_pm_init(PCIDevice *d, uint8_t offset, Error **errp)
+{
+ int cap = pci_add_capability(d, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF, errp);
+
+ if (cap < 0) {
+ return cap;
+ }
+
+ d->pm_cap = cap;
+ d->cap_present |= QEMU_PCI_CAP_PM;
+ pci_set_word(d->wmask + cap + PCI_PM_CTRL, PCI_PM_CTRL_STATE_MASK);
+
+ return cap;
+}
+
+static uint8_t pci_pm_state(PCIDevice *d)
+{
+ uint16_t pmcsr;
+
+ if (!(d->cap_present & QEMU_PCI_CAP_PM)) {
+ return 0;
+ }
+
+ pmcsr = pci_get_word(d->config + d->pm_cap + PCI_PM_CTRL);
+
+ return pmcsr & PCI_PM_CTRL_STATE_MASK;
+}
+
+static uint8_t pci_pm_update(PCIDevice *d, uint32_t addr, int l, uint8_t old)
+{
+ uint16_t pmc;
+ uint8_t new;
+
+ if (!(d->cap_present & QEMU_PCI_CAP_PM) ||
+ !range_covers_byte(addr, l, d->pm_cap + PCI_PM_CTRL)) {
+ return old;
+ }
+
+ new = pci_pm_state(d);
+ if (new == old) {
+ return old;
+ }
+
+ pmc = pci_get_word(d->config + d->pm_cap + PCI_PM_PMC);
+
+ /*
+ * Transitions to D1 & D2 are only allowed if supported. Devices may
+ * only transition to higher D-states or to D0. The write is dropped
+ * for rejected transitions by restoring the old state.
+ */
+ if ((!(pmc & PCI_PM_CAP_D1) && new == 1) ||
+ (!(pmc & PCI_PM_CAP_D2) && new == 2) ||
+ (old && new && new < old)) {
+ pci_word_test_and_clear_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+ PCI_PM_CTRL_STATE_MASK);
+ pci_word_test_and_set_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+ old);
+ trace_pci_pm_bad_transition(d->name, pci_dev_bus_num(d),
+ PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
+ old, new);
+ return old;
+ }
+
+ trace_pci_pm_transition(d->name, pci_dev_bus_num(d), PCI_SLOT(d->devfn),
+ PCI_FUNC(d->devfn), old, new);
+ return new;
+}
+
static void pci_reset_regions(PCIDevice *dev)
{
int r;
@@ -474,6 +542,11 @@ static void pci_do_device_reset(PCIDevice *dev)
pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) |
pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE));
dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
+ /* Default PM state is D0 */
+ if (dev->cap_present & QEMU_PCI_CAP_PM) {
+ pci_word_test_and_clear_mask(dev->config + dev->pm_cap + PCI_PM_CTRL,
+ PCI_PM_CTRL_STATE_MASK);
+ }
pci_reset_regions(dev);
pci_update_mappings(dev);
@@ -1598,7 +1671,7 @@ static void pci_update_mappings(PCIDevice *d)
continue;
new_addr = pci_bar_address(d, i, r->type, r->size);
- if (!d->enabled) {
+ if (!d->enabled || pci_pm_state(d)) {
new_addr = PCI_BAR_UNMAPPED;
}
@@ -1664,6 +1737,7 @@ uint32_t pci_default_read_config(PCIDevice *d,
void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l)
{
+ uint8_t new_pm_state, old_pm_state = pci_pm_state(d);
int i, was_irq_disabled = pci_irq_disabled(d);
uint32_t val = val_in;
@@ -1676,11 +1750,16 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
d->config[addr + i] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */
}
+
+ new_pm_state = pci_pm_update(d, addr, l, old_pm_state);
+
if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) ||
- range_covers_byte(addr, l, PCI_COMMAND))
+ range_covers_byte(addr, l, PCI_COMMAND) ||
+ !!new_pm_state != !!old_pm_state) {
pci_update_mappings(d);
+ }
if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
pci_update_irq_disabled(d, was_irq_disabled);
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index 19643aa8c6b0..811354f29031 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -1,6 +1,8 @@
# See docs/devel/tracing.rst for syntax documentation.
# pci.c
+pci_pm_bad_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x bad PM transition D%d->D%d"
+pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x PM transition D%d->D%d"
pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s"
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 4002bbeebde0..c220cc844962 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -216,6 +216,8 @@ enum {
QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
#define QEMU_PCIE_EXT_TAG_BITNR 13
QEMU_PCIE_EXT_TAG = (1 << QEMU_PCIE_EXT_TAG_BITNR),
+#define QEMU_PCI_CAP_PM_BITNR 14
+ QEMU_PCI_CAP_PM = (1 << QEMU_PCI_CAP_PM_BITNR),
};
typedef struct PCIINTxRoute {
@@ -676,5 +678,6 @@ static inline void pci_irq_deassert(PCIDevice *pci_dev)
MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
void pci_set_enabled(PCIDevice *pci_dev, bool state);
void pci_set_power(PCIDevice *pci_dev, bool state);
+int pci_pm_init(PCIDevice *pci_dev, uint8_t offset, Error **errp);
#endif
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index add208edfabd..345b12eaac1a 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -105,6 +105,9 @@ struct PCIDevice {
/* Capability bits */
uint32_t cap_present;
+ /* Offset of PM capability in config space */
+ uint8_t pm_cap;
+
/* Offset of MSI-X capability in config space */
uint8_t msix_cap;
--
2.48.1
next prev parent reply other threads:[~2025-02-20 22:51 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-20 22:48 [PATCH 0/5] PCI: Implement basic PCI PM capability backing Alex Williamson
2025-02-20 22:48 ` Alex Williamson [this message]
2025-02-24 19:03 ` [PATCH 1/5] hw/pci: Basic support for PCI power management Eric Auger
2025-02-25 5:24 ` Alex Williamson
2025-02-25 9:45 ` Eric Auger
2025-02-20 22:48 ` [PATCH 2/5] pci: Use PCI PM capability initializer Alex Williamson
2025-02-24 18:37 ` Eric Auger
2025-02-24 19:03 ` Alex Williamson
2025-02-20 22:48 ` [PATCH 3/5] vfio/pci: Delete local pm_cap Alex Williamson
2025-02-24 18:38 ` Eric Auger
2025-02-20 22:48 ` [PATCH 4/5] pcie, virtio: Remove redundant pm_cap Alex Williamson
2025-02-21 6:12 ` Duan, Zhenzhong
2025-02-22 6:00 ` Cédric Le Goater
2025-02-24 1:45 ` Duan, Zhenzhong
2025-02-24 18:40 ` Eric Auger
2025-02-20 22:48 ` [PATCH 5/5] hw/vfio/pci: Re-order pre-reset Alex Williamson
2025-02-24 20:16 ` Eric Auger
2025-02-20 22:54 ` [PATCH 0/5] PCI: Implement basic PCI PM capability backing Michael S. Tsirkin
2025-02-24 8:21 ` Cédric Le Goater
2025-02-24 1:43 ` Duan, Zhenzhong
2025-02-24 8:14 ` Cédric Le Goater
2025-02-24 15:09 ` Alex Williamson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250220224918.2520417-2-alex.williamson@redhat.com \
--to=alex.williamson@redhat.com \
--cc=clg@redhat.com \
--cc=eric.auger.pro@gmail.com \
--cc=eric.auger@redhat.com \
--cc=marcel.apfelbaum@gmail.com \
--cc=mst@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=zhenzhong.duan@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).