* [PATCH 1/2] vfio/pci: Latch disable_idle_d3 per device
2026-06-11 18:25 [PATCH 0/2] vfio/pci: Latch module parameters per device Alex Williamson
@ 2026-06-11 18:25 ` Alex Williamson
2026-06-11 18:25 ` [PATCH 2/2] vfio/pci: Latch all module parameters " Alex Williamson
1 sibling, 0 replies; 5+ messages in thread
From: Alex Williamson @ 2026-06-11 18:25 UTC (permalink / raw)
To: kvm, Alex Williamson
Cc: Alex Williamson, linux-kernel, Jason Gunthorpe, Kevin Tian, kanie,
stable
When disable_idle_d3 was introduced in vfio-pci, it directly manipulated
the device power state with pci_set_power_state(). There were no
refcounts to maintain or balanced operations, we could unconditionally
bring the device to D0 and conditionally move it to D3hot. Therefore
the module parameter was made writable.
Later, in commit c61302aa48f7 ("vfio/pci: Move module parameters to
vfio_pci.c"), as part of the vfio-pci-core split, the writable aspect
of the module parameter was nullified. The parameter value could still
be changed through sysfs, but the vfio-pci driver latched the values
into vfio-pci-core globals at module init. Loading the vfio-pci module,
or unloading and reloading, with non-default or different values could
change the globals relative to existing devices bound to vfio-pci
variant drivers.
Runtime PM was introduced in commit 7ab5e10eda02 ("vfio/pci: Move the
unused device into low power state with runtime PM"), which marks the
point where power states became refcounted. PM get and put operations
need to be balanced, but the same module operations noted above can
change the global variables relative to those devices already bound to
vfio-pci variant drivers. This introduces a window where PM operations
can now become unbalanced.
To resolve this with a narrow footprint for stable backports, the
disable_idle_d3 flag is latched into the vfio_pci_core_device at the
time of initialization, such that the device always operates with a
consistent value.
Fixes: 7ab5e10eda02 ("vfio/pci: Move the unused device into low power state with runtime PM")
Cc: stable@vger.kernel.org
Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Alex Williamson <alex.williamson@nvidia.com>
---
drivers/vfio/pci/vfio_pci_core.c | 30 ++++++++++++++++++++----------
include/linux/vfio_pci_core.h | 1 +
2 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index a28f1e99362c..9f71eae0cc94 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -538,7 +538,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
u16 cmd;
u8 msix_pos;
- if (!disable_idle_d3) {
+ if (!vdev->disable_idle_d3) {
ret = pm_runtime_resume_and_get(&pdev->dev);
if (ret < 0)
return ret;
@@ -617,7 +617,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
out_disable_device:
pci_disable_device(pdev);
out_power:
- if (!disable_idle_d3)
+ if (!vdev->disable_idle_d3)
pm_runtime_put(&pdev->dev);
return ret;
}
@@ -753,7 +753,7 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
vfio_pci_dev_set_try_reset(vdev->vdev.dev_set);
/* Put the pm-runtime usage counter acquired during enable */
- if (!disable_idle_d3)
+ if (!vdev->disable_idle_d3)
pm_runtime_put(&pdev->dev);
}
EXPORT_SYMBOL_GPL(vfio_pci_core_disable);
@@ -2144,6 +2144,8 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
init_rwsem(&vdev->memory_lock);
xa_init(&vdev->ctx);
+ vdev->disable_idle_d3 = disable_idle_d3;
+
return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_init_dev);
@@ -2239,7 +2241,7 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
dev->driver->pm = &vfio_pci_core_pm_ops;
pm_runtime_allow(dev);
- if (!disable_idle_d3)
+ if (!vdev->disable_idle_d3)
pm_runtime_put(dev);
ret = vfio_register_group_dev(&vdev->vdev);
@@ -2248,7 +2250,7 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
return 0;
out_power:
- if (!disable_idle_d3)
+ if (!vdev->disable_idle_d3)
pm_runtime_get_noresume(dev);
pm_runtime_forbid(dev);
@@ -2267,7 +2269,7 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev)
vfio_pci_vf_uninit(vdev);
vfio_pci_vga_uninit(vdev);
- if (!disable_idle_d3)
+ if (!vdev->disable_idle_d3)
pm_runtime_get_noresume(&vdev->pdev->dev);
pm_runtime_forbid(&vdev->pdev->dev);
@@ -2429,6 +2431,8 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set)
int ret;
list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
+ if (cur->disable_idle_d3)
+ continue;
ret = pm_runtime_resume_and_get(&cur->pdev->dev);
if (ret)
goto unwind;
@@ -2438,8 +2442,11 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set)
unwind:
list_for_each_entry_continue_reverse(cur, &dev_set->device_list,
- vdev.dev_set_list)
+ vdev.dev_set_list) {
+ if (cur->disable_idle_d3)
+ continue;
pm_runtime_put(&cur->pdev->dev);
+ }
return ret;
}
@@ -2552,8 +2559,11 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
up_write(&vdev->memory_lock);
}
- list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list)
+ list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) {
+ if (vdev->disable_idle_d3)
+ continue;
pm_runtime_put(&vdev->pdev->dev);
+ }
err_unlock:
mutex_unlock(&dev_set->lock);
@@ -2599,7 +2609,7 @@ static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set)
* state. Increment the usage count for all the devices in the dev_set
* before reset and decrement the same after reset.
*/
- if (!disable_idle_d3 && vfio_pci_dev_set_pm_runtime_get(dev_set))
+ if (vfio_pci_dev_set_pm_runtime_get(dev_set))
return;
if (!pci_reset_bus(pdev))
@@ -2609,7 +2619,7 @@ static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set)
if (reset_done)
cur->needs_reset = false;
- if (!disable_idle_d3)
+ if (!cur->disable_idle_d3)
pm_runtime_put(&cur->pdev->dev);
}
}
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 5fc6ce4dd786..27aab3fdbb91 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -127,6 +127,7 @@ struct vfio_pci_core_device {
bool needs_pm_restore:1;
bool pm_intx_masked:1;
bool pm_runtime_engaged:1;
+ bool disable_idle_d3:1;
bool sriov_active;
struct pci_saved_state *pci_saved_state;
struct pci_saved_state *pm_save;
--
2.53.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 2/2] vfio/pci: Latch all module parameters per device
2026-06-11 18:25 [PATCH 0/2] vfio/pci: Latch module parameters per device Alex Williamson
2026-06-11 18:25 ` [PATCH 1/2] vfio/pci: Latch disable_idle_d3 " Alex Williamson
@ 2026-06-11 18:25 ` Alex Williamson
2026-06-11 18:44 ` sashiko-bot
1 sibling, 1 reply; 5+ messages in thread
From: Alex Williamson @ 2026-06-11 18:25 UTC (permalink / raw)
To: kvm, Alex Williamson
Cc: Alex Williamson, linux-kernel, Jason Gunthorpe, Kevin Tian, kanie
The vfio-pci module parameters of disable_idle_d3, nointxmask, and
disable_vga latch vfio-pci policy into vfio-pci-core globals each time
the vfio-pci module is initialized. The disable_idle_d3 parameter has
already migrated to a per-device flag in order to provide consistency
for refcounted PM operations for the lifetime of the device
registration.
Pull the remaining vfio-pci module-parameter policy out of vfio-pci-core
into per-device flags set at device initialization.
This also restores the mutable aspect of the disable_idle_d3 and
nointxmask module parameters for vfio-pci, with the caveat that the
parameters are latched into the device at probe.
A notable change for variant drivers is that their devices are no longer
affected by vfio-pci module parameters and those drivers may need to
adopt similar module parameters if any devices have a hidden dependency
on vfio-pci setting non-default policy.
Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Alex Williamson <alex.williamson@nvidia.com>
---
drivers/vfio/pci/vfio_pci.c | 30 ++++++++++++++++++++++--------
drivers/vfio/pci/vfio_pci_core.c | 26 ++++++--------------------
include/linux/vfio_pci_core.h | 4 ++--
3 files changed, 30 insertions(+), 30 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 0c771064c0b8..830369ff878d 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -125,9 +125,30 @@ static int vfio_pci_open_device(struct vfio_device *core_vdev)
return 0;
}
+static int vfio_pci_init_dev(struct vfio_device *core_vdev)
+{
+ struct vfio_pci_core_device *vdev =
+ container_of(core_vdev, struct vfio_pci_core_device, vdev);
+
+ /*
+ * These behaviors originated in vfio-pci and moved into
+ * vfio-pci-core when the driver was split; vfio-pci remains the
+ * only driver that toggles them. Latch our module parameters per
+ * device at init time so that later parameter changes do not
+ * affect already-initialized devices.
+ */
+ vdev->nointxmask = nointxmask;
+ vdev->disable_idle_d3 = disable_idle_d3;
+#ifdef CONFIG_VFIO_PCI_VGA
+ vdev->disable_vga = disable_vga;
+#endif
+
+ return vfio_pci_core_init_dev(core_vdev);
+}
+
static const struct vfio_device_ops vfio_pci_ops = {
.name = "vfio-pci",
- .init = vfio_pci_core_init_dev,
+ .init = vfio_pci_init_dev,
.release = vfio_pci_core_release_dev,
.open_device = vfio_pci_open_device,
.close_device = vfio_pci_core_close_device,
@@ -256,13 +277,6 @@ static void __init vfio_pci_fill_ids(void)
static int __init vfio_pci_init(void)
{
int ret;
- bool is_disable_vga = true;
-
-#ifdef CONFIG_VFIO_PCI_VGA
- is_disable_vga = disable_vga;
-#endif
-
- vfio_pci_core_set_params(nointxmask, is_disable_vga, disable_idle_d3);
/* Register and scan for devices */
ret = pci_register_driver(&vfio_pci_driver);
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 9f71eae0cc94..61cf1a1996b0 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -38,10 +38,6 @@
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC "core driver for VFIO based PCI devices"
-static bool nointxmask;
-static bool disable_vga;
-static bool disable_idle_d3;
-
static void vfio_pci_eventfd_rcu_free(struct rcu_head *rcu)
{
struct vfio_pci_eventfd *eventfd =
@@ -92,10 +88,10 @@ struct vfio_pci_vf_token {
int users;
};
-static inline bool vfio_vga_disabled(void)
+static inline bool vfio_vga_disabled(struct vfio_pci_core_device *vdev)
{
#ifdef CONFIG_VFIO_PCI_VGA
- return disable_vga;
+ return vdev->disable_vga;
#else
return true;
#endif
@@ -111,11 +107,12 @@ static inline bool vfio_vga_disabled(void)
*/
static unsigned int vfio_pci_set_decode(struct pci_dev *pdev, bool single_vga)
{
+ struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev);
struct pci_dev *tmp = NULL;
unsigned char max_busnr;
unsigned int decodes;
- if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus))
+ if (single_vga || !vfio_vga_disabled(vdev) || pci_is_root_bus(pdev->bus))
return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
@@ -562,7 +559,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
if (!vdev->pci_saved_state)
pci_dbg(pdev, "%s: Couldn't store saved state\n", __func__);
- if (likely(!nointxmask)) {
+ if (likely(!vdev->nointxmask)) {
if (vfio_pci_nointx(pdev)) {
pci_info(pdev, "Masking broken INTx support\n");
vdev->nointx = true;
@@ -602,7 +599,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
vdev->has_dyn_msix = false;
}
- if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
+ if (!vfio_vga_disabled(vdev) && vfio_pci_is_vga(pdev))
vdev->has_vga = true;
vfio_pci_core_map_bars(vdev);
@@ -2144,8 +2141,6 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
init_rwsem(&vdev->memory_lock);
xa_init(&vdev->ctx);
- vdev->disable_idle_d3 = disable_idle_d3;
-
return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_init_dev);
@@ -2624,15 +2619,6 @@ static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set)
}
}
-void vfio_pci_core_set_params(bool is_nointxmask, bool is_disable_vga,
- bool is_disable_idle_d3)
-{
- nointxmask = is_nointxmask;
- disable_vga = is_disable_vga;
- disable_idle_d3 = is_disable_idle_d3;
-}
-EXPORT_SYMBOL_GPL(vfio_pci_core_set_params);
-
static void vfio_pci_core_cleanup(void)
{
vfio_pci_uninit_perm_bits();
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 27aab3fdbb91..74b950d73aef 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -128,6 +128,8 @@ struct vfio_pci_core_device {
bool pm_intx_masked:1;
bool pm_runtime_engaged:1;
bool disable_idle_d3:1;
+ bool nointxmask:1;
+ bool disable_vga:1;
bool sriov_active;
struct pci_saved_state *pci_saved_state;
struct pci_saved_state *pm_save;
@@ -158,8 +160,6 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
unsigned int type, unsigned int subtype,
const struct vfio_pci_regops *ops,
size_t size, u32 flags, void *data);
-void vfio_pci_core_set_params(bool nointxmask, bool is_disable_vga,
- bool is_disable_idle_d3);
void vfio_pci_core_close_device(struct vfio_device *core_vdev);
int vfio_pci_core_init_dev(struct vfio_device *core_vdev);
void vfio_pci_core_release_dev(struct vfio_device *core_vdev);
--
2.53.0
^ permalink raw reply related [flat|nested] 5+ messages in thread