linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] PCI/ASPM: Allow ASPM enablement for devices behind Intel VMD
@ 2025-07-10  1:16 David E. Box
  2025-07-10  2:17 ` Kenneth R. Crudup
  2025-07-10 19:53 ` Rafael J. Wysocki
  0 siblings, 2 replies; 8+ messages in thread
From: David E. Box @ 2025-07-10  1:16 UTC (permalink / raw)
  To: rafael, bhelgaas, andrea.righi, vicamo.yang, kenny, nirmal.patel
  Cc: David E. Box, linux-pm, linux-pci, ilpo.jarvinen, linux-kernel

Devices behind Intel's Volume Management Device (VMD) controller reside on
a synthetic PCI hierarchy that is intentionally hidden from ACPI and
firmware. As such, BIOS does not configure ASPM for these devices, and the
responsibility for link power management, including ASPM and LTR, falls
entirely to the VMD driver.

However, the current PCIe ASPM code prevents ASPM configuration when the
ACPI_FADT_NO_ASPM flag is set, disallowing OS management. This leaves ASPM
permanently disabled for these devices, contrary to the platform's design
intent.

Introduce a callback mechanism that allows the VMD driver to enable ASPM
for its domain, bypassing the global ACPI_FADT_NO_ASPM restriction that is
not applicable in this context. This ensures that devices behind VMD can
benefit from ASPM savings as originally intended.

Link: https://lore.kernel.org/linux-pm/0b166ece-eeec-ba5d-2212-50d995611cef@panix.com
Signed-off-by: David E. Box <david.e.box@linux.intel.com>
---
 drivers/pci/controller/vmd.c | 28 ++++++++++++++++++++++++++--
 drivers/pci/pci.h            |  8 ++++++++
 drivers/pci/pcie/aspm.c      | 36 +++++++++++++++++++++++++++++++++++-
 3 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index 8df064b62a2f..e685586dc18b 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -21,6 +21,8 @@
 
 #include <asm/irqdomain.h>
 
+#include "../pci.h"
+
 #define VMD_CFGBAR	0
 #define VMD_MEMBAR1	2
 #define VMD_MEMBAR2	4
@@ -730,7 +732,7 @@ static void vmd_copy_host_bridge_flags(struct pci_host_bridge *root_bridge,
 }
 
 /*
- * Enable ASPM and LTR settings on devices that aren't configured by BIOS.
+ * Enable LTR settings on devices that aren't configured by BIOS.
  */
 static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
 {
@@ -770,10 +772,27 @@ static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
 	 * PCIe r6.0, sec 5.5.4.
 	 */
 	pci_set_power_state_locked(pdev, PCI_D0);
-	pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
 	return 0;
 }
 
+static long vmd_get_link_state(struct pci_dev *pdev, void *data)
+{
+	struct pci_bus *vmd_bus = data;
+	struct pci_bus *bus = pdev->bus;
+
+	while (bus) {
+		if (bus == vmd_bus)
+			return PCIE_LINK_STATE_ALL;
+
+		if (!bus->self)
+			break;
+
+		bus = bus->self->bus;
+	}
+
+	return -ENODEV;
+}
+
 static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
 {
 	struct pci_sysdata *sd = &vmd->sysdata;
@@ -785,6 +804,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
 	resource_size_t membar2_offset = 0x2000;
 	struct pci_bus *child;
 	struct pci_dev *dev;
+	struct pcie_aspm_vmd_link_state vmd_link_state;
 	int ret;
 
 	/*
@@ -911,6 +931,10 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
 		return -ENODEV;
 	}
 
+	vmd_link_state.cb = vmd_get_link_state;
+	vmd_link_state.data = vmd->bus;
+	pci_register_vmd_link_state_cb(&vmd_link_state);
+
 	vmd_copy_host_bridge_flags(pci_find_host_bridge(vmd->dev->bus),
 				   to_pci_host_bridge(vmd->bus->bridge));
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 12215ee72afb..dcf7d39c660f 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -821,6 +821,12 @@ void pci_configure_aspm_l1ss(struct pci_dev *dev);
 void pci_save_aspm_l1ss_state(struct pci_dev *dev);
 void pci_restore_aspm_l1ss_state(struct pci_dev *dev);
 
+
+struct pcie_aspm_vmd_link_state {
+	long (*cb)(struct pci_dev *pdev, void *data);
+	void *data;
+};
+
 #ifdef CONFIG_PCIEASPM
 void pcie_aspm_init_link_state(struct pci_dev *pdev);
 void pcie_aspm_exit_link_state(struct pci_dev *pdev);
@@ -828,6 +834,7 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
 void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
 void pci_configure_ltr(struct pci_dev *pdev);
 void pci_bridge_reconfigure_ltr(struct pci_dev *pdev);
+void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state);
 #else
 static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
 static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
@@ -835,6 +842,7 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
 static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
 static inline void pci_configure_ltr(struct pci_dev *pdev) { }
 static inline void pci_bridge_reconfigure_ltr(struct pci_dev *pdev) { }
+void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state) { }
 #endif
 
 #ifdef CONFIG_PCIE_ECRC
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 29fcb0689a91..c609d3c309be 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -320,6 +320,27 @@ static int policy_to_clkpm_state(struct pcie_link_state *link)
 	return 0;
 }
 
+static struct pcie_aspm_vmd_link_state vmd_state;
+
+void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state)
+{
+	mutex_lock(&aspm_lock);
+	vmd_state.cb = state->cb;
+	vmd_state.data = state->data;
+	mutex_unlock(&aspm_lock);
+}
+EXPORT_SYMBOL_GPL(pci_register_vmd_link_state_cb);
+
+static long pci_get_vmd_link_state(struct pci_dev *pdev)
+{
+	int state = -ENODEV;
+
+	if (vmd_state.cb)
+		state = vmd_state.cb(pdev, vmd_state.data);
+
+	return state;
+}
+
 static void pci_update_aspm_saved_state(struct pci_dev *dev)
 {
 	struct pci_cap_saved_state *save_state;
@@ -794,6 +815,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
 	u32 parent_lnkcap, child_lnkcap;
 	u16 parent_lnkctl, child_lnkctl;
 	struct pci_bus *linkbus = parent->subordinate;
+	int vmd_aspm_default;
 
 	if (blacklist) {
 		/* Set enabled/disable so that we will disable ASPM later */
@@ -865,8 +887,20 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
 		pcie_capability_write_word(child, PCI_EXP_LNKCTL, child_lnkctl);
 	}
 
+	/*
+	 * Devices behind Intel VMD operate on a synthetic PCI bus that BIOS
+	 * and ACPI do not enumerate or configure. ASPM for these devices must
+	 * be managed by the VMD driver itself, independent of global ACPI ASPM
+	 * constraints. This callback mechanism allows selective ASPM
+	 * enablement for such domains.
+	 */
+	vmd_aspm_default = pci_get_vmd_link_state(parent);
+
 	/* Save default state */
-	link->aspm_default = link->aspm_enabled;
+	if (vmd_aspm_default < 0)
+		link->aspm_default = link->aspm_enabled;
+	else
+		link->aspm_default = vmd_aspm_default;
 
 	/* Setup initial capable state. Will be updated later */
 	link->aspm_capable = link->aspm_support;

base-commit: d0b3b7b22dfa1f4b515fd3a295b3fd958f9e81af
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] PCI/ASPM: Allow ASPM enablement for devices behind Intel VMD
  2025-07-10  1:16 [PATCH] PCI/ASPM: Allow ASPM enablement for devices behind Intel VMD David E. Box
@ 2025-07-10  2:17 ` Kenneth R. Crudup
  2025-07-10 19:53 ` Rafael J. Wysocki
  1 sibling, 0 replies; 8+ messages in thread
From: Kenneth R. Crudup @ 2025-07-10  2:17 UTC (permalink / raw)
  To: David E. Box
  Cc: rafael, bhelgaas, andrea.righi, vicamo.yang, nirmal.patel,
	linux-pm, linux-pci, ilpo.jarvinen, linux-kernel


(Resending; for some reason VGER thinks the Thunderbird MUA is "Spammy", but doesn't think as badly of "Alpine")

Testing now; so far, results look quite promising!

-K

On Wed, 9 Jul 2025, David E. Box wrote:

> Devices behind Intel's Volume Management Device (VMD) controller reside on
> a synthetic PCI hierarchy that is intentionally hidden from ACPI and
> firmware. As such, BIOS does not configure ASPM for these devices, and the
> responsibility for link power management, including ASPM and LTR, falls
> entirely to the VMD driver.
>
> However, the current PCIe ASPM code prevents ASPM configuration when the
> ACPI_FADT_NO_ASPM flag is set, disallowing OS management. This leaves ASPM
> permanently disabled for these devices, contrary to the platform's design
> intent.
>
> Introduce a callback mechanism that allows the VMD driver to enable ASPM
> for its domain, bypassing the global ACPI_FADT_NO_ASPM restriction that is
> not applicable in this context. This ensures that devices behind VMD can
> benefit from ASPM savings as originally intended.
>
> Link: https://lore.kernel.org/linux-pm/0b166ece-eeec-ba5d-2212-50d995611cef@panix.com
> Signed-off-by: David E. Box <david.e.box@linux.intel.com>
> ---
>  drivers/pci/controller/vmd.c | 28 ++++++++++++++++++++++++++--
>  drivers/pci/pci.h            |  8 ++++++++
>  drivers/pci/pcie/aspm.c      | 36 +++++++++++++++++++++++++++++++++++-
>  3 files changed, 69 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
> index 8df064b62a2f..e685586dc18b 100644
> --- a/drivers/pci/controller/vmd.c
> +++ b/drivers/pci/controller/vmd.c
> @@ -21,6 +21,8 @@
>
>  #include <asm/irqdomain.h>
>
> +#include "../pci.h"
> +
>  #define VMD_CFGBAR	0
>  #define VMD_MEMBAR1	2
>  #define VMD_MEMBAR2	4
> @@ -730,7 +732,7 @@ static void vmd_copy_host_bridge_flags(struct pci_host_bridge *root_bridge,
>  }
>
>  /*
> - * Enable ASPM and LTR settings on devices that aren't configured by BIOS.
> + * Enable LTR settings on devices that aren't configured by BIOS.
>   */
>  static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
>  {
> @@ -770,10 +772,27 @@ static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
>  	 * PCIe r6.0, sec 5.5.4.
>  	 */
>  	pci_set_power_state_locked(pdev, PCI_D0);
> -	pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
>  	return 0;
>  }
>
> +static long vmd_get_link_state(struct pci_dev *pdev, void *data)
> +{
> +	struct pci_bus *vmd_bus = data;
> +	struct pci_bus *bus = pdev->bus;
> +
> +	while (bus) {
> +		if (bus == vmd_bus)
> +			return PCIE_LINK_STATE_ALL;
> +
> +		if (!bus->self)
> +			break;
> +
> +		bus = bus->self->bus;
> +	}
> +
> +	return -ENODEV;
> +}
> +
>  static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
>  {
>  	struct pci_sysdata *sd = &vmd->sysdata;
> @@ -785,6 +804,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
>  	resource_size_t membar2_offset = 0x2000;
>  	struct pci_bus *child;
>  	struct pci_dev *dev;
> +	struct pcie_aspm_vmd_link_state vmd_link_state;
>  	int ret;
>
>  	/*
> @@ -911,6 +931,10 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
>  		return -ENODEV;
>  	}
>
> +	vmd_link_state.cb = vmd_get_link_state;
> +	vmd_link_state.data = vmd->bus;
> +	pci_register_vmd_link_state_cb(&vmd_link_state);
> +
>  	vmd_copy_host_bridge_flags(pci_find_host_bridge(vmd->dev->bus),
>  				   to_pci_host_bridge(vmd->bus->bridge));
>
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 12215ee72afb..dcf7d39c660f 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -821,6 +821,12 @@ void pci_configure_aspm_l1ss(struct pci_dev *dev);
>  void pci_save_aspm_l1ss_state(struct pci_dev *dev);
>  void pci_restore_aspm_l1ss_state(struct pci_dev *dev);
>
> +
> +struct pcie_aspm_vmd_link_state {
> +	long (*cb)(struct pci_dev *pdev, void *data);
> +	void *data;
> +};
> +
>  #ifdef CONFIG_PCIEASPM
>  void pcie_aspm_init_link_state(struct pci_dev *pdev);
>  void pcie_aspm_exit_link_state(struct pci_dev *pdev);
> @@ -828,6 +834,7 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
>  void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
>  void pci_configure_ltr(struct pci_dev *pdev);
>  void pci_bridge_reconfigure_ltr(struct pci_dev *pdev);
> +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state);
>  #else
>  static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
>  static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
> @@ -835,6 +842,7 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
>  static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
>  static inline void pci_configure_ltr(struct pci_dev *pdev) { }
>  static inline void pci_bridge_reconfigure_ltr(struct pci_dev *pdev) { }
> +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state) { }
>  #endif
>
>  #ifdef CONFIG_PCIE_ECRC
> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> index 29fcb0689a91..c609d3c309be 100644
> --- a/drivers/pci/pcie/aspm.c
> +++ b/drivers/pci/pcie/aspm.c
> @@ -320,6 +320,27 @@ static int policy_to_clkpm_state(struct pcie_link_state *link)
>  	return 0;
>  }
>
> +static struct pcie_aspm_vmd_link_state vmd_state;
> +
> +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state)
> +{
> +	mutex_lock(&aspm_lock);
> +	vmd_state.cb = state->cb;
> +	vmd_state.data = state->data;
> +	mutex_unlock(&aspm_lock);
> +}
> +EXPORT_SYMBOL_GPL(pci_register_vmd_link_state_cb);
> +
> +static long pci_get_vmd_link_state(struct pci_dev *pdev)
> +{
> +	int state = -ENODEV;
> +
> +	if (vmd_state.cb)
> +		state = vmd_state.cb(pdev, vmd_state.data);
> +
> +	return state;
> +}
> +
>  static void pci_update_aspm_saved_state(struct pci_dev *dev)
>  {
>  	struct pci_cap_saved_state *save_state;
> @@ -794,6 +815,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
>  	u32 parent_lnkcap, child_lnkcap;
>  	u16 parent_lnkctl, child_lnkctl;
>  	struct pci_bus *linkbus = parent->subordinate;
> +	int vmd_aspm_default;
>
>  	if (blacklist) {
>  		/* Set enabled/disable so that we will disable ASPM later */
> @@ -865,8 +887,20 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
>  		pcie_capability_write_word(child, PCI_EXP_LNKCTL, child_lnkctl);
>  	}
>
> +	/*
> +	 * Devices behind Intel VMD operate on a synthetic PCI bus that BIOS
> +	 * and ACPI do not enumerate or configure. ASPM for these devices must
> +	 * be managed by the VMD driver itself, independent of global ACPI ASPM
> +	 * constraints. This callback mechanism allows selective ASPM
> +	 * enablement for such domains.
> +	 */
> +	vmd_aspm_default = pci_get_vmd_link_state(parent);
> +
>  	/* Save default state */
> -	link->aspm_default = link->aspm_enabled;
> +	if (vmd_aspm_default < 0)
> +		link->aspm_default = link->aspm_enabled;
> +	else
> +		link->aspm_default = vmd_aspm_default;
>
>  	/* Setup initial capable state. Will be updated later */
>  	link->aspm_capable = link->aspm_support;
>
> base-commit: d0b3b7b22dfa1f4b515fd3a295b3fd958f9e81af
>

-- 
Kenneth R. Crudup / Sr. SW Engineer, Scott County Consulting, Orange County CA

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] PCI/ASPM: Allow ASPM enablement for devices behind Intel VMD
  2025-07-10  1:16 [PATCH] PCI/ASPM: Allow ASPM enablement for devices behind Intel VMD David E. Box
  2025-07-10  2:17 ` Kenneth R. Crudup
@ 2025-07-10 19:53 ` Rafael J. Wysocki
  2025-07-11 14:33   ` David Box
  1 sibling, 1 reply; 8+ messages in thread
From: Rafael J. Wysocki @ 2025-07-10 19:53 UTC (permalink / raw)
  To: David E. Box
  Cc: rafael, bhelgaas, andrea.righi, vicamo.yang, kenny, nirmal.patel,
	linux-pm, linux-pci, ilpo.jarvinen, linux-kernel

On Thu, Jul 10, 2025 at 3:16 AM David E. Box
<david.e.box@linux.intel.com> wrote:
>
> Devices behind Intel's Volume Management Device (VMD) controller reside on
> a synthetic PCI hierarchy that is intentionally hidden from ACPI and
> firmware. As such, BIOS does not configure ASPM for these devices, and the
> responsibility for link power management, including ASPM and LTR, falls
> entirely to the VMD driver.
>
> However, the current PCIe ASPM code prevents ASPM configuration when the
> ACPI_FADT_NO_ASPM flag is set, disallowing OS management. This leaves ASPM
> permanently disabled for these devices, contrary to the platform's design
> intent.
>
> Introduce a callback mechanism that allows the VMD driver to enable ASPM
> for its domain, bypassing the global ACPI_FADT_NO_ASPM restriction that is
> not applicable in this context. This ensures that devices behind VMD can
> benefit from ASPM savings as originally intended.
>
> Link: https://lore.kernel.org/linux-pm/0b166ece-eeec-ba5d-2212-50d995611cef@panix.com
> Signed-off-by: David E. Box <david.e.box@linux.intel.com>

First of all, thanks for doing this work, much appreciated!

> ---
>  drivers/pci/controller/vmd.c | 28 ++++++++++++++++++++++++++--
>  drivers/pci/pci.h            |  8 ++++++++
>  drivers/pci/pcie/aspm.c      | 36 +++++++++++++++++++++++++++++++++++-
>  3 files changed, 69 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
> index 8df064b62a2f..e685586dc18b 100644
> --- a/drivers/pci/controller/vmd.c
> +++ b/drivers/pci/controller/vmd.c
> @@ -21,6 +21,8 @@
>
>  #include <asm/irqdomain.h>
>
> +#include "../pci.h"
> +
>  #define VMD_CFGBAR     0
>  #define VMD_MEMBAR1    2
>  #define VMD_MEMBAR2    4
> @@ -730,7 +732,7 @@ static void vmd_copy_host_bridge_flags(struct pci_host_bridge *root_bridge,
>  }
>
>  /*
> - * Enable ASPM and LTR settings on devices that aren't configured by BIOS.
> + * Enable LTR settings on devices that aren't configured by BIOS.
>   */
>  static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
>  {
> @@ -770,10 +772,27 @@ static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
>          * PCIe r6.0, sec 5.5.4.
>          */
>         pci_set_power_state_locked(pdev, PCI_D0);
> -       pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);

Do I think correctly that this doesn't work because of the
aspm_disabled check in __pci_enable_link_state()?

>         return 0;
>  }
>
> +static long vmd_get_link_state(struct pci_dev *pdev, void *data)
> +{
> +       struct pci_bus *vmd_bus = data;
> +       struct pci_bus *bus = pdev->bus;
> +
> +       while (bus) {
> +               if (bus == vmd_bus)
> +                       return PCIE_LINK_STATE_ALL;
> +
> +               if (!bus->self)
> +                       break;
> +
> +               bus = bus->self->bus;
> +       }

If I'm not mistaken, it would be sufficient to do a check like

    if (pci_dev->bus->ops == &vmd_ops)
            return PCIE_LINK_STATE_ALL;

instead of the above, or if not then why not?

> +
> +       return -ENODEV;
> +}
> +
>  static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
>  {
>         struct pci_sysdata *sd = &vmd->sysdata;
> @@ -785,6 +804,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
>         resource_size_t membar2_offset = 0x2000;
>         struct pci_bus *child;
>         struct pci_dev *dev;
> +       struct pcie_aspm_vmd_link_state vmd_link_state;
>         int ret;
>
>         /*
> @@ -911,6 +931,10 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
>                 return -ENODEV;
>         }
>
> +       vmd_link_state.cb = vmd_get_link_state;
> +       vmd_link_state.data = vmd->bus;
> +       pci_register_vmd_link_state_cb(&vmd_link_state);
> +
>         vmd_copy_host_bridge_flags(pci_find_host_bridge(vmd->dev->bus),
>                                    to_pci_host_bridge(vmd->bus->bridge));
>
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 12215ee72afb..dcf7d39c660f 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -821,6 +821,12 @@ void pci_configure_aspm_l1ss(struct pci_dev *dev);
>  void pci_save_aspm_l1ss_state(struct pci_dev *dev);
>  void pci_restore_aspm_l1ss_state(struct pci_dev *dev);
>
> +
> +struct pcie_aspm_vmd_link_state {
> +       long (*cb)(struct pci_dev *pdev, void *data);
> +       void *data;
> +};
> +
>  #ifdef CONFIG_PCIEASPM
>  void pcie_aspm_init_link_state(struct pci_dev *pdev);
>  void pcie_aspm_exit_link_state(struct pci_dev *pdev);
> @@ -828,6 +834,7 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
>  void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
>  void pci_configure_ltr(struct pci_dev *pdev);
>  void pci_bridge_reconfigure_ltr(struct pci_dev *pdev);
> +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state);
>  #else
>  static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
>  static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
> @@ -835,6 +842,7 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
>  static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
>  static inline void pci_configure_ltr(struct pci_dev *pdev) { }
>  static inline void pci_bridge_reconfigure_ltr(struct pci_dev *pdev) { }
> +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state) { }
>  #endif
>
>  #ifdef CONFIG_PCIE_ECRC
> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> index 29fcb0689a91..c609d3c309be 100644
> --- a/drivers/pci/pcie/aspm.c
> +++ b/drivers/pci/pcie/aspm.c
> @@ -320,6 +320,27 @@ static int policy_to_clkpm_state(struct pcie_link_state *link)
>         return 0;
>  }
>
> +static struct pcie_aspm_vmd_link_state vmd_state;
> +
> +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state)
> +{
> +       mutex_lock(&aspm_lock);
> +       vmd_state.cb = state->cb;
> +       vmd_state.data = state->data;
> +       mutex_unlock(&aspm_lock);
> +}
> +EXPORT_SYMBOL_GPL(pci_register_vmd_link_state_cb);
> +
> +static long pci_get_vmd_link_state(struct pci_dev *pdev)
> +{
> +       int state = -ENODEV;
> +
> +       if (vmd_state.cb)
> +               state = vmd_state.cb(pdev, vmd_state.data);
> +
> +       return state;
> +}
> +
>  static void pci_update_aspm_saved_state(struct pci_dev *dev)
>  {
>         struct pci_cap_saved_state *save_state;
> @@ -794,6 +815,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
>         u32 parent_lnkcap, child_lnkcap;
>         u16 parent_lnkctl, child_lnkctl;
>         struct pci_bus *linkbus = parent->subordinate;
> +       int vmd_aspm_default;
>
>         if (blacklist) {
>                 /* Set enabled/disable so that we will disable ASPM later */
> @@ -865,8 +887,20 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
>                 pcie_capability_write_word(child, PCI_EXP_LNKCTL, child_lnkctl);
>         }
>
> +       /*
> +        * Devices behind Intel VMD operate on a synthetic PCI bus that BIOS
> +        * and ACPI do not enumerate or configure. ASPM for these devices must
> +        * be managed by the VMD driver itself, independent of global ACPI ASPM
> +        * constraints. This callback mechanism allows selective ASPM
> +        * enablement for such domains.
> +        */
> +       vmd_aspm_default = pci_get_vmd_link_state(parent);
> +
>         /* Save default state */
> -       link->aspm_default = link->aspm_enabled;
> +       if (vmd_aspm_default < 0)
> +               link->aspm_default = link->aspm_enabled;
> +       else
> +               link->aspm_default = vmd_aspm_default;

Well, this is not nice.

First off, it adds VMD-specific stuff to otherwise generic ASPM code.
Second, it doesn't actually do anything about the aspm_disabled checks
all over the place, so they will still trigger even though ASPM will
be effectively enabled for devices on the VMD bus.

>
>         /* Setup initial capable state. Will be updated later */
>         link->aspm_capable = link->aspm_support;
>
> base-commit: d0b3b7b22dfa1f4b515fd3a295b3fd958f9e81af
> --

It appears to me that the underlying problem is that aspm_disabled is
global and it is set during PCI root bus creation in
acpi_pci_root_add().  Thus it affects all of the PCI buses even though
the BIOS says that it wants to control ASPM for this particular PCIe
hierarchy.  If there were another PCI root enumerated by ACPI where
the OS would be allowed to control ASPM, it would not work just like
the VMD case.

To me, this suggests an approach based on moving the "ASPM disabled
because the BIOS wants to control it" setting to pci_bus_flags_t and
setting it on a per-hierarchy basis.  Since the VMD bus is a separate
PCIe hierarchy from the kernel perspective, it will not have this flag
set and the OS should be able to configure ASPM for devices on that
bus.

Do I think correctly or am I overlooking something here?

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] PCI/ASPM: Allow ASPM enablement for devices behind Intel VMD
  2025-07-10 19:53 ` Rafael J. Wysocki
@ 2025-07-11 14:33   ` David Box
  2025-07-11 14:49     ` Ilpo Järvinen
  2025-07-13  4:47     ` David Box
  0 siblings, 2 replies; 8+ messages in thread
From: David Box @ 2025-07-11 14:33 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: bhelgaas, andrea.righi, vicamo.yang, kenny, nirmal.patel,
	linux-pm, linux-pci, ilpo.jarvinen, linux-kernel

Hey Rafael,

On Thu, Jul 10, 2025 at 09:53:18PM +0200, Rafael J. Wysocki wrote:
> On Thu, Jul 10, 2025 at 3:16 AM David E. Box
> <david.e.box@linux.intel.com> wrote:
> >
> > Devices behind Intel's Volume Management Device (VMD) controller reside on
> > a synthetic PCI hierarchy that is intentionally hidden from ACPI and
> > firmware. As such, BIOS does not configure ASPM for these devices, and the
> > responsibility for link power management, including ASPM and LTR, falls
> > entirely to the VMD driver.
> >
> > However, the current PCIe ASPM code prevents ASPM configuration when the
> > ACPI_FADT_NO_ASPM flag is set, disallowing OS management. This leaves ASPM
> > permanently disabled for these devices, contrary to the platform's design
> > intent.
> >
> > Introduce a callback mechanism that allows the VMD driver to enable ASPM
> > for its domain, bypassing the global ACPI_FADT_NO_ASPM restriction that is
> > not applicable in this context. This ensures that devices behind VMD can
> > benefit from ASPM savings as originally intended.
> >
> > Link: https://lore.kernel.org/linux-pm/0b166ece-eeec-ba5d-2212-50d995611cef@panix.com
> > Signed-off-by: David E. Box <david.e.box@linux.intel.com>
> 
> First of all, thanks for doing this work, much appreciated!
> 
> > ---
> >  drivers/pci/controller/vmd.c | 28 ++++++++++++++++++++++++++--
> >  drivers/pci/pci.h            |  8 ++++++++
> >  drivers/pci/pcie/aspm.c      | 36 +++++++++++++++++++++++++++++++++++-
> >  3 files changed, 69 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
> > index 8df064b62a2f..e685586dc18b 100644
> > --- a/drivers/pci/controller/vmd.c
> > +++ b/drivers/pci/controller/vmd.c
> > @@ -21,6 +21,8 @@
> >
> >  #include <asm/irqdomain.h>
> >
> > +#include "../pci.h"
> > +
> >  #define VMD_CFGBAR     0
> >  #define VMD_MEMBAR1    2
> >  #define VMD_MEMBAR2    4
> > @@ -730,7 +732,7 @@ static void vmd_copy_host_bridge_flags(struct pci_host_bridge *root_bridge,
> >  }
> >
> >  /*
> > - * Enable ASPM and LTR settings on devices that aren't configured by BIOS.
> > + * Enable LTR settings on devices that aren't configured by BIOS.
> >   */
> >  static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
> >  {
> > @@ -770,10 +772,27 @@ static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
> >          * PCIe r6.0, sec 5.5.4.
> >          */
> >         pci_set_power_state_locked(pdev, PCI_D0);
> > -       pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
> 
> Do I think correctly that this doesn't work because of the
> aspm_disabled check in __pci_enable_link_state()?

Yes.

> 
> >         return 0;
> >  }
> >
> > +static long vmd_get_link_state(struct pci_dev *pdev, void *data)
> > +{
> > +       struct pci_bus *vmd_bus = data;
> > +       struct pci_bus *bus = pdev->bus;
> > +
> > +       while (bus) {
> > +               if (bus == vmd_bus)
> > +                       return PCIE_LINK_STATE_ALL;
> > +
> > +               if (!bus->self)
> > +                       break;
> > +
> > +               bus = bus->self->bus;
> > +       }
> 
> If I'm not mistaken, it would be sufficient to do a check like
> 
>     if (pci_dev->bus->ops == &vmd_ops)
>             return PCIE_LINK_STATE_ALL;
> 
> instead of the above, or if not then why not?

As a replacement in the loop, that does look sufficient. I'm not sure if
you're also suggesting replacing the loop itself.

> 
> > +
> > +       return -ENODEV;
> > +}
> > +
> >  static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> >  {
> >         struct pci_sysdata *sd = &vmd->sysdata;
> > @@ -785,6 +804,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> >         resource_size_t membar2_offset = 0x2000;
> >         struct pci_bus *child;
> >         struct pci_dev *dev;
> > +       struct pcie_aspm_vmd_link_state vmd_link_state;
> >         int ret;
> >
> >         /*
> > @@ -911,6 +931,10 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> >                 return -ENODEV;
> >         }
> >
> > +       vmd_link_state.cb = vmd_get_link_state;
> > +       vmd_link_state.data = vmd->bus;
> > +       pci_register_vmd_link_state_cb(&vmd_link_state);
> > +
> >         vmd_copy_host_bridge_flags(pci_find_host_bridge(vmd->dev->bus),
> >                                    to_pci_host_bridge(vmd->bus->bridge));
> >
> > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > index 12215ee72afb..dcf7d39c660f 100644
> > --- a/drivers/pci/pci.h
> > +++ b/drivers/pci/pci.h
> > @@ -821,6 +821,12 @@ void pci_configure_aspm_l1ss(struct pci_dev *dev);
> >  void pci_save_aspm_l1ss_state(struct pci_dev *dev);
> >  void pci_restore_aspm_l1ss_state(struct pci_dev *dev);
> >
> > +
> > +struct pcie_aspm_vmd_link_state {
> > +       long (*cb)(struct pci_dev *pdev, void *data);
> > +       void *data;
> > +};
> > +
> >  #ifdef CONFIG_PCIEASPM
> >  void pcie_aspm_init_link_state(struct pci_dev *pdev);
> >  void pcie_aspm_exit_link_state(struct pci_dev *pdev);
> > @@ -828,6 +834,7 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
> >  void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
> >  void pci_configure_ltr(struct pci_dev *pdev);
> >  void pci_bridge_reconfigure_ltr(struct pci_dev *pdev);
> > +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state);
> >  #else
> >  static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
> >  static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
> > @@ -835,6 +842,7 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
> >  static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
> >  static inline void pci_configure_ltr(struct pci_dev *pdev) { }
> >  static inline void pci_bridge_reconfigure_ltr(struct pci_dev *pdev) { }
> > +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state) { }
> >  #endif
> >
> >  #ifdef CONFIG_PCIE_ECRC
> > diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> > index 29fcb0689a91..c609d3c309be 100644
> > --- a/drivers/pci/pcie/aspm.c
> > +++ b/drivers/pci/pcie/aspm.c
> > @@ -320,6 +320,27 @@ static int policy_to_clkpm_state(struct pcie_link_state *link)
> >         return 0;
> >  }
> >
> > +static struct pcie_aspm_vmd_link_state vmd_state;
> > +
> > +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state)
> > +{
> > +       mutex_lock(&aspm_lock);
> > +       vmd_state.cb = state->cb;
> > +       vmd_state.data = state->data;
> > +       mutex_unlock(&aspm_lock);
> > +}
> > +EXPORT_SYMBOL_GPL(pci_register_vmd_link_state_cb);
> > +
> > +static long pci_get_vmd_link_state(struct pci_dev *pdev)
> > +{
> > +       int state = -ENODEV;
> > +
> > +       if (vmd_state.cb)
> > +               state = vmd_state.cb(pdev, vmd_state.data);
> > +
> > +       return state;
> > +}
> > +
> >  static void pci_update_aspm_saved_state(struct pci_dev *dev)
> >  {
> >         struct pci_cap_saved_state *save_state;
> > @@ -794,6 +815,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
> >         u32 parent_lnkcap, child_lnkcap;
> >         u16 parent_lnkctl, child_lnkctl;
> >         struct pci_bus *linkbus = parent->subordinate;
> > +       int vmd_aspm_default;
> >
> >         if (blacklist) {
> >                 /* Set enabled/disable so that we will disable ASPM later */
> > @@ -865,8 +887,20 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
> >                 pcie_capability_write_word(child, PCI_EXP_LNKCTL, child_lnkctl);
> >         }
> >
> > +       /*
> > +        * Devices behind Intel VMD operate on a synthetic PCI bus that BIOS
> > +        * and ACPI do not enumerate or configure. ASPM for these devices must
> > +        * be managed by the VMD driver itself, independent of global ACPI ASPM
> > +        * constraints. This callback mechanism allows selective ASPM
> > +        * enablement for such domains.
> > +        */
> > +       vmd_aspm_default = pci_get_vmd_link_state(parent);
> > +
> >         /* Save default state */
> > -       link->aspm_default = link->aspm_enabled;
> > +       if (vmd_aspm_default < 0)
> > +               link->aspm_default = link->aspm_enabled;
> > +       else
> > +               link->aspm_default = vmd_aspm_default;
> 
> Well, this is not nice.
> 
> First off, it adds VMD-specific stuff to otherwise generic ASPM code.
> Second, it doesn't actually do anything about the aspm_disabled checks
> all over the place, so they will still trigger even though ASPM will
> be effectively enabled for devices on the VMD bus.

I agree that it's not nice to be mixing VMD specific code here. It's a
working bad solution to come up with a working good solution :)

The reason this patch works is that the aspm_disabled checks only gate
OS-controlled ASPM configuration. They don't affect the BIOS default
values, which are what we're setting in link->aspm_default. The ASPM
code uses link->aspm_default as the fallback when ASPM is globally
disabled, which is exactly what we want for devices behind VMD where the
driver, not BIOS, effectively is the platform provider of the defaults.

I put it here using a callback value because this where the BIOS
defaults are saved for each device.

> 
> >
> >         /* Setup initial capable state. Will be updated later */
> >         link->aspm_capable = link->aspm_support;
> >
> > base-commit: d0b3b7b22dfa1f4b515fd3a295b3fd958f9e81af
> > --
> 
> It appears to me that the underlying problem is that aspm_disabled is
> global and it is set during PCI root bus creation in
> acpi_pci_root_add().  Thus it affects all of the PCI buses even though
> the BIOS says that it wants to control ASPM for this particular PCIe
> hierarchy. If there were another PCI root enumerated by ACPI where
> the OS would be allowed to control ASPM, it would not work just like
> the VMD case.

It would work in the non-VMD case because it has the BIOS default to
use. VMD does not.

> 
> To me, this suggests an approach based on moving the "ASPM disabled
> because the BIOS wants to control it" setting to pci_bus_flags_t and
> setting it on a per-hierarchy basis.  Since the VMD bus is a separate
> PCIe hierarchy from the kernel perspective, it will not have this flag
> set and the OS should be able to configure ASPM for devices on that
> bus.
> 
> Do I think correctly or am I overlooking something here?

It’s definitely along those lines. The issue is really caused by two
things:

    1. aspm_disabled global prevents OS control of ASPM
    2. For VMD, there’s no BIOS provided link->aspm_default

The solution I proposed addresses the problem by having a mechanism for
the VMD driver to supply its own defaults in place of the missing BIOS
configuration.

Using pci_bus_flags_t could work as well, but would rather just say that
aspm_disabled doesn't apply to the bus hierarchy, allowing the current
pci_enable_link_state_locked() solution to work all the time. Either
way, I'm definitely looking for a cleaner solution. The above hopefully
clarifies the situation.

David

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] PCI/ASPM: Allow ASPM enablement for devices behind Intel VMD
  2025-07-11 14:33   ` David Box
@ 2025-07-11 14:49     ` Ilpo Järvinen
  2025-07-11 16:06       ` David Box
  2025-07-13  4:47     ` David Box
  1 sibling, 1 reply; 8+ messages in thread
From: Ilpo Järvinen @ 2025-07-11 14:49 UTC (permalink / raw)
  To: David Box
  Cc: Rafael J. Wysocki, bhelgaas, andrea.righi, vicamo.yang, kenny,
	nirmal.patel, linux-pm, linux-pci, ilpo.jarvinen, LKML

[-- Attachment #1: Type: text/plain, Size: 12444 bytes --]

On Fri, 11 Jul 2025, David Box wrote:

> Hey Rafael,
> 
> On Thu, Jul 10, 2025 at 09:53:18PM +0200, Rafael J. Wysocki wrote:
> > On Thu, Jul 10, 2025 at 3:16 AM David E. Box
> > <david.e.box@linux.intel.com> wrote:
> > >
> > > Devices behind Intel's Volume Management Device (VMD) controller reside on
> > > a synthetic PCI hierarchy that is intentionally hidden from ACPI and
> > > firmware. As such, BIOS does not configure ASPM for these devices, and the
> > > responsibility for link power management, including ASPM and LTR, falls
> > > entirely to the VMD driver.
> > >
> > > However, the current PCIe ASPM code prevents ASPM configuration when the
> > > ACPI_FADT_NO_ASPM flag is set, disallowing OS management. This leaves ASPM
> > > permanently disabled for these devices, contrary to the platform's design
> > > intent.
> > >
> > > Introduce a callback mechanism that allows the VMD driver to enable ASPM
> > > for its domain, bypassing the global ACPI_FADT_NO_ASPM restriction that is
> > > not applicable in this context. This ensures that devices behind VMD can
> > > benefit from ASPM savings as originally intended.
> > >
> > > Link: https://lore.kernel.org/linux-pm/0b166ece-eeec-ba5d-2212-50d995611cef@panix.com
> > > Signed-off-by: David E. Box <david.e.box@linux.intel.com>
> > 
> > First of all, thanks for doing this work, much appreciated!
> > 
> > > ---
> > >  drivers/pci/controller/vmd.c | 28 ++++++++++++++++++++++++++--
> > >  drivers/pci/pci.h            |  8 ++++++++
> > >  drivers/pci/pcie/aspm.c      | 36 +++++++++++++++++++++++++++++++++++-
> > >  3 files changed, 69 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
> > > index 8df064b62a2f..e685586dc18b 100644
> > > --- a/drivers/pci/controller/vmd.c
> > > +++ b/drivers/pci/controller/vmd.c
> > > @@ -21,6 +21,8 @@
> > >
> > >  #include <asm/irqdomain.h>
> > >
> > > +#include "../pci.h"
> > > +
> > >  #define VMD_CFGBAR     0
> > >  #define VMD_MEMBAR1    2
> > >  #define VMD_MEMBAR2    4
> > > @@ -730,7 +732,7 @@ static void vmd_copy_host_bridge_flags(struct pci_host_bridge *root_bridge,
> > >  }
> > >
> > >  /*
> > > - * Enable ASPM and LTR settings on devices that aren't configured by BIOS.
> > > + * Enable LTR settings on devices that aren't configured by BIOS.
> > >   */
> > >  static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
> > >  {
> > > @@ -770,10 +772,27 @@ static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
> > >          * PCIe r6.0, sec 5.5.4.
> > >          */
> > >         pci_set_power_state_locked(pdev, PCI_D0);
> > > -       pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
> > 
> > Do I think correctly that this doesn't work because of the
> > aspm_disabled check in __pci_enable_link_state()?
> 
> Yes.
> 
> > 
> > >         return 0;
> > >  }
> > >
> > > +static long vmd_get_link_state(struct pci_dev *pdev, void *data)
> > > +{
> > > +       struct pci_bus *vmd_bus = data;
> > > +       struct pci_bus *bus = pdev->bus;
> > > +
> > > +       while (bus) {
> > > +               if (bus == vmd_bus)
> > > +                       return PCIE_LINK_STATE_ALL;
> > > +
> > > +               if (!bus->self)
> > > +                       break;
> > > +
> > > +               bus = bus->self->bus;
> > > +       }
> > 
> > If I'm not mistaken, it would be sufficient to do a check like
> > 
> >     if (pci_dev->bus->ops == &vmd_ops)
> >             return PCIE_LINK_STATE_ALL;
> > 
> > instead of the above, or if not then why not?
> 
> As a replacement in the loop, that does look sufficient. I'm not sure if
> you're also suggesting replacing the loop itself.
> 
> > 
> > > +
> > > +       return -ENODEV;
> > > +}
> > > +
> > >  static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> > >  {
> > >         struct pci_sysdata *sd = &vmd->sysdata;
> > > @@ -785,6 +804,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> > >         resource_size_t membar2_offset = 0x2000;
> > >         struct pci_bus *child;
> > >         struct pci_dev *dev;
> > > +       struct pcie_aspm_vmd_link_state vmd_link_state;
> > >         int ret;
> > >
> > >         /*
> > > @@ -911,6 +931,10 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> > >                 return -ENODEV;
> > >         }
> > >
> > > +       vmd_link_state.cb = vmd_get_link_state;
> > > +       vmd_link_state.data = vmd->bus;
> > > +       pci_register_vmd_link_state_cb(&vmd_link_state);
> > > +
> > >         vmd_copy_host_bridge_flags(pci_find_host_bridge(vmd->dev->bus),
> > >                                    to_pci_host_bridge(vmd->bus->bridge));
> > >
> > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > > index 12215ee72afb..dcf7d39c660f 100644
> > > --- a/drivers/pci/pci.h
> > > +++ b/drivers/pci/pci.h
> > > @@ -821,6 +821,12 @@ void pci_configure_aspm_l1ss(struct pci_dev *dev);
> > >  void pci_save_aspm_l1ss_state(struct pci_dev *dev);
> > >  void pci_restore_aspm_l1ss_state(struct pci_dev *dev);
> > >
> > > +
> > > +struct pcie_aspm_vmd_link_state {
> > > +       long (*cb)(struct pci_dev *pdev, void *data);
> > > +       void *data;
> > > +};
> > > +
> > >  #ifdef CONFIG_PCIEASPM
> > >  void pcie_aspm_init_link_state(struct pci_dev *pdev);
> > >  void pcie_aspm_exit_link_state(struct pci_dev *pdev);
> > > @@ -828,6 +834,7 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
> > >  void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
> > >  void pci_configure_ltr(struct pci_dev *pdev);
> > >  void pci_bridge_reconfigure_ltr(struct pci_dev *pdev);
> > > +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state);
> > >  #else
> > >  static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
> > >  static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
> > > @@ -835,6 +842,7 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
> > >  static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
> > >  static inline void pci_configure_ltr(struct pci_dev *pdev) { }
> > >  static inline void pci_bridge_reconfigure_ltr(struct pci_dev *pdev) { }
> > > +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state) { }
> > >  #endif
> > >
> > >  #ifdef CONFIG_PCIE_ECRC
> > > diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> > > index 29fcb0689a91..c609d3c309be 100644
> > > --- a/drivers/pci/pcie/aspm.c
> > > +++ b/drivers/pci/pcie/aspm.c
> > > @@ -320,6 +320,27 @@ static int policy_to_clkpm_state(struct pcie_link_state *link)
> > >         return 0;
> > >  }
> > >
> > > +static struct pcie_aspm_vmd_link_state vmd_state;
> > > +
> > > +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state)
> > > +{
> > > +       mutex_lock(&aspm_lock);
> > > +       vmd_state.cb = state->cb;
> > > +       vmd_state.data = state->data;
> > > +       mutex_unlock(&aspm_lock);
> > > +}
> > > +EXPORT_SYMBOL_GPL(pci_register_vmd_link_state_cb);
> > > +
> > > +static long pci_get_vmd_link_state(struct pci_dev *pdev)
> > > +{
> > > +       int state = -ENODEV;
> > > +
> > > +       if (vmd_state.cb)
> > > +               state = vmd_state.cb(pdev, vmd_state.data);
> > > +
> > > +       return state;
> > > +}
> > > +
> > >  static void pci_update_aspm_saved_state(struct pci_dev *dev)
> > >  {
> > >         struct pci_cap_saved_state *save_state;
> > > @@ -794,6 +815,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
> > >         u32 parent_lnkcap, child_lnkcap;
> > >         u16 parent_lnkctl, child_lnkctl;
> > >         struct pci_bus *linkbus = parent->subordinate;
> > > +       int vmd_aspm_default;
> > >
> > >         if (blacklist) {
> > >                 /* Set enabled/disable so that we will disable ASPM later */
> > > @@ -865,8 +887,20 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
> > >                 pcie_capability_write_word(child, PCI_EXP_LNKCTL, child_lnkctl);
> > >         }
> > >
> > > +       /*
> > > +        * Devices behind Intel VMD operate on a synthetic PCI bus that BIOS
> > > +        * and ACPI do not enumerate or configure. ASPM for these devices must
> > > +        * be managed by the VMD driver itself, independent of global ACPI ASPM
> > > +        * constraints. This callback mechanism allows selective ASPM
> > > +        * enablement for such domains.
> > > +        */
> > > +       vmd_aspm_default = pci_get_vmd_link_state(parent);
> > > +
> > >         /* Save default state */
> > > -       link->aspm_default = link->aspm_enabled;
> > > +       if (vmd_aspm_default < 0)
> > > +               link->aspm_default = link->aspm_enabled;
> > > +       else
> > > +               link->aspm_default = vmd_aspm_default;
> > 
> > Well, this is not nice.
> > 
> > First off, it adds VMD-specific stuff to otherwise generic ASPM code.
> > Second, it doesn't actually do anything about the aspm_disabled checks
> > all over the place, so they will still trigger even though ASPM will
> > be effectively enabled for devices on the VMD bus.
> 
> I agree that it's not nice to be mixing VMD specific code here. It's a
> working bad solution to come up with a working good solution :)
> 
> The reason this patch works is that the aspm_disabled checks only gate
> OS-controlled ASPM configuration. They don't affect the BIOS default
> values, which are what we're setting in link->aspm_default. The ASPM
> code uses link->aspm_default as the fallback when ASPM is globally
> disabled, which is exactly what we want for devices behind VMD where the
> driver, not BIOS, effectively is the platform provider of the defaults.

Oh, this was a big news to me.

So what you're saying is that if aspm_disabled is set, ->aspm_disable 
cannot be set and thus pcie_config_aspm_link() that is not gated by 
aspm_disabled can alter ASPM state despite OS not having ASPM control???

...That's really odd logic which the ASPM driver seems to be full of.

-- 
 i.

> I put it here using a callback value because this where the BIOS
> defaults are saved for each device.
> 
> > 
> > >
> > >         /* Setup initial capable state. Will be updated later */
> > >         link->aspm_capable = link->aspm_support;
> > >
> > > base-commit: d0b3b7b22dfa1f4b515fd3a295b3fd958f9e81af
> > > --
> > 
> > It appears to me that the underlying problem is that aspm_disabled is
> > global and it is set during PCI root bus creation in
> > acpi_pci_root_add().  Thus it affects all of the PCI buses even though
> > the BIOS says that it wants to control ASPM for this particular PCIe
> > hierarchy. If there were another PCI root enumerated by ACPI where
> > the OS would be allowed to control ASPM, it would not work just like
> > the VMD case.
> 
> It would work in the non-VMD case because it has the BIOS default to
> use. VMD does not.
> 
> > 
> > To me, this suggests an approach based on moving the "ASPM disabled
> > because the BIOS wants to control it" setting to pci_bus_flags_t and
> > setting it on a per-hierarchy basis.  Since the VMD bus is a separate
> > PCIe hierarchy from the kernel perspective, it will not have this flag
> > set and the OS should be able to configure ASPM for devices on that
> > bus.
> > 
> > Do I think correctly or am I overlooking something here?
> 
> It’s definitely along those lines. The issue is really caused by two
> things:
> 
>     1. aspm_disabled global prevents OS control of ASPM
>     2. For VMD, there’s no BIOS provided link->aspm_default
> 
> The solution I proposed addresses the problem by having a mechanism for
> the VMD driver to supply its own defaults in place of the missing BIOS
> configuration.
> 
> Using pci_bus_flags_t could work as well, but would rather just say that
> aspm_disabled doesn't apply to the bus hierarchy, allowing the current
> pci_enable_link_state_locked() solution to work all the time. Either
> way, I'm definitely looking for a cleaner solution. The above hopefully
> clarifies the situation.
> 
> David
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] PCI/ASPM: Allow ASPM enablement for devices behind Intel VMD
  2025-07-11 14:49     ` Ilpo Järvinen
@ 2025-07-11 16:06       ` David Box
  2025-07-11 21:19         ` Bjorn Helgaas
  0 siblings, 1 reply; 8+ messages in thread
From: David Box @ 2025-07-11 16:06 UTC (permalink / raw)
  To: Ilpo Järvinen, bhelgaas
  Cc: Rafael J. Wysocki, vicamo.yang, kenny, nirmal.patel, linux-pm,
	linux-pci, LKML

On Fri, Jul 11, 2025 at 05:49:03PM +0300, Ilpo Järvinen wrote:
> On Fri, 11 Jul 2025, David Box wrote:
> 
> > Hey Rafael,
> > 
> > On Thu, Jul 10, 2025 at 09:53:18PM +0200, Rafael J. Wysocki wrote:
> > > On Thu, Jul 10, 2025 at 3:16 AM David E. Box
> > > <david.e.box@linux.intel.com> wrote:
> > > >
> > > > Devices behind Intel's Volume Management Device (VMD) controller reside on
> > > > a synthetic PCI hierarchy that is intentionally hidden from ACPI and
> > > > firmware. As such, BIOS does not configure ASPM for these devices, and the
> > > > responsibility for link power management, including ASPM and LTR, falls
> > > > entirely to the VMD driver.
> > > >
> > > > However, the current PCIe ASPM code prevents ASPM configuration when the
> > > > ACPI_FADT_NO_ASPM flag is set, disallowing OS management. This leaves ASPM
> > > > permanently disabled for these devices, contrary to the platform's design
> > > > intent.
> > > >
> > > > Introduce a callback mechanism that allows the VMD driver to enable ASPM
> > > > for its domain, bypassing the global ACPI_FADT_NO_ASPM restriction that is
> > > > not applicable in this context. This ensures that devices behind VMD can
> > > > benefit from ASPM savings as originally intended.
> > > >
> > > > Link: https://lore.kernel.org/linux-pm/0b166ece-eeec-ba5d-2212-50d995611cef@panix.com
> > > > Signed-off-by: David E. Box <david.e.box@linux.intel.com>
> > > 
> > > First of all, thanks for doing this work, much appreciated!
> > > 
> > > > ---
> > > >  drivers/pci/controller/vmd.c | 28 ++++++++++++++++++++++++++--
> > > >  drivers/pci/pci.h            |  8 ++++++++
> > > >  drivers/pci/pcie/aspm.c      | 36 +++++++++++++++++++++++++++++++++++-
> > > >  3 files changed, 69 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
> > > > index 8df064b62a2f..e685586dc18b 100644
> > > > --- a/drivers/pci/controller/vmd.c
> > > > +++ b/drivers/pci/controller/vmd.c
> > > > @@ -21,6 +21,8 @@
> > > >
> > > >  #include <asm/irqdomain.h>
> > > >
> > > > +#include "../pci.h"
> > > > +
> > > >  #define VMD_CFGBAR     0
> > > >  #define VMD_MEMBAR1    2
> > > >  #define VMD_MEMBAR2    4
> > > > @@ -730,7 +732,7 @@ static void vmd_copy_host_bridge_flags(struct pci_host_bridge *root_bridge,
> > > >  }
> > > >
> > > >  /*
> > > > - * Enable ASPM and LTR settings on devices that aren't configured by BIOS.
> > > > + * Enable LTR settings on devices that aren't configured by BIOS.
> > > >   */
> > > >  static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
> > > >  {
> > > > @@ -770,10 +772,27 @@ static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
> > > >          * PCIe r6.0, sec 5.5.4.
> > > >          */
> > > >         pci_set_power_state_locked(pdev, PCI_D0);
> > > > -       pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
> > > 
> > > Do I think correctly that this doesn't work because of the
> > > aspm_disabled check in __pci_enable_link_state()?
> > 
> > Yes.
> > 
> > > 
> > > >         return 0;
> > > >  }
> > > >
> > > > +static long vmd_get_link_state(struct pci_dev *pdev, void *data)
> > > > +{
> > > > +       struct pci_bus *vmd_bus = data;
> > > > +       struct pci_bus *bus = pdev->bus;
> > > > +
> > > > +       while (bus) {
> > > > +               if (bus == vmd_bus)
> > > > +                       return PCIE_LINK_STATE_ALL;
> > > > +
> > > > +               if (!bus->self)
> > > > +                       break;
> > > > +
> > > > +               bus = bus->self->bus;
> > > > +       }
> > > 
> > > If I'm not mistaken, it would be sufficient to do a check like
> > > 
> > >     if (pci_dev->bus->ops == &vmd_ops)
> > >             return PCIE_LINK_STATE_ALL;
> > > 
> > > instead of the above, or if not then why not?
> > 
> > As a replacement in the loop, that does look sufficient. I'm not sure if
> > you're also suggesting replacing the loop itself.
> > 
> > > 
> > > > +
> > > > +       return -ENODEV;
> > > > +}
> > > > +
> > > >  static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> > > >  {
> > > >         struct pci_sysdata *sd = &vmd->sysdata;
> > > > @@ -785,6 +804,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> > > >         resource_size_t membar2_offset = 0x2000;
> > > >         struct pci_bus *child;
> > > >         struct pci_dev *dev;
> > > > +       struct pcie_aspm_vmd_link_state vmd_link_state;
> > > >         int ret;
> > > >
> > > >         /*
> > > > @@ -911,6 +931,10 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> > > >                 return -ENODEV;
> > > >         }
> > > >
> > > > +       vmd_link_state.cb = vmd_get_link_state;
> > > > +       vmd_link_state.data = vmd->bus;
> > > > +       pci_register_vmd_link_state_cb(&vmd_link_state);
> > > > +
> > > >         vmd_copy_host_bridge_flags(pci_find_host_bridge(vmd->dev->bus),
> > > >                                    to_pci_host_bridge(vmd->bus->bridge));
> > > >
> > > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > > > index 12215ee72afb..dcf7d39c660f 100644
> > > > --- a/drivers/pci/pci.h
> > > > +++ b/drivers/pci/pci.h
> > > > @@ -821,6 +821,12 @@ void pci_configure_aspm_l1ss(struct pci_dev *dev);
> > > >  void pci_save_aspm_l1ss_state(struct pci_dev *dev);
> > > >  void pci_restore_aspm_l1ss_state(struct pci_dev *dev);
> > > >
> > > > +
> > > > +struct pcie_aspm_vmd_link_state {
> > > > +       long (*cb)(struct pci_dev *pdev, void *data);
> > > > +       void *data;
> > > > +};
> > > > +
> > > >  #ifdef CONFIG_PCIEASPM
> > > >  void pcie_aspm_init_link_state(struct pci_dev *pdev);
> > > >  void pcie_aspm_exit_link_state(struct pci_dev *pdev);
> > > > @@ -828,6 +834,7 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
> > > >  void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
> > > >  void pci_configure_ltr(struct pci_dev *pdev);
> > > >  void pci_bridge_reconfigure_ltr(struct pci_dev *pdev);
> > > > +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state);
> > > >  #else
> > > >  static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
> > > >  static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
> > > > @@ -835,6 +842,7 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
> > > >  static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
> > > >  static inline void pci_configure_ltr(struct pci_dev *pdev) { }
> > > >  static inline void pci_bridge_reconfigure_ltr(struct pci_dev *pdev) { }
> > > > +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state) { }
> > > >  #endif
> > > >
> > > >  #ifdef CONFIG_PCIE_ECRC
> > > > diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> > > > index 29fcb0689a91..c609d3c309be 100644
> > > > --- a/drivers/pci/pcie/aspm.c
> > > > +++ b/drivers/pci/pcie/aspm.c
> > > > @@ -320,6 +320,27 @@ static int policy_to_clkpm_state(struct pcie_link_state *link)
> > > >         return 0;
> > > >  }
> > > >
> > > > +static struct pcie_aspm_vmd_link_state vmd_state;
> > > > +
> > > > +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state)
> > > > +{
> > > > +       mutex_lock(&aspm_lock);
> > > > +       vmd_state.cb = state->cb;
> > > > +       vmd_state.data = state->data;
> > > > +       mutex_unlock(&aspm_lock);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(pci_register_vmd_link_state_cb);
> > > > +
> > > > +static long pci_get_vmd_link_state(struct pci_dev *pdev)
> > > > +{
> > > > +       int state = -ENODEV;
> > > > +
> > > > +       if (vmd_state.cb)
> > > > +               state = vmd_state.cb(pdev, vmd_state.data);
> > > > +
> > > > +       return state;
> > > > +}
> > > > +
> > > >  static void pci_update_aspm_saved_state(struct pci_dev *dev)
> > > >  {
> > > >         struct pci_cap_saved_state *save_state;
> > > > @@ -794,6 +815,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
> > > >         u32 parent_lnkcap, child_lnkcap;
> > > >         u16 parent_lnkctl, child_lnkctl;
> > > >         struct pci_bus *linkbus = parent->subordinate;
> > > > +       int vmd_aspm_default;
> > > >
> > > >         if (blacklist) {
> > > >                 /* Set enabled/disable so that we will disable ASPM later */
> > > > @@ -865,8 +887,20 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
> > > >                 pcie_capability_write_word(child, PCI_EXP_LNKCTL, child_lnkctl);
> > > >         }
> > > >
> > > > +       /*
> > > > +        * Devices behind Intel VMD operate on a synthetic PCI bus that BIOS
> > > > +        * and ACPI do not enumerate or configure. ASPM for these devices must
> > > > +        * be managed by the VMD driver itself, independent of global ACPI ASPM
> > > > +        * constraints. This callback mechanism allows selective ASPM
> > > > +        * enablement for such domains.
> > > > +        */
> > > > +       vmd_aspm_default = pci_get_vmd_link_state(parent);
> > > > +
> > > >         /* Save default state */
> > > > -       link->aspm_default = link->aspm_enabled;
> > > > +       if (vmd_aspm_default < 0)
> > > > +               link->aspm_default = link->aspm_enabled;
> > > > +       else
> > > > +               link->aspm_default = vmd_aspm_default;
> > > 
> > > Well, this is not nice.
> > > 
> > > First off, it adds VMD-specific stuff to otherwise generic ASPM code.
> > > Second, it doesn't actually do anything about the aspm_disabled checks
> > > all over the place, so they will still trigger even though ASPM will
> > > be effectively enabled for devices on the VMD bus.
> > 
> > I agree that it's not nice to be mixing VMD specific code here. It's a
> > working bad solution to come up with a working good solution :)
> > 
> > The reason this patch works is that the aspm_disabled checks only gate
> > OS-controlled ASPM configuration. They don't affect the BIOS default
> > values, which are what we're setting in link->aspm_default. The ASPM
> > code uses link->aspm_default as the fallback when ASPM is globally
> > disabled, which is exactly what we want for devices behind VMD where the
> > driver, not BIOS, effectively is the platform provider of the defaults.
> 
> Oh, this was a big news to me.
> 
> So what you're saying is that if aspm_disabled is set, ->aspm_disable 
> cannot be set and thus pcie_config_aspm_link() that is not gated by 
> aspm_disabled can alter ASPM state despite OS not having ASPM control???

Yes, that’s correct. Bjorn can confirm, but I believe this is by design. The
aspm_disabled flag is really a bit of a misnomer. It probably should have been
called something like os_aspm_disabled. The intent as I understand it is that,
when disallowed, the OS cannot select or manage the active ASPM policy,
but it can still configure the link to match the BIOS provided policy.

In other words, ASPM isn’t fully disabled. It’s just not under OS control. The
BIOS values, reflected in link->aspm_default, remain valid and
pcie_config_aspm_link() can apply them regardless of the aspm_disabled setting.

David

> 
> ...That's really odd logic which the ASPM driver seems to be full of.
> 
> -- 
>  i.
> 
> > I put it here using a callback value because this where the BIOS
> > defaults are saved for each device.
> > 
> > > 
> > > >
> > > >         /* Setup initial capable state. Will be updated later */
> > > >         link->aspm_capable = link->aspm_support;
> > > >
> > > > base-commit: d0b3b7b22dfa1f4b515fd3a295b3fd958f9e81af
> > > > --
> > > 
> > > It appears to me that the underlying problem is that aspm_disabled is
> > > global and it is set during PCI root bus creation in
> > > acpi_pci_root_add().  Thus it affects all of the PCI buses even though
> > > the BIOS says that it wants to control ASPM for this particular PCIe
> > > hierarchy. If there were another PCI root enumerated by ACPI where
> > > the OS would be allowed to control ASPM, it would not work just like
> > > the VMD case.
> > 
> > It would work in the non-VMD case because it has the BIOS default to
> > use. VMD does not.
> > 
> > > 
> > > To me, this suggests an approach based on moving the "ASPM disabled
> > > because the BIOS wants to control it" setting to pci_bus_flags_t and
> > > setting it on a per-hierarchy basis.  Since the VMD bus is a separate
> > > PCIe hierarchy from the kernel perspective, it will not have this flag
> > > set and the OS should be able to configure ASPM for devices on that
> > > bus.
> > > 
> > > Do I think correctly or am I overlooking something here?
> > 
> > It’s definitely along those lines. The issue is really caused by two
> > things:
> > 
> >     1. aspm_disabled global prevents OS control of ASPM
> >     2. For VMD, there’s no BIOS provided link->aspm_default
> > 
> > The solution I proposed addresses the problem by having a mechanism for
> > the VMD driver to supply its own defaults in place of the missing BIOS
> > configuration.
> > 
> > Using pci_bus_flags_t could work as well, but would rather just say that
> > aspm_disabled doesn't apply to the bus hierarchy, allowing the current
> > pci_enable_link_state_locked() solution to work all the time. Either
> > way, I'm definitely looking for a cleaner solution. The above hopefully
> > clarifies the situation.
> > 
> > David
> > 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] PCI/ASPM: Allow ASPM enablement for devices behind Intel VMD
  2025-07-11 16:06       ` David Box
@ 2025-07-11 21:19         ` Bjorn Helgaas
  0 siblings, 0 replies; 8+ messages in thread
From: Bjorn Helgaas @ 2025-07-11 21:19 UTC (permalink / raw)
  To: David Box
  Cc: Ilpo Järvinen, bhelgaas, Rafael J. Wysocki, vicamo.yang,
	kenny, nirmal.patel, linux-pm, linux-pci, LKML

On Fri, Jul 11, 2025 at 09:06:27AM -0700, David Box wrote:
> On Fri, Jul 11, 2025 at 05:49:03PM +0300, Ilpo Järvinen wrote:
> > On Fri, 11 Jul 2025, David Box wrote:
> > > On Thu, Jul 10, 2025 at 09:53:18PM +0200, Rafael J. Wysocki wrote:
> > > > On Thu, Jul 10, 2025 at 3:16 AM David E. Box
> > > > <david.e.box@linux.intel.com> wrote:
> > > > >
> > > > > Devices behind Intel's Volume Management Device (VMD) controller reside on
> > > > > a synthetic PCI hierarchy that is intentionally hidden from ACPI and
> > > > > firmware. As such, BIOS does not configure ASPM for these devices, and the
> > > > > responsibility for link power management, including ASPM and LTR, falls
> > > > > entirely to the VMD driver.
> > > > >
> > > > > However, the current PCIe ASPM code prevents ASPM configuration when the
> > > > > ACPI_FADT_NO_ASPM flag is set, disallowing OS management. This leaves ASPM
> > > > > permanently disabled for these devices, contrary to the platform's design
> > > > > intent.
> > > > >
> > > > > Introduce a callback mechanism that allows the VMD driver to enable ASPM
> > > > > for its domain, bypassing the global ACPI_FADT_NO_ASPM restriction that is
> > > > > not applicable in this context. This ensures that devices behind VMD can
> > > > > benefit from ASPM savings as originally intended.
> ...

> > > > > +       /*
> > > > > +        * Devices behind Intel VMD operate on a synthetic PCI bus that BIOS
> > > > > +        * and ACPI do not enumerate or configure. ASPM for these devices must
> > > > > +        * be managed by the VMD driver itself, independent of global ACPI ASPM
> > > > > +        * constraints. This callback mechanism allows selective ASPM
> > > > > +        * enablement for such domains.
> > > > > +        */
> > > > > +       vmd_aspm_default = pci_get_vmd_link_state(parent);
> > > > > +
> > > > >         /* Save default state */
> > > > > -       link->aspm_default = link->aspm_enabled;
> > > > > +       if (vmd_aspm_default < 0)
> > > > > +               link->aspm_default = link->aspm_enabled;
> > > > > +       else
> > > > > +               link->aspm_default = vmd_aspm_default;
> > > > 
> > > > Well, this is not nice.
> > > > 
> > > > First off, it adds VMD-specific stuff to otherwise generic
> > > > ASPM code.  Second, it doesn't actually do anything about the
> > > > aspm_disabled checks all over the place, so they will still
> > > > trigger even though ASPM will be effectively enabled for
> > > > devices on the VMD bus.
> > > 
> > > I agree that it's not nice to be mixing VMD specific code here.
> > > It's a working bad solution to come up with a working good
> > > solution :)
> > > 
> > > The reason this patch works is that the aspm_disabled checks
> > > only gate OS-controlled ASPM configuration. They don't affect
> > > the BIOS default values, which are what we're setting in
> > > link->aspm_default. The ASPM code uses link->aspm_default as the
> > > fallback when ASPM is globally disabled, which is exactly what
> > > we want for devices behind VMD where the driver, not BIOS,
> > > effectively is the platform provider of the defaults.
> > 
> > Oh, this was a big news to me.
> > 
> > So what you're saying is that if aspm_disabled is set,
> > ->aspm_disable cannot be set and thus pcie_config_aspm_link() that
> > is not gated by aspm_disabled can alter ASPM state despite OS not
> > having ASPM control???
> 
> Yes, that’s correct. Bjorn can confirm, but I believe this is by
> design. The aspm_disabled flag is really a bit of a misnomer. It
> probably should have been called something like os_aspm_disabled.
> The intent as I understand it is that, when disallowed, the OS
> cannot select or manage the active ASPM policy, but it can still
> configure the link to match the BIOS provided policy.

Right, there's a long ugly history of this, here's the pcie_no_aspm()
comment:

  /*
   * Disabling ASPM is intended to prevent the kernel from modifying
   * existing hardware state, not to clear existing state. To that end:
   * (a) set policy to POLICY_DEFAULT in order to avoid changing state
   * (b) prevent userspace from changing policy
   */
  if (!aspm_force) {
	  aspm_policy = POLICY_DEFAULT;
	  aspm_disabled = 1;
  }

which came from 3c076351c402 ("PCI: Rework ASPM disable code").  All
the ASPM disable, force, support_enabled, policy, etc flags make this
ugly and impossible to read.  I guess renaming might help a little.

> In other words, ASPM isn’t fully disabled. It’s just not under OS
> control. The BIOS values, reflected in link->aspm_default, remain
> valid and pcie_config_aspm_link() can apply them regardless of the
> aspm_disabled setting.
> 
> > ...That's really odd logic which the ASPM driver seems to be full of.

+10

Also the ugliness of pcie_aspm_init_link_state() being called
completely out of the usual enumeration flow.  And the parallel device
hierarchy maintained in struct pcie_link_state.  And config options to
set the default policy.  Ugh.

I hope we can avoid adding VMD-specific code in aspm.c.

Bjorn

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] PCI/ASPM: Allow ASPM enablement for devices behind Intel VMD
  2025-07-11 14:33   ` David Box
  2025-07-11 14:49     ` Ilpo Järvinen
@ 2025-07-13  4:47     ` David Box
  1 sibling, 0 replies; 8+ messages in thread
From: David Box @ 2025-07-13  4:47 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: bhelgaas, andrea.righi, vicamo.yang, kenny, nirmal.patel,
	linux-pm, linux-pci, ilpo.jarvinen, linux-kernel

On Fri, Jul 11, 2025 at 07:33:15AM -0700, David Box wrote:
> Hey Rafael,
> 
> On Thu, Jul 10, 2025 at 09:53:18PM +0200, Rafael J. Wysocki wrote:
> > On Thu, Jul 10, 2025 at 3:16 AM David E. Box
> > <david.e.box@linux.intel.com> wrote:
> > >
> > > Devices behind Intel's Volume Management Device (VMD) controller reside on
> > > a synthetic PCI hierarchy that is intentionally hidden from ACPI and
> > > firmware. As such, BIOS does not configure ASPM for these devices, and the
> > > responsibility for link power management, including ASPM and LTR, falls
> > > entirely to the VMD driver.
> > >
> > > However, the current PCIe ASPM code prevents ASPM configuration when the
> > > ACPI_FADT_NO_ASPM flag is set, disallowing OS management. This leaves ASPM
> > > permanently disabled for these devices, contrary to the platform's design
> > > intent.
> > >
> > > Introduce a callback mechanism that allows the VMD driver to enable ASPM
> > > for its domain, bypassing the global ACPI_FADT_NO_ASPM restriction that is
> > > not applicable in this context. This ensures that devices behind VMD can
> > > benefit from ASPM savings as originally intended.
> > >
> > > Link: https://lore.kernel.org/linux-pm/0b166ece-eeec-ba5d-2212-50d995611cef@panix.com
> > > Signed-off-by: David E. Box <david.e.box@linux.intel.com>
> > 
> > First of all, thanks for doing this work, much appreciated!
> > 
> > > ---
> > >  drivers/pci/controller/vmd.c | 28 ++++++++++++++++++++++++++--
> > >  drivers/pci/pci.h            |  8 ++++++++
> > >  drivers/pci/pcie/aspm.c      | 36 +++++++++++++++++++++++++++++++++++-
> > >  3 files changed, 69 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
> > > index 8df064b62a2f..e685586dc18b 100644
> > > --- a/drivers/pci/controller/vmd.c
> > > +++ b/drivers/pci/controller/vmd.c
> > > @@ -21,6 +21,8 @@
> > >
> > >  #include <asm/irqdomain.h>
> > >
> > > +#include "../pci.h"
> > > +
> > >  #define VMD_CFGBAR     0
> > >  #define VMD_MEMBAR1    2
> > >  #define VMD_MEMBAR2    4
> > > @@ -730,7 +732,7 @@ static void vmd_copy_host_bridge_flags(struct pci_host_bridge *root_bridge,
> > >  }
> > >
> > >  /*
> > > - * Enable ASPM and LTR settings on devices that aren't configured by BIOS.
> > > + * Enable LTR settings on devices that aren't configured by BIOS.
> > >   */
> > >  static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
> > >  {
> > > @@ -770,10 +772,27 @@ static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
> > >          * PCIe r6.0, sec 5.5.4.
> > >          */
> > >         pci_set_power_state_locked(pdev, PCI_D0);
> > > -       pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
> > 
> > Do I think correctly that this doesn't work because of the
> > aspm_disabled check in __pci_enable_link_state()?
> 
> Yes.
> 
> > 
> > >         return 0;
> > >  }
> > >
> > > +static long vmd_get_link_state(struct pci_dev *pdev, void *data)
> > > +{
> > > +       struct pci_bus *vmd_bus = data;
> > > +       struct pci_bus *bus = pdev->bus;
> > > +
> > > +       while (bus) {
> > > +               if (bus == vmd_bus)
> > > +                       return PCIE_LINK_STATE_ALL;
> > > +
> > > +               if (!bus->self)
> > > +                       break;
> > > +
> > > +               bus = bus->self->bus;
> > > +       }
> > 
> > If I'm not mistaken, it would be sufficient to do a check like
> > 
> >     if (pci_dev->bus->ops == &vmd_ops)
> >             return PCIE_LINK_STATE_ALL;
> > 
> > instead of the above, or if not then why not?
> 
> As a replacement in the loop, that does look sufficient. I'm not sure if
> you're also suggesting replacing the loop itself.
> 
> > 
> > > +
> > > +       return -ENODEV;
> > > +}
> > > +
> > >  static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> > >  {
> > >         struct pci_sysdata *sd = &vmd->sysdata;
> > > @@ -785,6 +804,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> > >         resource_size_t membar2_offset = 0x2000;
> > >         struct pci_bus *child;
> > >         struct pci_dev *dev;
> > > +       struct pcie_aspm_vmd_link_state vmd_link_state;
> > >         int ret;
> > >
> > >         /*
> > > @@ -911,6 +931,10 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> > >                 return -ENODEV;
> > >         }
> > >
> > > +       vmd_link_state.cb = vmd_get_link_state;
> > > +       vmd_link_state.data = vmd->bus;
> > > +       pci_register_vmd_link_state_cb(&vmd_link_state);
> > > +
> > >         vmd_copy_host_bridge_flags(pci_find_host_bridge(vmd->dev->bus),
> > >                                    to_pci_host_bridge(vmd->bus->bridge));
> > >
> > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > > index 12215ee72afb..dcf7d39c660f 100644
> > > --- a/drivers/pci/pci.h
> > > +++ b/drivers/pci/pci.h
> > > @@ -821,6 +821,12 @@ void pci_configure_aspm_l1ss(struct pci_dev *dev);
> > >  void pci_save_aspm_l1ss_state(struct pci_dev *dev);
> > >  void pci_restore_aspm_l1ss_state(struct pci_dev *dev);
> > >
> > > +
> > > +struct pcie_aspm_vmd_link_state {
> > > +       long (*cb)(struct pci_dev *pdev, void *data);
> > > +       void *data;
> > > +};
> > > +
> > >  #ifdef CONFIG_PCIEASPM
> > >  void pcie_aspm_init_link_state(struct pci_dev *pdev);
> > >  void pcie_aspm_exit_link_state(struct pci_dev *pdev);
> > > @@ -828,6 +834,7 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
> > >  void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
> > >  void pci_configure_ltr(struct pci_dev *pdev);
> > >  void pci_bridge_reconfigure_ltr(struct pci_dev *pdev);
> > > +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state);
> > >  #else
> > >  static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
> > >  static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
> > > @@ -835,6 +842,7 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
> > >  static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
> > >  static inline void pci_configure_ltr(struct pci_dev *pdev) { }
> > >  static inline void pci_bridge_reconfigure_ltr(struct pci_dev *pdev) { }
> > > +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state) { }
> > >  #endif
> > >
> > >  #ifdef CONFIG_PCIE_ECRC
> > > diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> > > index 29fcb0689a91..c609d3c309be 100644
> > > --- a/drivers/pci/pcie/aspm.c
> > > +++ b/drivers/pci/pcie/aspm.c
> > > @@ -320,6 +320,27 @@ static int policy_to_clkpm_state(struct pcie_link_state *link)
> > >         return 0;
> > >  }
> > >
> > > +static struct pcie_aspm_vmd_link_state vmd_state;
> > > +
> > > +void pci_register_vmd_link_state_cb(struct pcie_aspm_vmd_link_state *state)
> > > +{
> > > +       mutex_lock(&aspm_lock);
> > > +       vmd_state.cb = state->cb;
> > > +       vmd_state.data = state->data;
> > > +       mutex_unlock(&aspm_lock);
> > > +}
> > > +EXPORT_SYMBOL_GPL(pci_register_vmd_link_state_cb);
> > > +
> > > +static long pci_get_vmd_link_state(struct pci_dev *pdev)
> > > +{
> > > +       int state = -ENODEV;
> > > +
> > > +       if (vmd_state.cb)
> > > +               state = vmd_state.cb(pdev, vmd_state.data);
> > > +
> > > +       return state;
> > > +}
> > > +
> > >  static void pci_update_aspm_saved_state(struct pci_dev *dev)
> > >  {
> > >         struct pci_cap_saved_state *save_state;
> > > @@ -794,6 +815,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
> > >         u32 parent_lnkcap, child_lnkcap;
> > >         u16 parent_lnkctl, child_lnkctl;
> > >         struct pci_bus *linkbus = parent->subordinate;
> > > +       int vmd_aspm_default;
> > >
> > >         if (blacklist) {
> > >                 /* Set enabled/disable so that we will disable ASPM later */
> > > @@ -865,8 +887,20 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
> > >                 pcie_capability_write_word(child, PCI_EXP_LNKCTL, child_lnkctl);
> > >         }
> > >
> > > +       /*
> > > +        * Devices behind Intel VMD operate on a synthetic PCI bus that BIOS
> > > +        * and ACPI do not enumerate or configure. ASPM for these devices must
> > > +        * be managed by the VMD driver itself, independent of global ACPI ASPM
> > > +        * constraints. This callback mechanism allows selective ASPM
> > > +        * enablement for such domains.
> > > +        */
> > > +       vmd_aspm_default = pci_get_vmd_link_state(parent);
> > > +
> > >         /* Save default state */
> > > -       link->aspm_default = link->aspm_enabled;
> > > +       if (vmd_aspm_default < 0)
> > > +               link->aspm_default = link->aspm_enabled;
> > > +       else
> > > +               link->aspm_default = vmd_aspm_default;
> > 
> > Well, this is not nice.
> > 
> > First off, it adds VMD-specific stuff to otherwise generic ASPM code.
> > Second, it doesn't actually do anything about the aspm_disabled checks
> > all over the place, so they will still trigger even though ASPM will
> > be effectively enabled for devices on the VMD bus.
> 
> I agree that it's not nice to be mixing VMD specific code here. It's a
> working bad solution to come up with a working good solution :)
> 
> The reason this patch works is that the aspm_disabled checks only gate
> OS-controlled ASPM configuration. They don't affect the BIOS default
> values, which are what we're setting in link->aspm_default. The ASPM
> code uses link->aspm_default as the fallback when ASPM is globally
> disabled, which is exactly what we want for devices behind VMD where the
> driver, not BIOS, effectively is the platform provider of the defaults.
> 
> I put it here using a callback value because this where the BIOS
> defaults are saved for each device.
> 
> > 
> > >
> > >         /* Setup initial capable state. Will be updated later */
> > >         link->aspm_capable = link->aspm_support;
> > >
> > > base-commit: d0b3b7b22dfa1f4b515fd3a295b3fd958f9e81af
> > > --
> > 
> > It appears to me that the underlying problem is that aspm_disabled is
> > global and it is set during PCI root bus creation in
> > acpi_pci_root_add().  Thus it affects all of the PCI buses even though
> > the BIOS says that it wants to control ASPM for this particular PCIe
> > hierarchy. If there were another PCI root enumerated by ACPI where
> > the OS would be allowed to control ASPM, it would not work just like
> > the VMD case.
> 
> It would work in the non-VMD case because it has the BIOS default to
> use. VMD does not.
> 
> > 
> > To me, this suggests an approach based on moving the "ASPM disabled
> > because the BIOS wants to control it" setting to pci_bus_flags_t and
> > setting it on a per-hierarchy basis.

A bus flag would address the need to bypass aspm_disabled for a given
hierarchy, but it doesn’t solve the second part of the problem, providing
the policy to apply in place of the missing BIOS defaults.

Ideally, policy should be determined within pcie_aspm_cap_init(), rather
than requiring a separate walk, which the VMD driver does now (but only
works when aspm_disabled is not set).

To support this cleanly, what if we added a pm_ops pointer to struct
pci_bus?

This pci_bus_pm_ops struct could include a method, like get_aspm_default(),
that aspm.c would query to retrieve the appropriate default policy if
present. The presence of this pm_ops interface would also indicate that
aspm_disabled should not apply to that bus, avoiding the need for an
additional flag.

The relevant logic in pcie_aspm_cap_init() would look like this:

        struct pci_bus_pm_ops *pm_ops = parent->bus->pm_ops;

        ...

        if (pm_ops && pm_ops->get_aspm_default)
                link->aspm_default = pm_ops->get_aspm_default(parent);
        else
                link->aspm_default = link->aspm_enabled;

Would this be a more acceptable direction?

David

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-07-13  4:47 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-10  1:16 [PATCH] PCI/ASPM: Allow ASPM enablement for devices behind Intel VMD David E. Box
2025-07-10  2:17 ` Kenneth R. Crudup
2025-07-10 19:53 ` Rafael J. Wysocki
2025-07-11 14:33   ` David Box
2025-07-11 14:49     ` Ilpo Järvinen
2025-07-11 16:06       ` David Box
2025-07-11 21:19         ` Bjorn Helgaas
2025-07-13  4:47     ` David Box

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).