* [PATCH v4] PCI: vmd: Add feature to scan BIOS enumerated devices.
@ 2026-05-22 15:08 Nirmal Patel
2026-05-22 15:42 ` sashiko-bot
2026-05-27 19:01 ` Nirmal Patel
0 siblings, 2 replies; 4+ messages in thread
From: Nirmal Patel @ 2026-05-22 15:08 UTC (permalink / raw)
To: linux-pci, nirmal.patel; +Cc: Nirmal Patel
Newer VMD with device ID 0x28c1 has unique settings compared to its
predecessor where BIOS enumerates the entire VMD device tree and
assigns respective configurations.
VMD configuration BAR0 carries over from GNR legacy VMD as the mechanism
to access the configuration space of the devices owned by VMD. The size
of this window is fixed at 256 MB, where each function consumes 4 KB and
every bus consumes 1 MB.
The shadow and scratchpad registers have been relocated from the VMD
configuration space to the VMD MMIO space in VMD BAR4/BAR5, otherwise
refers to as MEMBAR2 or MSI-X bar.
VMD MSI-X remapping enable/disable is no longer supported.
All the VMD driver code needs to do is to obtain bus hide range along
with shadow register values set by BIOS and perform a bus scan.
The patch also involves small refactoring of vmd_enable_domain function.
Signed-off-by: Nirmal Patel <nirmal.patel@linux.intel.com>
---
v4 : Updating vmd_set_msi_remapping for supported devices only.
v3 : Hard code configbar .end to 0xff same as probe.c; Adjust membar2
offset to accommodate more registers in 28C1. Remove redundant
IORESOURCE_MEM check in vmd_prepare_offsets_and_bus.
v2 : Using PCI features flag instead of devic ID and fixing corner cases
for vmd_remove_irq.
---
drivers/pci/controller/vmd.c | 174 +++++++++++++++++++++++++++--------
include/linux/pci_ids.h | 1 +
2 files changed, 136 insertions(+), 39 deletions(-)
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index d4ae250d4bc6..7c90243b5ff0 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -37,6 +37,12 @@
#define MB2_SHADOW_OFFSET 0x2000
#define MB2_SHADOW_SIZE 16
+/* DMR BAR4 register offsets */
+#define SHADOW_MEMBAR1_28C1 0x2818 /* MEMBAR1 physical address */
+#define SHADOW_MEMBAR2_28C1 0x2820 /* MEMBAR2 physical address */
+#define BASE_ID_REG_28C1 0x2840
+#define MEMBAR2_OFFSET_28C1 0x30d0
+
enum vmd_features {
/*
* Device may contain registers which hint the physical location of the
@@ -77,6 +83,15 @@ enum vmd_features {
* proper power management of the SoC.
*/
VMD_FEAT_BIOS_PM_QUIRK = (1 << 5),
+
+ /*
+ * Newer VMD with device ID 0x28c1 has unique settings compared to its
+ * predecessor where BIOS enumerates the entire VMD device tree and
+ * stores respective configurations including bus start range and
+ * shadow registers in VMD MMIO space in VMD BAR4/BAR5, otherwise refers
+ * to as MEMBAR2 or MSI-X bar.
+ */
+ VMD_FEAT_USE_BIOS_INFO = (1 << 6),
};
#define VMD_BIOS_PM_QUIRK_LTR 0x1003 /* 3145728 ns */
@@ -142,6 +157,7 @@ struct vmd_dev {
u8 first_vec;
char *name;
int instance;
+ unsigned long features;
};
static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus)
@@ -365,6 +381,8 @@ static int vmd_create_irq_domain(struct vmd_dev *vmd)
static void vmd_set_msi_remapping(struct vmd_dev *vmd, bool enable)
{
u16 reg;
+ if (!!(vmd->features & VMD_FEAT_USE_BIOS_INFO))
+ return;
pci_read_config_word(vmd->dev, PCI_REG_VMCONFIG, ®);
reg = enable ? (reg & ~VMCONFIG_MSI_REMAP) :
@@ -374,6 +392,8 @@ static void vmd_set_msi_remapping(struct vmd_dev *vmd, bool enable)
static void vmd_remove_irq_domain(struct vmd_dev *vmd)
{
+ if (!!(vmd->features & VMD_FEAT_USE_BIOS_INFO))
+ return;
/*
* Some production BIOS won't enable remapping between soft reboots.
* Ensure remapping is restored before unloading the driver.
@@ -393,7 +413,12 @@ static void __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus,
unsigned int devfn, int reg, int len)
{
unsigned int busnr_ecam = bus->number - vmd->busn_start;
- u32 offset = PCIE_ECAM_OFFSET(busnr_ecam, devfn, reg);
+ u32 offset;
+
+ if (!!(vmd->features & VMD_FEAT_USE_BIOS_INFO))
+ busnr_ecam = bus->number;
+
+ offset = PCIE_ECAM_OFFSET(busnr_ecam, devfn, reg);
if (offset + len >= resource_size(&vmd->dev->resource[VMD_CFGBAR]))
return NULL;
@@ -661,6 +686,46 @@ static int vmd_get_bus_number_start(struct vmd_dev *vmd)
return 0;
}
+static int vmd_get_bus_info_from_bar4(struct vmd_dev *vmd,
+ resource_size_t *offset1,
+ resource_size_t *offset2)
+{
+ u64 phys1, phys2, bar4_2840;
+ void __iomem *bar4;
+ u32 base_id;
+ u8 base_bus;
+
+
+ bar4 = pci_ioremap_bar(vmd->dev, 4);
+ if (!bar4)
+ return -ENOMEM;
+
+ /* Read shadow registers for MEMBAR1 and MEMBAR2 physical addresses. */
+ phys1 = readq(bar4 + SHADOW_MEMBAR1_28C1);
+ phys2 = readq(bar4 + SHADOW_MEMBAR2_28C1);
+
+ /*
+ * Read and set bus start number from Base ID register.
+ * 24-bit Base ID register is part of 64-bit shadowed reqid hide
+ * range register and holds segment, bus, device and function.
+ */
+ bar4_2840 = readq(bar4 + BASE_ID_REG_28C1);
+ base_id = bar4_2840 & 0xFFFFFF;
+ base_bus = base_id >> 8;
+ vmd->busn_start = base_bus;
+
+ /* Calculate offsets like vmd_get_phys_offsets() does. */
+ if (phys1)
+ *offset1 = vmd->dev->resource[VMD_MEMBAR1].start -
+ (phys1 & PCI_BASE_ADDRESS_MEM_MASK);
+ if (phys2)
+ *offset2 = vmd->dev->resource[VMD_MEMBAR2].start -
+ (phys2 & PCI_BASE_ADDRESS_MEM_MASK);
+
+ pci_iounmap(vmd->dev, bar4);
+ return 0;
+}
+
static irqreturn_t vmd_irq(int irq, void *data)
{
struct vmd_irq_list *irqs = data;
@@ -711,6 +776,53 @@ static int vmd_alloc_irqs(struct vmd_dev *vmd)
return 0;
}
+static int vmd_prepare_offsets_and_bus(struct vmd_dev *vmd,
+ unsigned long features,
+ resource_size_t *membar2_offset,
+ resource_size_t *offset1,
+ resource_size_t *offset2)
+{
+ int ret;
+
+ /*
+ * Shadow registers may exist in certain VMD device ids which allow
+ * guests to correctly assign host physical addresses to the root ports
+ * and child devices. These registers will either return the host value
+ * or 0, depending on an enable bit in the VMD device.
+ */
+ /*
+ * For certain VMD devices (i.e. 0x28C1), BIOS places device info
+ * in BAR4 shadow registers to determine the base bus number and memory
+ * offsets.
+ */
+ if (features & VMD_FEAT_USE_BIOS_INFO) {
+ *membar2_offset = MEMBAR2_OFFSET_28C1;
+ ret = vmd_get_bus_info_from_bar4(vmd, offset1, offset2);
+ if (ret)
+ return ret;
+ } else if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
+ *membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
+ ret = vmd_get_phys_offsets(vmd, true, offset1, offset2);
+ if (ret)
+ return ret;
+ } else if (features & VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP) {
+ ret = vmd_get_phys_offsets(vmd, false, offset1, offset2);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Certain VMD devices may have a root port configuration option which
+ * limits the bus range to between 0-127, 128-255, or 224-255.
+ */
+ if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
+ ret = vmd_get_bus_number_start(vmd);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
/*
* Since VMD is an aperture to regular PCIe root ports, only allow it to
* control features that the OS is allowed to control on the physical PCI bus.
@@ -784,38 +896,16 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
struct pci_dev *dev;
int ret;
- /*
- * Shadow registers may exist in certain VMD device ids which allow
- * guests to correctly assign host physical addresses to the root ports
- * and child devices. These registers will either return the host value
- * or 0, depending on an enable bit in the VMD device.
- */
- if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
- membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
- ret = vmd_get_phys_offsets(vmd, true, &offset[0], &offset[1]);
- if (ret)
- return ret;
- } else if (features & VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP) {
- ret = vmd_get_phys_offsets(vmd, false, &offset[0], &offset[1]);
- if (ret)
- return ret;
- }
-
- /*
- * Certain VMD devices may have a root port configuration option which
- * limits the bus range to between 0-127, 128-255, or 224-255
- */
- if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
- ret = vmd_get_bus_number_start(vmd);
- if (ret)
- return ret;
- }
+ ret = vmd_prepare_offsets_and_bus(vmd, features, &membar2_offset,
+ &offset[0], &offset[1]);
+ if (ret)
+ return ret;
res = &vmd->dev->resource[VMD_CFGBAR];
vmd->resources[0] = (struct resource) {
.name = "VMD CFGBAR",
.start = vmd->busn_start,
- .end = vmd->busn_start + (resource_size(res) >> 20) - 1,
+ .end = 0xff,
.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
};
@@ -868,19 +958,21 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
* acceptable because the guest is usually CPU-limited and MSI
* remapping doesn't become a performance bottleneck.
*/
- if (!(features & VMD_FEAT_CAN_BYPASS_MSI_REMAP) ||
- offset[0] || offset[1]) {
- ret = vmd_alloc_irqs(vmd);
- if (ret)
- return ret;
+ if (!(features & VMD_FEAT_USE_BIOS_INFO)) {
+ if (!(features & VMD_FEAT_CAN_BYPASS_MSI_REMAP) ||
+ offset[0] || offset[1]) {
+ ret = vmd_alloc_irqs(vmd);
+ if (ret)
+ return ret;
- vmd_set_msi_remapping(vmd, true);
+ vmd_set_msi_remapping(vmd, true);
- ret = vmd_create_irq_domain(vmd);
- if (ret)
- return ret;
- } else {
- vmd_set_msi_remapping(vmd, false);
+ ret = vmd_create_irq_domain(vmd);
+ if (ret)
+ return ret;
+ } else {
+ vmd_set_msi_remapping(vmd, false);
+ }
}
pci_add_resource(&resources, &vmd->resources[0]);
@@ -998,6 +1090,7 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
vmd->dev = dev;
vmd->sysdata.domain = PCI_DOMAIN_NR_NOT_SET;
+ vmd->features = features;
vmd->instance = ida_alloc(&vmd_instance_ida, GFP_KERNEL);
if (vmd->instance < 0)
return vmd->instance;
@@ -1114,6 +1207,9 @@ static const struct pci_device_id vmd_ids[] = {
.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW |
VMD_FEAT_HAS_BUS_RESTRICTIONS |
VMD_FEAT_CAN_BYPASS_MSI_REMAP,},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_VMD_28C1),
+ .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW |
+ VMD_FEAT_USE_BIOS_INFO,},
{PCI_VDEVICE(INTEL, 0x467f),
.driver_data = VMD_FEATS_CLIENT,},
{PCI_VDEVICE(INTEL, 0x4c3d),
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 24cb42f66e4b..2a8ebe7df92e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2889,6 +2889,7 @@
#define PCI_DEVICE_ID_INTEL_HDA_ICH8 0x284b
#define PCI_DEVICE_ID_INTEL_ICH8_6 0x2850
#define PCI_DEVICE_ID_INTEL_VMD_28C0 0x28c0
+#define PCI_DEVICE_ID_INTEL_VMD_28C1 0x28c1
#define PCI_DEVICE_ID_INTEL_ICH9_0 0x2910
#define PCI_DEVICE_ID_INTEL_ICH9_2 0x2912
#define PCI_DEVICE_ID_INTEL_ICH9_3 0x2913
--
2.52.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v4] PCI: vmd: Add feature to scan BIOS enumerated devices.
2026-05-22 15:08 [PATCH v4] PCI: vmd: Add feature to scan BIOS enumerated devices Nirmal Patel
@ 2026-05-22 15:42 ` sashiko-bot
2026-05-26 19:43 ` Nirmal Patel
2026-05-27 19:01 ` Nirmal Patel
1 sibling, 1 reply; 4+ messages in thread
From: sashiko-bot @ 2026-05-22 15:42 UTC (permalink / raw)
To: Nirmal Patel; +Cc: linux-pci
Thank you for your contribution! Sashiko AI review found 6 potential issue(s) to consider:
New issues:
- [High] The driver unconditionally reads from fixed offsets in BAR4 without verifying that the BAR size is sufficiently large.
- [High] The PCI bus resource `.end` is unconditionally hardcoded to `0xff`, breaking legacy devices with smaller configuration BARs.
Pre-existing issues:
- [High] Allocated IRQ vectors are leaked in multiple error paths and during device teardown.
- [High] Per-CPU memory allocated for SRCU structures is leaked on error paths.
- [High] SRCU lock structures are destroyed before the IRQ handlers are unregistered, allowing a use-after-free.
- [High] Subtracting a fixed offset from the resource bounds of MEMBAR2 without verifying the actual BAR length causes an integer underflow.
--
commit 0653c66e9c93e9c62213a329c19bf93e4499969f
Author: Nirmal Patel <nirmal.patel@linux.intel.com>
Subject: PCI: vmd: Add feature to scan BIOS enumerated devices.
This commit introduces the VMD_FEAT_USE_BIOS_INFO feature flag for device
0x28c1. It parses configuration parameters, bus start ranges, and shadow
registers directly from the BIOS-populated BAR4/BAR5 MMIO space rather
than enumerating the tree dynamically.
> diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
> index d4ae250d4bc6d..7c90243b5ff0c 100644
> --- a/drivers/pci/controller/vmd.c
> +++ b/drivers/pci/controller/vmd.c
[ ... ]
> @@ -661,6 +686,46 @@ static int vmd_get_bus_number_start(struct vmd_dev *vmd)
> return 0;
> }
>
> +static int vmd_get_bus_info_from_bar4(struct vmd_dev *vmd,
> + resource_size_t *offset1,
> + resource_size_t *offset2)
> +{
> + u64 phys1, phys2, bar4_2840;
> + void __iomem *bar4;
> + u32 base_id;
> + u8 base_bus;
> +
> +
> + bar4 = pci_ioremap_bar(vmd->dev, 4);
> + if (!bar4)
> + return -ENOMEM;
> +
> + /* Read shadow registers for MEMBAR1 and MEMBAR2 physical addresses. */
> + phys1 = readq(bar4 + SHADOW_MEMBAR1_28C1);
[Severity: High]
Does this code read out-of-bounds if the hardware presents a smaller BAR4?
pci_ioremap_bar() maps the available resource, but there doesn't appear to
be a check verifying that the BAR length is at least large enough to cover
the fixed offsets being read, such as 0x2840 (BASE_ID_REG_28C1).
Could a misconfigured or malicious device trigger an out-of-bounds MMIO
read here?
> + phys2 = readq(bar4 + SHADOW_MEMBAR2_28C1);
[ ... ]
> @@ -711,6 +776,53 @@ static int vmd_alloc_irqs(struct vmd_dev *vmd)
> return 0;
> }
>
> +static int vmd_prepare_offsets_and_bus(struct vmd_dev *vmd,
> + unsigned long features,
> + resource_size_t *membar2_offset,
> + resource_size_t *offset1,
> + resource_size_t *offset2)
> +{
> + int ret;
> +
> + /*
> + * Shadow registers may exist in certain VMD device ids which allow
> + * guests to correctly assign host physical addresses to the root ports
> + * and child devices. These registers will either return the host value
> + * or 0, depending on an enable bit in the VMD device.
> + */
> + /*
> + * For certain VMD devices (i.e. 0x28C1), BIOS places device info
> + * in BAR4 shadow registers to determine the base bus number and memory
> + * offsets.
> + */
> + if (features & VMD_FEAT_USE_BIOS_INFO) {
> + *membar2_offset = MEMBAR2_OFFSET_28C1;
[Severity: High]
This is a pre-existing issue, but can setting this offset cause an integer
underflow later?
In vmd_enable_domain(), the resource start is calculated as:
vmd->resources[2] = (struct resource) {
.name = "VMD MEMBAR2",
.start = res->start + membar2_offset,
.end = res->end,
...
};
If the physical MEMBAR2 length provided by the hardware is smaller than
membar2_offset, start becomes greater than end, which could cause
resource_size() to underflow and yield a massively inflated memory window.
> + ret = vmd_get_bus_info_from_bar4(vmd, offset1, offset2);
> + if (ret)
> + return ret;
> + } else if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
[ ... ]
> @@ -784,38 +896,16 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> struct pci_dev *dev;
> int ret;
>
> - /*
> - * Shadow registers may exist in certain VMD device ids which allow
[ ... ]
> - if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
> - ret = vmd_get_bus_number_start(vmd);
> - if (ret)
> - return ret;
> - }
> + ret = vmd_prepare_offsets_and_bus(vmd, features, &membar2_offset,
> + &offset[0], &offset[1]);
> + if (ret)
> + return ret;
>
> res = &vmd->dev->resource[VMD_CFGBAR];
> vmd->resources[0] = (struct resource) {
> .name = "VMD CFGBAR",
> .start = vmd->busn_start,
> - .end = vmd->busn_start + (resource_size(res) >> 20) - 1,
> + .end = 0xff,
[Severity: High]
Does unconditionally setting the end to 0xff cause a regression for legacy
VMD devices with smaller configuration BARs?
Older devices on client platforms might have smaller CFGBARs. Claiming buses
up to 255 might cause the PCI subsystem to erroneously assign bus numbers
that fall outside the hardware's actual ECAM capabilities.
> .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
> };
>
> @@ -868,19 +958,21 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
> * acceptable because the guest is usually CPU-limited and MSI
> * remapping doesn't become a performance bottleneck.
> */
> - if (!(features & VMD_FEAT_CAN_BYPASS_MSI_REMAP) ||
> - offset[0] || offset[1]) {
> - ret = vmd_alloc_irqs(vmd);
> - if (ret)
> - return ret;
> + if (!(features & VMD_FEAT_USE_BIOS_INFO)) {
> + if (!(features & VMD_FEAT_CAN_BYPASS_MSI_REMAP) ||
> + offset[0] || offset[1]) {
> + ret = vmd_alloc_irqs(vmd);
> + if (ret)
> + return ret;
>
> - vmd_set_msi_remapping(vmd, true);
> + vmd_set_msi_remapping(vmd, true);
>
> - ret = vmd_create_irq_domain(vmd);
> - if (ret)
> - return ret;
> + ret = vmd_create_irq_domain(vmd);
> + if (ret)
> + return ret;
[Severity: High]
This is a pre-existing issue, but are IRQ vectors leaked when errors occur
here?
The PCI subsystem guidelines mandate calling pci_free_irq_vectors() in error
paths after a successful pci_alloc_irq_vectors(). If vmd_create_irq_domain()
fails, we return directly without freeing the vectors allocated in
vmd_alloc_irqs(). This same cleanup omission appears to exist in vmd_remove()
as well, potentially leading to system IRQ capacity exhaustion.
Additionally, if vmd_alloc_irqs() fails midway through devm_request_irq(),
does it return without cleaning up the successfully initialized SRCU
structures from previous loop iterations? The per-CPU memory allocated by
init_srcu_struct() might be leaked.
[Severity: High]
Also a pre-existing issue, but does using devm_request_irq() for the
handlers cause a use-after-free during device teardown?
In vmd_remove(), the driver calls vmd_cleanup_srcu() which destroys the SRCU
structures. However, since the IRQs are managed by devres, they aren't
deregistered until after vmd_remove() returns.
If an interrupt fires in this window, vmd_irq() would call srcu_read_lock()
on an already-destroyed SRCU structure.
> + } else {
> + vmd_set_msi_remapping(vmd, false);
> + }
> }
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260522150829.5246-1-nirmal.patel@linux.intel.com?part=1
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v4] PCI: vmd: Add feature to scan BIOS enumerated devices.
2026-05-22 15:42 ` sashiko-bot
@ 2026-05-26 19:43 ` Nirmal Patel
0 siblings, 0 replies; 4+ messages in thread
From: Nirmal Patel @ 2026-05-26 19:43 UTC (permalink / raw)
To: sashiko-bot; +Cc: sashiko-reviews, linux-pci
On Fri, 22 May 2026 15:42:16 +0000
sashiko-bot@kernel.org wrote:
> Thank you for your contribution! Sashiko AI review found 6 potential
> issue(s) to consider:
>
> New issues:
> - [High] The driver unconditionally reads from fixed offsets in BAR4
> without verifying that the BAR size is sufficiently large.
> - [High] The PCI bus resource `.end` is unconditionally hardcoded to
> `0xff`, breaking legacy devices with smaller configuration BARs.
>
> Pre-existing issues:
> - [High] Allocated IRQ vectors are leaked in multiple error paths and
> during device teardown.
> - [High] Per-CPU memory allocated for SRCU structures is leaked on
> error paths.
> - [High] SRCU lock structures are destroyed before the IRQ handlers
> are unregistered, allowing a use-after-free.
> - [High] Subtracting a fixed offset from the resource bounds of
> MEMBAR2 without verifying the actual BAR length causes an integer
> underflow. --
>
Pre-existing issues are not in scope of the feature in this patch
series.
> commit 0653c66e9c93e9c62213a329c19bf93e4499969f
> Author: Nirmal Patel <nirmal.patel@linux.intel.com>
> Subject: PCI: vmd: Add feature to scan BIOS enumerated devices.
>
> This commit introduces the VMD_FEAT_USE_BIOS_INFO feature flag for
> device 0x28c1. It parses configuration parameters, bus start ranges,
> and shadow registers directly from the BIOS-populated BAR4/BAR5 MMIO
> space rather than enumerating the tree dynamically.
>
> > diff --git a/drivers/pci/controller/vmd.c
> > b/drivers/pci/controller/vmd.c index d4ae250d4bc6d..7c90243b5ff0c
> > 100644 --- a/drivers/pci/controller/vmd.c
> > +++ b/drivers/pci/controller/vmd.c
> [ ... ]
> > @@ -661,6 +686,46 @@ static int vmd_get_bus_number_start(struct
> > vmd_dev *vmd) return 0;
> > }
> >
> > +static int vmd_get_bus_info_from_bar4(struct vmd_dev *vmd,
> > + resource_size_t *offset1,
> > + resource_size_t *offset2)
> > +{
> > + u64 phys1, phys2, bar4_2840;
> > + void __iomem *bar4;
> > + u32 base_id;
> > + u8 base_bus;
> > +
> > +
> > + bar4 = pci_ioremap_bar(vmd->dev, 4);
> > + if (!bar4)
> > + return -ENOMEM;
> > +
> > + /* Read shadow registers for MEMBAR1 and MEMBAR2 physical
> > addresses. */
> > + phys1 = readq(bar4 + SHADOW_MEMBAR1_28C1);
>
> [Severity: High]
> Does this code read out-of-bounds if the hardware presents a smaller
> BAR4?
>
> pci_ioremap_bar() maps the available resource, but there doesn't
> appear to be a check verifying that the BAR length is at least large
> enough to cover the fixed offsets being read, such as 0x2840
> (BASE_ID_REG_28C1).
>
> Could a misconfigured or malicious device trigger an out-of-bounds
> MMIO read here?
I dont think so. vmd_get_bus_info_from_bar4 gets called for only device
ID specific flag VMD_FEAT_USE_BIOS_INFO. BASE_ID_REG_28C is set to
0x2840 which is just 10KB of space and is much lesser than standard
membar2 resource size 2MB. Besides we are doing the same thing in
vmd_get_phys_offsets.
>
> > + phys2 = readq(bar4 + SHADOW_MEMBAR2_28C1);
> [ ... ]
> > @@ -711,6 +776,53 @@ static int vmd_alloc_irqs(struct vmd_dev *vmd)
> > return 0;
> > }
> >
> > +static int vmd_prepare_offsets_and_bus(struct vmd_dev *vmd,
> > + unsigned long features,
> > + resource_size_t
> > *membar2_offset,
> > + resource_size_t *offset1,
> > + resource_size_t *offset2)
> > +{
> > + int ret;
> > +
> > + /*
> > + * Shadow registers may exist in certain VMD device ids
> > which allow
> > + * guests to correctly assign host physical addresses to
> > the root ports
> > + * and child devices. These registers will either return
> > the host value
> > + * or 0, depending on an enable bit in the VMD device.
> > + */
> > + /*
> > + * For certain VMD devices (i.e. 0x28C1), BIOS places
> > device info
> > + * in BAR4 shadow registers to determine the base bus
> > number and memory
> > + * offsets.
> > + */
> > + if (features & VMD_FEAT_USE_BIOS_INFO) {
> > + *membar2_offset = MEMBAR2_OFFSET_28C1;
>
> [Severity: High]
> This is a pre-existing issue, but can setting this offset cause an
> integer underflow later?
>
> In vmd_enable_domain(), the resource start is calculated as:
>
> vmd->resources[2] = (struct resource) {
> .name = "VMD MEMBAR2",
> .start = res->start + membar2_offset,
> .end = res->end,
> ...
> };
>
> If the physical MEMBAR2 length provided by the hardware is smaller
> than membar2_offset, start becomes greater than end, which could cause
> resource_size() to underflow and yield a massively inflated memory
> window.
membar2_offset is 0x2000 and it is required to leave such space to
successfully map other VMD hardware registers including MSI-X. So
resource_size() will never be less than 0x2000 for any vmd device since
the minimum possible membar2 size is 2MB set in BIOS.
>
> > + ret = vmd_get_bus_info_from_bar4(vmd, offset1,
> > offset2);
> > + if (ret)
> > + return ret;
> > + } else if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
> [ ... ]
> > @@ -784,38 +896,16 @@ static int vmd_enable_domain(struct vmd_dev
> > *vmd, unsigned long features) struct pci_dev *dev;
> > int ret;
> >
> > - /*
> > - * Shadow registers may exist in certain VMD device ids
> > which allow
> [ ... ]
> > - if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
> > - ret = vmd_get_bus_number_start(vmd);
> > - if (ret)
> > - return ret;
> > - }
> > + ret = vmd_prepare_offsets_and_bus(vmd, features,
> > &membar2_offset,
> > + &offset[0], &offset[1]);
> > + if (ret)
> > + return ret;
> >
> > res = &vmd->dev->resource[VMD_CFGBAR];
> > vmd->resources[0] = (struct resource) {
> > .name = "VMD CFGBAR",
> > .start = vmd->busn_start,
> > - .end = vmd->busn_start + (resource_size(res) >>
> > 20) - 1,
> > + .end = 0xff,
>
> [Severity: High]
> Does unconditionally setting the end to 0xff cause a regression for
> legacy VMD devices with smaller configuration BARs?
>
> Older devices on client platforms might have smaller CFGBARs.
> Claiming buses up to 255 might cause the PCI subsystem to erroneously
> assign bus numbers that fall outside the hardware's actual ECAM
> capabilities.
If i keep existing implementation (like v1), the .end value can go
out-of-bound (i.e. 478).
Keeping just .end = (resource_size(res) >> 20) - 1 will result in a
crash when busn_start is a high value like 128 and .end is set to 32.
So like other places in pci code-base (i.e. pci/probe.c), it is better
to hard code it to 0xff.
>
> > .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
> > };
> >
> > @@ -868,19 +958,21 @@ static int vmd_enable_domain(struct vmd_dev
> > *vmd, unsigned long features)
> > * acceptable because the guest is usually CPU-limited and
> > MSI
> > * remapping doesn't become a performance bottleneck.
> > */
> > - if (!(features & VMD_FEAT_CAN_BYPASS_MSI_REMAP) ||
> > - offset[0] || offset[1]) {
> > - ret = vmd_alloc_irqs(vmd);
> > - if (ret)
> > - return ret;
> > + if (!(features & VMD_FEAT_USE_BIOS_INFO)) {
> > + if (!(features & VMD_FEAT_CAN_BYPASS_MSI_REMAP) ||
> > + offset[0] || offset[1]) {
> > + ret = vmd_alloc_irqs(vmd);
> > + if (ret)
> > + return ret;
> >
> > - vmd_set_msi_remapping(vmd, true);
> > + vmd_set_msi_remapping(vmd, true);
> >
> > - ret = vmd_create_irq_domain(vmd);
> > - if (ret)
> > - return ret;
> > + ret = vmd_create_irq_domain(vmd);
> > + if (ret)
> > + return ret;
>
> [Severity: High]
> This is a pre-existing issue, but are IRQ vectors leaked when errors
> occur here?
No changes are being done here as part of this feature patch series.
The existing issue needs to be reproduced and if needed, then to be
fixed in separate patches.
>
> The PCI subsystem guidelines mandate calling pci_free_irq_vectors()
> in error paths after a successful pci_alloc_irq_vectors(). If
> vmd_create_irq_domain() fails, we return directly without freeing the
> vectors allocated in vmd_alloc_irqs(). This same cleanup omission
> appears to exist in vmd_remove() as well, potentially leading to
> system IRQ capacity exhaustion.
>
> Additionally, if vmd_alloc_irqs() fails midway through
> devm_request_irq(), does it return without cleaning up the
> successfully initialized SRCU structures from previous loop
> iterations? The per-CPU memory allocated by init_srcu_struct() might
> be leaked.
>
> [Severity: High]
> Also a pre-existing issue, but does using devm_request_irq() for the
> handlers cause a use-after-free during device teardown?
The issue has not be reported or seen in any vmd generations.
No changes are being done here as part of this feature patch series.
The existing issue needs to be reproduced and if needed, then to be
fixed in separate patches.
>
> In vmd_remove(), the driver calls vmd_cleanup_srcu() which destroys
> the SRCU structures. However, since the IRQs are managed by devres,
> they aren't deregistered until after vmd_remove() returns.
>
> If an interrupt fires in this window, vmd_irq() would call
> srcu_read_lock() on an already-destroyed SRCU structure.
>
> > + } else {
> > + vmd_set_msi_remapping(vmd, false);
> > + }
> > }
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v4] PCI: vmd: Add feature to scan BIOS enumerated devices.
2026-05-22 15:08 [PATCH v4] PCI: vmd: Add feature to scan BIOS enumerated devices Nirmal Patel
2026-05-22 15:42 ` sashiko-bot
@ 2026-05-27 19:01 ` Nirmal Patel
1 sibling, 0 replies; 4+ messages in thread
From: Nirmal Patel @ 2026-05-27 19:01 UTC (permalink / raw)
To: linux-pci, nirmal.patel
On Fri, 22 May 2026 15:08:29 +0000
Nirmal Patel <nirmal.patel@linux.intel.com> wrote:
> Newer VMD with device ID 0x28c1 has unique settings compared to its
> predecessor where BIOS enumerates the entire VMD device tree and
> assigns respective configurations.
>
> VMD configuration BAR0 carries over from GNR legacy VMD as the
> mechanism to access the configuration space of the devices owned by
> VMD. The size of this window is fixed at 256 MB, where each function
> consumes 4 KB and every bus consumes 1 MB.
>
> The shadow and scratchpad registers have been relocated from the VMD
> configuration space to the VMD MMIO space in VMD BAR4/BAR5, otherwise
> refers to as MEMBAR2 or MSI-X bar.
>
> VMD MSI-X remapping enable/disable is no longer supported.
>
> All the VMD driver code needs to do is to obtain bus hide range along
> with shadow register values set by BIOS and perform a bus scan.
>
> The patch also involves small refactoring of vmd_enable_domain
> function.
>
> Signed-off-by: Nirmal Patel <nirmal.patel@linux.intel.com>
> ---
> v4 : Updating vmd_set_msi_remapping for supported devices only.
> v3 : Hard code configbar .end to 0xff same as probe.c; Adjust membar2
> offset to accommodate more registers in 28C1. Remove redundant
> IORESOURCE_MEM check in vmd_prepare_offsets_and_bus.
> v2 : Using PCI features flag instead of devic ID and fixing corner
> cases for vmd_remove_irq.
> ---
> drivers/pci/controller/vmd.c | 174
> +++++++++++++++++++++++++++-------- include/linux/pci_ids.h |
> 1 + 2 files changed, 136 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/pci/controller/vmd.c
> b/drivers/pci/controller/vmd.c index d4ae250d4bc6..7c90243b5ff0 100644
> --- a/drivers/pci/controller/vmd.c
> +++ b/drivers/pci/controller/vmd.c
> @@ -37,6 +37,12 @@
> #define MB2_SHADOW_OFFSET 0x2000
> #define MB2_SHADOW_SIZE 16
>
> +/* DMR BAR4 register offsets */
> +#define SHADOW_MEMBAR1_28C1 0x2818 /* MEMBAR1
> physical address */ +#define SHADOW_MEMBAR2_28C1
> 0x2820 /* MEMBAR2 physical address */ +#define
> BASE_ID_REG_28C1 0x2840 +#define
> MEMBAR2_OFFSET_28C1 0x30d0 +
> enum vmd_features {
> /*
> * Device may contain registers which hint the physical
> location of the @@ -77,6 +83,15 @@ enum vmd_features {
> * proper power management of the SoC.
> */
> VMD_FEAT_BIOS_PM_QUIRK = (1 << 5),
> +
> + /*
> + * Newer VMD with device ID 0x28c1 has unique settings
> compared to its
> + * predecessor where BIOS enumerates the entire VMD device
> tree and
> + * stores respective configurations including bus start
> range and
> + * shadow registers in VMD MMIO space in VMD BAR4/BAR5,
> otherwise refers
> + * to as MEMBAR2 or MSI-X bar.
> + */
> + VMD_FEAT_USE_BIOS_INFO = (1 << 6),
> };
>
> #define VMD_BIOS_PM_QUIRK_LTR 0x1003 /* 3145728 ns */
> @@ -142,6 +157,7 @@ struct vmd_dev {
> u8 first_vec;
> char *name;
> int instance;
> + unsigned long features;
> };
>
> static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus)
> @@ -365,6 +381,8 @@ static int vmd_create_irq_domain(struct vmd_dev
> *vmd) static void vmd_set_msi_remapping(struct vmd_dev *vmd, bool
> enable) {
> u16 reg;
> + if (!!(vmd->features & VMD_FEAT_USE_BIOS_INFO))
> + return;
>
> pci_read_config_word(vmd->dev, PCI_REG_VMCONFIG, ®);
> reg = enable ? (reg & ~VMCONFIG_MSI_REMAP) :
> @@ -374,6 +392,8 @@ static void vmd_set_msi_remapping(struct vmd_dev
> *vmd, bool enable)
> static void vmd_remove_irq_domain(struct vmd_dev *vmd)
> {
> + if (!!(vmd->features & VMD_FEAT_USE_BIOS_INFO))
> + return;
> /*
> * Some production BIOS won't enable remapping between soft
> reboots.
> * Ensure remapping is restored before unloading the driver.
> @@ -393,7 +413,12 @@ static void __iomem *vmd_cfg_addr(struct vmd_dev
> *vmd, struct pci_bus *bus, unsigned int devfn, int reg, int len)
> {
> unsigned int busnr_ecam = bus->number - vmd->busn_start;
> - u32 offset = PCIE_ECAM_OFFSET(busnr_ecam, devfn, reg);
> + u32 offset;
> +
> + if (!!(vmd->features & VMD_FEAT_USE_BIOS_INFO))
> + busnr_ecam = bus->number;
> +
> + offset = PCIE_ECAM_OFFSET(busnr_ecam, devfn, reg);
>
> if (offset + len >=
> resource_size(&vmd->dev->resource[VMD_CFGBAR])) return NULL;
> @@ -661,6 +686,46 @@ static int vmd_get_bus_number_start(struct
> vmd_dev *vmd) return 0;
> }
>
> +static int vmd_get_bus_info_from_bar4(struct vmd_dev *vmd,
> + resource_size_t *offset1,
> + resource_size_t *offset2)
> +{
> + u64 phys1, phys2, bar4_2840;
> + void __iomem *bar4;
> + u32 base_id;
> + u8 base_bus;
> +
> +
> + bar4 = pci_ioremap_bar(vmd->dev, 4);
> + if (!bar4)
> + return -ENOMEM;
> +
> + /* Read shadow registers for MEMBAR1 and MEMBAR2 physical
> addresses. */
> + phys1 = readq(bar4 + SHADOW_MEMBAR1_28C1);
> + phys2 = readq(bar4 + SHADOW_MEMBAR2_28C1);
> +
> + /*
> + * Read and set bus start number from Base ID register.
> + * 24-bit Base ID register is part of 64-bit shadowed reqid
> hide
> + * range register and holds segment, bus, device and
> function.
> + */
> + bar4_2840 = readq(bar4 + BASE_ID_REG_28C1);
> + base_id = bar4_2840 & 0xFFFFFF;
> + base_bus = base_id >> 8;
> + vmd->busn_start = base_bus;
> +
> + /* Calculate offsets like vmd_get_phys_offsets() does. */
> + if (phys1)
> + *offset1 = vmd->dev->resource[VMD_MEMBAR1].start -
> + (phys1 & PCI_BASE_ADDRESS_MEM_MASK);
> + if (phys2)
> + *offset2 = vmd->dev->resource[VMD_MEMBAR2].start -
> + (phys2 & PCI_BASE_ADDRESS_MEM_MASK);
> +
> + pci_iounmap(vmd->dev, bar4);
> + return 0;
> +}
> +
> static irqreturn_t vmd_irq(int irq, void *data)
> {
> struct vmd_irq_list *irqs = data;
> @@ -711,6 +776,53 @@ static int vmd_alloc_irqs(struct vmd_dev *vmd)
> return 0;
> }
>
> +static int vmd_prepare_offsets_and_bus(struct vmd_dev *vmd,
> + unsigned long features,
> + resource_size_t
> *membar2_offset,
> + resource_size_t *offset1,
> + resource_size_t *offset2)
> +{
> + int ret;
> +
> + /*
> + * Shadow registers may exist in certain VMD device ids
> which allow
> + * guests to correctly assign host physical addresses to the
> root ports
> + * and child devices. These registers will either return the
> host value
> + * or 0, depending on an enable bit in the VMD device.
> + */
> + /*
> + * For certain VMD devices (i.e. 0x28C1), BIOS places device
> info
> + * in BAR4 shadow registers to determine the base bus number
> and memory
> + * offsets.
> + */
> + if (features & VMD_FEAT_USE_BIOS_INFO) {
> + *membar2_offset = MEMBAR2_OFFSET_28C1;
> + ret = vmd_get_bus_info_from_bar4(vmd, offset1,
> offset2);
> + if (ret)
> + return ret;
> + } else if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
> + *membar2_offset = MB2_SHADOW_OFFSET +
> MB2_SHADOW_SIZE;
> + ret = vmd_get_phys_offsets(vmd, true, offset1,
> offset2);
> + if (ret)
> + return ret;
> + } else if (features & VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP) {
> + ret = vmd_get_phys_offsets(vmd, false, offset1,
> offset2);
> + if (ret)
> + return ret;
> + }
> +
> + /*
> + * Certain VMD devices may have a root port configuration
> option which
> + * limits the bus range to between 0-127, 128-255, or
> 224-255.
> + */
> + if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
> + ret = vmd_get_bus_number_start(vmd);
> + if (ret)
> + return ret;
> + }
> + return 0;
> +}
> +
> /*
> * Since VMD is an aperture to regular PCIe root ports, only allow
> it to
> * control features that the OS is allowed to control on the
> physical PCI bus. @@ -784,38 +896,16 @@ static int
> vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) struct
> pci_dev *dev; int ret;
>
> - /*
> - * Shadow registers may exist in certain VMD device ids
> which allow
> - * guests to correctly assign host physical addresses to the
> root ports
> - * and child devices. These registers will either return the
> host value
> - * or 0, depending on an enable bit in the VMD device.
> - */
> - if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
> - membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
> - ret = vmd_get_phys_offsets(vmd, true, &offset[0],
> &offset[1]);
> - if (ret)
> - return ret;
> - } else if (features & VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP) {
> - ret = vmd_get_phys_offsets(vmd, false, &offset[0],
> &offset[1]);
> - if (ret)
> - return ret;
> - }
> -
> - /*
> - * Certain VMD devices may have a root port configuration
> option which
> - * limits the bus range to between 0-127, 128-255, or 224-255
> - */
> - if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
> - ret = vmd_get_bus_number_start(vmd);
> - if (ret)
> - return ret;
> - }
> + ret = vmd_prepare_offsets_and_bus(vmd, features,
> &membar2_offset,
> + &offset[0], &offset[1]);
> + if (ret)
> + return ret;
>
> res = &vmd->dev->resource[VMD_CFGBAR];
> vmd->resources[0] = (struct resource) {
> .name = "VMD CFGBAR",
> .start = vmd->busn_start,
> - .end = vmd->busn_start + (resource_size(res) >>
> 20) - 1,
> + .end = 0xff,
> .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
> };
>
> @@ -868,19 +958,21 @@ static int vmd_enable_domain(struct vmd_dev
> *vmd, unsigned long features)
> * acceptable because the guest is usually CPU-limited and
> MSI
> * remapping doesn't become a performance bottleneck.
> */
> - if (!(features & VMD_FEAT_CAN_BYPASS_MSI_REMAP) ||
> - offset[0] || offset[1]) {
> - ret = vmd_alloc_irqs(vmd);
> - if (ret)
> - return ret;
> + if (!(features & VMD_FEAT_USE_BIOS_INFO)) {
> + if (!(features & VMD_FEAT_CAN_BYPASS_MSI_REMAP) ||
> + offset[0] || offset[1]) {
> + ret = vmd_alloc_irqs(vmd);
> + if (ret)
> + return ret;
>
> - vmd_set_msi_remapping(vmd, true);
> + vmd_set_msi_remapping(vmd, true);
>
> - ret = vmd_create_irq_domain(vmd);
> - if (ret)
> - return ret;
> - } else {
> - vmd_set_msi_remapping(vmd, false);
> + ret = vmd_create_irq_domain(vmd);
> + if (ret)
> + return ret;
> + } else {
> + vmd_set_msi_remapping(vmd, false);
> + }
> }
>
> pci_add_resource(&resources, &vmd->resources[0]);
> @@ -998,6 +1090,7 @@ static int vmd_probe(struct pci_dev *dev, const
> struct pci_device_id *id)
> vmd->dev = dev;
> vmd->sysdata.domain = PCI_DOMAIN_NR_NOT_SET;
> + vmd->features = features;
> vmd->instance = ida_alloc(&vmd_instance_ida, GFP_KERNEL);
> if (vmd->instance < 0)
> return vmd->instance;
> @@ -1114,6 +1207,9 @@ static const struct pci_device_id vmd_ids[] = {
> .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW |
> VMD_FEAT_HAS_BUS_RESTRICTIONS |
> VMD_FEAT_CAN_BYPASS_MSI_REMAP,},
> + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_VMD_28C1),
> + .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW |
> + VMD_FEAT_USE_BIOS_INFO,},
> {PCI_VDEVICE(INTEL, 0x467f),
> .driver_data = VMD_FEATS_CLIENT,},
> {PCI_VDEVICE(INTEL, 0x4c3d),
> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> index 24cb42f66e4b..2a8ebe7df92e 100644
> --- a/include/linux/pci_ids.h
> +++ b/include/linux/pci_ids.h
> @@ -2889,6 +2889,7 @@
> #define PCI_DEVICE_ID_INTEL_HDA_ICH8 0x284b
> #define PCI_DEVICE_ID_INTEL_ICH8_6 0x2850
> #define PCI_DEVICE_ID_INTEL_VMD_28C0 0x28c0
> +#define PCI_DEVICE_ID_INTEL_VMD_28C1 0x28c1
> #define PCI_DEVICE_ID_INTEL_ICH9_0 0x2910
> #define PCI_DEVICE_ID_INTEL_ICH9_2 0x2912
> #define PCI_DEVICE_ID_INTEL_ICH9_3 0x2913
Gentle reminder. FYI, I also addressed comments from AI review.
Thanks.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2026-05-27 19:01 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-22 15:08 [PATCH v4] PCI: vmd: Add feature to scan BIOS enumerated devices Nirmal Patel
2026-05-22 15:42 ` sashiko-bot
2026-05-26 19:43 ` Nirmal Patel
2026-05-27 19:01 ` Nirmal Patel
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox