All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 0/5] (v)PCI: extended capability handling
@ 2026-02-10 10:51 Jan Beulich
  2026-02-10 10:52 ` [PATCH v4 1/5] x86/PCI: avoid re-evaluation of extended config space accessibility Jan Beulich
                   ` (4 more replies)
  0 siblings, 5 replies; 21+ messages in thread
From: Jan Beulich @ 2026-02-10 10:51 UTC (permalink / raw)
  To: xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Julien Grall, Stefano Stabellini, Anthony PERARD,
	Michal Orzel, Roger Pau Monné, Stewart Hildebrand

This is a follow-on to 'vPCI: avoid bogus "overlap in extended cap list"
warnings', addressing further issues noted there.

v4: Three new patches and some other re-work. See individual patches.

1: x86/PCI: avoid re-evaluation of extended config space accessibility
2: vPCI: introduce private header
3: vPCI: move vpci_init_capabilities() to a separate file
4: vPCI: move capability-list init
5: vPCI: re-init extended-capabilities when MMCFG availability changed

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v4 1/5] x86/PCI: avoid re-evaluation of extended config space accessibility
  2026-02-10 10:51 [PATCH v4 0/5] (v)PCI: extended capability handling Jan Beulich
@ 2026-02-10 10:52 ` Jan Beulich
  2026-02-10 15:59   ` Stewart Hildebrand
  2026-02-10 10:53 ` [PATCH v4 2/5] vPCI: introduce private header Jan Beulich
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2026-02-10 10:52 UTC (permalink / raw)
  To: xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Roger Pau Monné, Stewart Hildebrand

When, during boot, we have already correctly determined availability of
the MMCFG access method for a given bus range, there's then no need to
invoke pci_check_extcfg() again for every of the devices. This in
particular avoids ->ext_cfg to transiently indicate the wrong state.

Switch to using Xen style on lines being touched and immediately adjacent
ones.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
---
v4: Don't bypass mcfg_ioremap(cfg, idx, 0) in pci_mmcfg_arch_disable().
v3: New.

--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -528,6 +528,8 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         if ( !ret )
             ret = pci_segment_iterate(info.segment, physdev_check_pci_extcfg,
                                       &info);
+        else if ( ret > 0 ) /* Indication of "no change". */
+            ret = 0;
 
         if ( !ret && has_vpci(currd) && (info.flags & XEN_PCI_MMCFG_RESERVED) )
         {
--- a/xen/arch/x86/x86_64/mmconfig.h
+++ b/xen/arch/x86/x86_64/mmconfig.h
@@ -74,6 +74,6 @@ int pci_mmcfg_reserved(uint64_t address,
                        unsigned int flags);
 int pci_mmcfg_arch_init(void);
 int pci_mmcfg_arch_enable(unsigned int idx);
-void pci_mmcfg_arch_disable(unsigned int idx);
+int pci_mmcfg_arch_disable(unsigned int idx);
 
 #endif /* X86_64_MMCONFIG_H */
--- a/xen/arch/x86/x86_64/mmconfig-shared.c
+++ b/xen/arch/x86/x86_64/mmconfig-shared.c
@@ -388,8 +388,9 @@ static bool __init pci_mmcfg_reject_brok
                (unsigned int)cfg->start_bus_number,
                (unsigned int)cfg->end_bus_number);
 
-        if (!is_mmconf_reserved(addr, size, i, cfg) ||
-            pci_mmcfg_arch_enable(i)) {
+        if ( !is_mmconf_reserved(addr, size, i, cfg) ||
+             pci_mmcfg_arch_enable(i) < 0 )
+        {
             pci_mmcfg_arch_disable(i);
             valid = 0;
         }
@@ -417,8 +418,8 @@ void __init acpi_mmcfg_init(void)
         unsigned int i;
 
         pci_mmcfg_arch_init();
-        for (i = 0; i < pci_mmcfg_config_num; ++i)
-            if (pci_mmcfg_arch_enable(i))
+        for ( i = 0; i < pci_mmcfg_config_num; ++i )
+            if ( pci_mmcfg_arch_enable(i) < 0 )
                 valid = 0;
     } else {
         acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
@@ -458,10 +459,11 @@ int pci_mmcfg_reserved(uint64_t address,
                        segment, start_bus, end_bus, address, cfg->address);
                 return -EIO;
             }
-            if (flags & XEN_PCI_MMCFG_RESERVED)
+
+            if ( flags & XEN_PCI_MMCFG_RESERVED )
                 return pci_mmcfg_arch_enable(i);
-            pci_mmcfg_arch_disable(i);
-            return 0;
+
+            return pci_mmcfg_arch_disable(i);
         }
     }
 
--- a/xen/arch/x86/x86_64/mmconfig_64.c
+++ b/xen/arch/x86/x86_64/mmconfig_64.c
@@ -138,8 +138,9 @@ int pci_mmcfg_arch_enable(unsigned int i
     const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg;
     unsigned long start_mfn, end_mfn;
 
-    if (pci_mmcfg_virt[idx].virt)
-        return 0;
+    if ( pci_mmcfg_virt[idx].virt )
+        return 1;
+
     pci_mmcfg_virt[idx].virt = mcfg_ioremap(cfg, idx, PAGE_HYPERVISOR_UC);
     if (!pci_mmcfg_virt[idx].virt) {
         printk(KERN_ERR "PCI: Cannot map MCFG aperture for segment %04x\n",
@@ -160,9 +161,10 @@ int pci_mmcfg_arch_enable(unsigned int i
     return 0;
 }
 
-void pci_mmcfg_arch_disable(unsigned int idx)
+int pci_mmcfg_arch_disable(unsigned int idx)
 {
     const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg;
+    int ret = !pci_mmcfg_virt[idx].virt;
 
     pci_mmcfg_virt[idx].virt = NULL;
     /*
@@ -173,6 +175,8 @@ void pci_mmcfg_arch_disable(unsigned int
     mcfg_ioremap(cfg, idx, 0);
     printk(KERN_WARNING "PCI: Not using MCFG for segment %04x bus %02x-%02x\n",
            cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
+
+    return ret;
 }
 
 bool pci_mmcfg_decode(unsigned long mfn, unsigned int *seg, unsigned int *bdf)



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v4 2/5] vPCI: introduce private header
  2026-02-10 10:51 [PATCH v4 0/5] (v)PCI: extended capability handling Jan Beulich
  2026-02-10 10:52 ` [PATCH v4 1/5] x86/PCI: avoid re-evaluation of extended config space accessibility Jan Beulich
@ 2026-02-10 10:53 ` Jan Beulich
  2026-02-17 22:04   ` Stewart Hildebrand
  2026-02-18 21:58   ` Stewart Hildebrand
  2026-02-10 10:54 ` [PATCH v4 3/5] vPCI: move vpci_init_capabilities() to a separate file Jan Beulich
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 21+ messages in thread
From: Jan Beulich @ 2026-02-10 10:53 UTC (permalink / raw)
  To: xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Julien Grall, Stefano Stabellini, Anthony PERARD,
	Michal Orzel, Roger Pau Monné, Stewart Hildebrand,
	Bertrand Marquis, Volodymyr Babchuk

Before adding more private stuff to xen/vpci.h, split it up. In order to
be able to include the private header first in a CU, the per-arch struct
decls also need to move (to new asm/vpci.h files).

While adjusting the test harness'es Makefile, also switch the pre-existing
header symlink-ing rule to a pattern one.

Apart from in the test harness code, things only move; no functional
change intended.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Subsequently, at least on x86 more stuff may want moving into asm/vpci.h.
---
v4: New.

--- a/tools/tests/vpci/Makefile
+++ b/tools/tests/vpci/Makefile
@@ -14,8 +14,8 @@ else
 	$(warning HOSTCC != CC, will not run test)
 endif
 
-$(TARGET): vpci.c vpci.h list.h main.c emul.h
-	$(CC) $(CFLAGS_xeninclude) -g -o $@ vpci.c main.c
+$(TARGET): vpci.c vpci.h list.h private.h main.c emul.h
+	$(CC) $(CFLAGS_xeninclude) -include emul.h -g -o $@ vpci.c main.c
 
 .PHONY: clean
 clean:
@@ -34,10 +34,10 @@ uninstall:
 	$(RM) -- $(DESTDIR)$(LIBEXEC)/tests/$(TARGET)
 
 vpci.c: $(XEN_ROOT)/xen/drivers/vpci/vpci.c
-	# Remove includes and add the test harness header
-	sed -e '/#include/d' -e '1s/^/#include "emul.h"/' <$< >$@
+	sed -e '/#include/d' <$< >$@
+
+private.h: %.h: $(XEN_ROOT)/xen/drivers/vpci/%.h
+	sed -e '/#include/d' <$< >$@
 
-list.h: $(XEN_ROOT)/xen/include/xen/list.h
-vpci.h: $(XEN_ROOT)/xen/include/xen/vpci.h
-list.h vpci.h:
+list.h vpci.h: %.h: $(XEN_ROOT)/xen/include/xen/%.h
 	sed -e '/#include/d' <$< >$@
--- a/tools/tests/vpci/emul.h
+++ b/tools/tests/vpci/emul.h
@@ -86,6 +86,7 @@ typedef union {
 
 #define CONFIG_HAS_VPCI
 #include "vpci.h"
+#include "private.h"
 
 #define __hwdom_init
 
--- a/xen/arch/arm/include/asm/pci.h
+++ b/xen/arch/arm/include/asm/pci.h
@@ -31,14 +31,6 @@ struct arch_pci_dev {
     struct device dev;
 };
 
-/* Arch-specific MSI data for vPCI. */
-struct vpci_arch_msi {
-};
-
-/* Arch-specific MSI-X entry data for vPCI. */
-struct vpci_arch_msix_entry {
-};
-
 /*
  * Because of the header cross-dependencies, e.g. we need both
  * struct pci_dev and struct arch_pci_dev at the same time, this cannot be
--- /dev/null
+++ b/xen/arch/arm/include/asm/vpci.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef ARM_VPCI_H
+#define ARM_VPCI_H
+
+/* Arch-specific MSI data for vPCI. */
+struct vpci_arch_msi {
+};
+
+/* Arch-specific MSI-X entry data for vPCI. */
+struct vpci_arch_msix_entry {
+};
+
+#endif /* ARM_VPCI_H */
--- a/xen/arch/x86/include/asm/hvm/io.h
+++ b/xen/arch/x86/include/asm/hvm/io.h
@@ -97,17 +97,6 @@ void msixtbl_init(struct domain *d);
 static inline void msixtbl_init(struct domain *d) {}
 #endif
 
-/* Arch-specific MSI data for vPCI. */
-struct vpci_arch_msi {
-    int pirq;
-    bool bound;
-};
-
-/* Arch-specific MSI-X entry data for vPCI. */
-struct vpci_arch_msix_entry {
-    int pirq;
-};
-
 void stdvga_init(struct domain *d);
 
 extern void hvm_dpci_msi_eoi(struct domain *d, int vector);
--- /dev/null
+++ b/xen/arch/x86/include/asm/vpci.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef X86_VPCI_H
+#define X86_VPCI_H
+
+#include <xen/stdbool.h>
+
+/* Arch-specific MSI data for vPCI. */
+struct vpci_arch_msi {
+    int pirq;
+    bool bound;
+};
+
+/* Arch-specific MSI-X entry data for vPCI. */
+struct vpci_arch_msix_entry {
+    int pirq;
+};
+
+#endif /* X86_VPCI_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -17,11 +17,12 @@
  * License along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "private.h"
+
 #include <xen/iocap.h>
 #include <xen/lib.h>
 #include <xen/sched.h>
 #include <xen/softirq.h>
-#include <xen/vpci.h>
 
 #include <xsm/xsm.h>
 
--- a/xen/drivers/vpci/msi.c
+++ b/xen/drivers/vpci/msi.c
@@ -16,9 +16,10 @@
  * License along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "private.h"
+
 #include <xen/sched.h>
 #include <xen/softirq.h>
-#include <xen/vpci.h>
 
 #include <asm/msi.h>
 
--- a/xen/drivers/vpci/msix.c
+++ b/xen/drivers/vpci/msix.c
@@ -17,10 +17,11 @@
  * License along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "private.h"
+
 #include <xen/io.h>
 #include <xen/lib.h>
 #include <xen/sched.h>
-#include <xen/vpci.h>
 
 #include <asm/msi.h>
 #include <asm/p2m.h>
--- /dev/null
+++ b/xen/drivers/vpci/private.h
@@ -0,0 +1,124 @@
+#ifndef VPCI_PRIVATE_H
+#define VPCI_PRIVATE_H
+
+#include <xen/vpci.h>
+
+typedef uint32_t vpci_read_t(const struct pci_dev *pdev, unsigned int reg,
+                             void *data);
+
+typedef void vpci_write_t(const struct pci_dev *pdev, unsigned int reg,
+                          uint32_t val, void *data);
+
+typedef struct {
+    unsigned int id;
+    bool is_ext;
+    int (* init)(struct pci_dev *pdev);
+    int (* cleanup)(const struct pci_dev *pdev, bool hide);
+} vpci_capability_t;
+
+#define REGISTER_VPCI_CAPABILITY(cap, name, finit, fclean, ext) \
+    static const vpci_capability_t name##_entry \
+        __used_section(".data.rel.ro.vpci") = { \
+        .id = (cap), \
+        .init = (finit), \
+        .cleanup = (fclean), \
+        .is_ext = (ext), \
+    }
+
+#define REGISTER_VPCI_CAP(name, finit, fclean) \
+    REGISTER_VPCI_CAPABILITY(PCI_CAP_ID_##name, name, finit, fclean, false)
+#define REGISTER_VPCI_EXTCAP(name, finit, fclean) \
+    REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true)
+
+/* Add/remove a register handler. */
+int __must_check vpci_add_register_mask(struct vpci *vpci,
+                                        vpci_read_t *read_handler,
+                                        vpci_write_t *write_handler,
+                                        unsigned int offset, unsigned int size,
+                                        void *data, uint32_t ro_mask,
+                                        uint32_t rw1c_mask, uint32_t rsvdp_mask,
+                                        uint32_t rsvdz_mask);
+int __must_check vpci_add_register(struct vpci *vpci,
+                                   vpci_read_t *read_handler,
+                                   vpci_write_t *write_handler,
+                                   unsigned int offset, unsigned int size,
+                                   void *data);
+
+int vpci_remove_registers(struct vpci *vpci, unsigned int start,
+                          unsigned int size);
+
+/* Helper to return the value passed in data. */
+uint32_t cf_check vpci_read_val(
+    const struct pci_dev *pdev, unsigned int reg, void *data);
+
+/* Passthrough handlers. */
+uint32_t cf_check vpci_hw_read8(
+    const struct pci_dev *pdev, unsigned int reg, void *data);
+uint32_t cf_check vpci_hw_read16(
+    const struct pci_dev *pdev, unsigned int reg, void *data);
+uint32_t cf_check vpci_hw_read32(
+    const struct pci_dev *pdev, unsigned int reg, void *data);
+void cf_check vpci_hw_write8(
+    const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data);
+void cf_check vpci_hw_write16(
+    const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data);
+
+#ifdef __XEN__
+/* Make sure there's a hole in the p2m for the MSIX mmio areas. */
+int vpci_make_msix_hole(const struct pci_dev *pdev);
+
+/*
+ * Helper functions to fetch MSIX related data. They are used by both the
+ * emulated MSIX code and the BAR handlers.
+ */
+static inline paddr_t vmsix_table_host_base(const struct vpci *vpci,
+                                            unsigned int nr)
+{
+    return vpci->header.bars[vpci->msix->tables[nr] & PCI_MSIX_BIRMASK].addr;
+}
+
+static inline paddr_t vmsix_table_host_addr(const struct vpci *vpci,
+                                            unsigned int nr)
+{
+    return vmsix_table_host_base(vpci, nr) +
+           (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK);
+}
+
+static inline paddr_t vmsix_table_base(const struct vpci *vpci, unsigned int nr)
+{
+    return vpci->header.bars[vpci->msix->tables[nr] &
+                             PCI_MSIX_BIRMASK].guest_addr;
+}
+
+static inline paddr_t vmsix_table_addr(const struct vpci *vpci, unsigned int nr)
+{
+    return vmsix_table_base(vpci, nr) +
+           (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK);
+}
+
+/*
+ * Note regarding the size calculation of the PBA: the spec mentions "The last
+ * QWORD will not necessarily be fully populated", so it implies that the PBA
+ * size is 64-bit aligned.
+ */
+static inline size_t vmsix_table_size(const struct vpci *vpci, unsigned int nr)
+{
+    return
+        (nr == VPCI_MSIX_TABLE) ? vpci->msix->max_entries * PCI_MSIX_ENTRY_SIZE
+                                : ROUNDUP(DIV_ROUND_UP(vpci->msix->max_entries,
+                                                       8), 8);
+}
+
+#endif /* __XEN__ */
+
+#endif /* VPCI_PRIVATE_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- a/xen/drivers/vpci/rebar.c
+++ b/xen/drivers/vpci/rebar.c
@@ -5,8 +5,9 @@
  * Author: Jiqian Chen <Jiqian.Chen@amd.com>
  */
 
+#include "private.h"
+
 #include <xen/sched.h>
-#include <xen/vpci.h>
 
 static void cf_check rebar_ctrl_write(const struct pci_dev *pdev,
                                       unsigned int reg,
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -17,8 +17,9 @@
  * License along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "private.h"
+
 #include <xen/sched.h>
-#include <xen/vpci.h>
 #include <xen/vmap.h>
 
 /* Internal struct to store the emulated PCI registers. */
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -7,18 +7,7 @@
 #include <xen/types.h>
 #include <xen/list.h>
 
-typedef uint32_t vpci_read_t(const struct pci_dev *pdev, unsigned int reg,
-                             void *data);
-
-typedef void vpci_write_t(const struct pci_dev *pdev, unsigned int reg,
-                          uint32_t val, void *data);
-
-typedef struct {
-    unsigned int id;
-    bool is_ext;
-    int (* init)(struct pci_dev *pdev);
-    int (* cleanup)(const struct pci_dev *pdev, bool hide);
-} vpci_capability_t;
+#include <asm/vpci.h>
 
 #define VPCI_ECAM_BDF(addr)     (((addr) & 0x0ffff000) >> 12)
 
@@ -30,20 +19,6 @@ typedef struct {
  */
 #define VPCI_MAX_VIRT_DEV       (PCI_SLOT(~0) + 1)
 
-#define REGISTER_VPCI_CAPABILITY(cap, name, finit, fclean, ext) \
-    static const vpci_capability_t name##_entry \
-        __used_section(".data.rel.ro.vpci") = { \
-        .id = (cap), \
-        .init = (finit), \
-        .cleanup = (fclean), \
-        .is_ext = (ext), \
-    }
-
-#define REGISTER_VPCI_CAP(name, finit, fclean) \
-    REGISTER_VPCI_CAPABILITY(PCI_CAP_ID_##name, name, finit, fclean, false)
-#define REGISTER_VPCI_EXTCAP(name, finit, fclean) \
-    REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true)
-
 int __must_check vpci_init_header(struct pci_dev *pdev);
 
 /* Assign vPCI to device by adding handlers. */
@@ -52,44 +27,11 @@ int __must_check vpci_assign_device(stru
 /* Remove all handlers and free vpci related structures. */
 void vpci_deassign_device(struct pci_dev *pdev);
 
-/* Add/remove a register handler. */
-int __must_check vpci_add_register_mask(struct vpci *vpci,
-                                        vpci_read_t *read_handler,
-                                        vpci_write_t *write_handler,
-                                        unsigned int offset, unsigned int size,
-                                        void *data, uint32_t ro_mask,
-                                        uint32_t rw1c_mask, uint32_t rsvdp_mask,
-                                        uint32_t rsvdz_mask);
-int __must_check vpci_add_register(struct vpci *vpci,
-                                   vpci_read_t *read_handler,
-                                   vpci_write_t *write_handler,
-                                   unsigned int offset, unsigned int size,
-                                   void *data);
-
-int vpci_remove_registers(struct vpci *vpci, unsigned int start,
-                          unsigned int size);
-
 /* Generic read/write handlers for the PCI config space. */
 uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int size);
 void vpci_write(pci_sbdf_t sbdf, unsigned int reg, unsigned int size,
                 uint32_t data);
 
-/* Helper to return the value passed in data. */
-uint32_t cf_check vpci_read_val(
-    const struct pci_dev *pdev, unsigned int reg, void *data);
-
-/* Passthrough handlers. */
-uint32_t cf_check vpci_hw_read8(
-    const struct pci_dev *pdev, unsigned int reg, void *data);
-uint32_t cf_check vpci_hw_read16(
-    const struct pci_dev *pdev, unsigned int reg, void *data);
-uint32_t cf_check vpci_hw_read32(
-    const struct pci_dev *pdev, unsigned int reg, void *data);
-void cf_check vpci_hw_write8(
-    const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data);
-void cf_check vpci_hw_write16(
-    const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data);
-
 /*
  * Check for pending vPCI operations on this vcpu. Returns true if the vcpu
  * should not run.
@@ -213,9 +155,6 @@ struct vpci_vcpu {
 #ifdef __XEN__
 void vpci_dump_msi(void);
 
-/* Make sure there's a hole in the p2m for the MSIX mmio areas. */
-int vpci_make_msix_hole(const struct pci_dev *pdev);
-
 /* Arch-specific vPCI MSI helpers. */
 void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev,
                         unsigned int entry, bool mask);
@@ -238,48 +177,6 @@ int __must_check vpci_msix_arch_disable_
 void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry);
 int vpci_msix_arch_print(const struct vpci_msix *msix);
 
-/*
- * Helper functions to fetch MSIX related data. They are used by both the
- * emulated MSIX code and the BAR handlers.
- */
-static inline paddr_t vmsix_table_host_base(const struct vpci *vpci,
-                                            unsigned int nr)
-{
-    return vpci->header.bars[vpci->msix->tables[nr] & PCI_MSIX_BIRMASK].addr;
-}
-
-static inline paddr_t vmsix_table_host_addr(const struct vpci *vpci,
-                                            unsigned int nr)
-{
-    return vmsix_table_host_base(vpci, nr) +
-           (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK);
-}
-
-static inline paddr_t vmsix_table_base(const struct vpci *vpci, unsigned int nr)
-{
-    return vpci->header.bars[vpci->msix->tables[nr] &
-                             PCI_MSIX_BIRMASK].guest_addr;
-}
-
-static inline paddr_t vmsix_table_addr(const struct vpci *vpci, unsigned int nr)
-{
-    return vmsix_table_base(vpci, nr) +
-           (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK);
-}
-
-/*
- * Note regarding the size calculation of the PBA: the spec mentions "The last
- * QWORD will not necessarily be fully populated", so it implies that the PBA
- * size is 64-bit aligned.
- */
-static inline size_t vmsix_table_size(const struct vpci *vpci, unsigned int nr)
-{
-    return
-        (nr == VPCI_MSIX_TABLE) ? vpci->msix->max_entries * PCI_MSIX_ENTRY_SIZE
-                                : ROUNDUP(DIV_ROUND_UP(vpci->msix->max_entries,
-                                                       8), 8);
-}
-
 static inline unsigned int vmsix_entry_nr(const struct vpci_msix *msix,
                                           const struct vpci_msix_entry *entry)
 {



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v4 3/5] vPCI: move vpci_init_capabilities() to a separate file
  2026-02-10 10:51 [PATCH v4 0/5] (v)PCI: extended capability handling Jan Beulich
  2026-02-10 10:52 ` [PATCH v4 1/5] x86/PCI: avoid re-evaluation of extended config space accessibility Jan Beulich
  2026-02-10 10:53 ` [PATCH v4 2/5] vPCI: introduce private header Jan Beulich
@ 2026-02-10 10:54 ` Jan Beulich
  2026-02-18 15:50   ` Stewart Hildebrand
  2026-02-20  8:48   ` Roger Pau Monné
  2026-02-10 10:54 ` [PATCH v4 4/5] vPCI: move capability-list init Jan Beulich
  2026-02-10 10:55 ` [PATCH v4 5/5] vPCI: re-init extended-capabilities when MMCFG availability changed Jan Beulich
  4 siblings, 2 replies; 21+ messages in thread
From: Jan Beulich @ 2026-02-10 10:54 UTC (permalink / raw)
  To: xen-devel@lists.xenproject.org; +Cc: Roger Pau Monné, Stewart Hildebrand

Let's keep capability handling together. Start with moving
vpci_init_capabilities() and its helpers, plus splitting out of its
cleanup counterpart.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
vpci_get_register(), while now only used by cap.c, didn't look like it
would want moving there.
---
v4: New.

--- a/xen/drivers/vpci/Makefile
+++ b/xen/drivers/vpci/Makefile
@@ -1,2 +1,3 @@
+obj-y += cap.o
 obj-y += vpci.o header.o rebar.o
 obj-$(CONFIG_HAS_PCI_MSI) += msi.o msix.o
--- /dev/null
+++ b/xen/drivers/vpci/cap.c
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Capability handling for guest PCI configuration space.
+ */
+
+#include "private.h"
+
+#include <xen/sched.h>
+
+extern const vpci_capability_t __start_vpci_array[];
+extern const vpci_capability_t __end_vpci_array[];
+#define NUM_VPCI_INIT (__end_vpci_array - __start_vpci_array)
+
+static struct vpci_register *vpci_get_previous_cap_register(
+    const struct vpci *vpci, unsigned int offset)
+{
+    unsigned int next;
+    struct vpci_register *r;
+
+    if ( offset < 0x40 )
+    {
+        ASSERT_UNREACHABLE();
+        return NULL;
+    }
+
+    for ( r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1); r;
+          r = next >= 0x40 ? vpci_get_register(vpci,
+                                               next + PCI_CAP_LIST_NEXT, 1)
+                           : NULL )
+    {
+        next = (unsigned int)(uintptr_t)r->private;
+        ASSERT(next == (uintptr_t)r->private);
+        if ( next == offset )
+            break;
+    }
+
+    return r;
+}
+
+static int vpci_capability_hide(const struct pci_dev *pdev, unsigned int cap)
+{
+    const unsigned int offset = pci_find_cap_offset(pdev->sbdf, cap);
+    struct vpci_register *prev_r, *next_r;
+    struct vpci *vpci = pdev->vpci;
+
+    if ( !offset )
+    {
+        ASSERT_UNREACHABLE();
+        return 0;
+    }
+
+    spin_lock(&vpci->lock);
+    prev_r = vpci_get_previous_cap_register(vpci, offset);
+    next_r = vpci_get_register(vpci, offset + PCI_CAP_LIST_NEXT, 1);
+    if ( !prev_r || !next_r )
+    {
+        spin_unlock(&vpci->lock);
+        return -ENODEV;
+    }
+
+    prev_r->private = next_r->private;
+    /*
+     * Not calling vpci_remove_registers() here is to avoid redoing
+     * the register search.
+     */
+    list_del(&next_r->node);
+    spin_unlock(&vpci->lock);
+    xfree(next_r);
+
+    if ( !is_hardware_domain(pdev->domain) )
+        return vpci_remove_registers(vpci, offset + PCI_CAP_LIST_ID, 1);
+
+    return 0;
+}
+
+static struct vpci_register *vpci_get_previous_ext_cap_register(
+    const struct vpci *vpci, unsigned int offset)
+{
+    unsigned int pos = PCI_CFG_SPACE_SIZE;
+    struct vpci_register *r;
+
+    if ( offset <= PCI_CFG_SPACE_SIZE )
+    {
+        ASSERT_UNREACHABLE();
+        return NULL;
+    }
+
+    for ( r = vpci_get_register(vpci, pos, 4); r;
+          r = pos > PCI_CFG_SPACE_SIZE ? vpci_get_register(vpci, pos, 4)
+                                       : NULL )
+    {
+        uint32_t header = (uint32_t)(uintptr_t)r->private;
+
+        ASSERT(header == (uintptr_t)r->private);
+
+        pos = PCI_EXT_CAP_NEXT(header);
+        if ( pos == offset )
+            break;
+    }
+
+    return r;
+}
+
+static int vpci_ext_capability_hide(
+    const struct pci_dev *pdev, unsigned int cap)
+{
+    const unsigned int offset = pci_find_ext_capability(pdev, cap);
+    struct vpci_register *r, *prev_r;
+    struct vpci *vpci = pdev->vpci;
+    uint32_t header, pre_header;
+
+    if ( offset < PCI_CFG_SPACE_SIZE )
+    {
+        ASSERT_UNREACHABLE();
+        return 0;
+    }
+
+    spin_lock(&vpci->lock);
+    r = vpci_get_register(vpci, offset, 4);
+    if ( !r )
+    {
+        spin_unlock(&vpci->lock);
+        return -ENODEV;
+    }
+
+    header = (uint32_t)(uintptr_t)r->private;
+    if ( offset == PCI_CFG_SPACE_SIZE )
+    {
+        if ( PCI_EXT_CAP_NEXT(header) <= PCI_CFG_SPACE_SIZE )
+            r->private = (void *)0;
+        else
+            /*
+             * The first extended capability (0x100) can not be removed from
+             * the linked list, so instead mask its capability ID to return 0
+             * hopefully forcing OSes to skip it.
+             */
+            r->private = (void *)(uintptr_t)(header & ~PCI_EXT_CAP_ID(header));
+
+        spin_unlock(&vpci->lock);
+        return 0;
+    }
+
+    prev_r = vpci_get_previous_ext_cap_register(vpci, offset);
+    if ( !prev_r )
+    {
+        spin_unlock(&vpci->lock);
+        return -ENODEV;
+    }
+
+    pre_header = (uint32_t)(uintptr_t)prev_r->private;
+    pre_header &= ~PCI_EXT_CAP_NEXT_MASK;
+    pre_header |= header & PCI_EXT_CAP_NEXT_MASK;
+    prev_r->private = (void *)(uintptr_t)pre_header;
+
+    list_del(&r->node);
+    spin_unlock(&vpci->lock);
+    xfree(r);
+
+    return 0;
+}
+
+int vpci_init_capabilities(struct pci_dev *pdev)
+{
+    for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ )
+    {
+        const vpci_capability_t *capability = &__start_vpci_array[i];
+        const unsigned int cap = capability->id;
+        const bool is_ext = capability->is_ext;
+        unsigned int pos = 0;
+        int rc;
+
+        if ( !is_ext )
+            pos = pci_find_cap_offset(pdev->sbdf, cap);
+        else if ( is_hardware_domain(pdev->domain) )
+            pos = pci_find_ext_capability(pdev, cap);
+
+        if ( !pos )
+            continue;
+
+        rc = capability->init(pdev);
+        if ( rc )
+        {
+            const char *type = is_ext ? "extended" : "legacy";
+
+            printk(XENLOG_WARNING
+                   "%pd %pp: init %s cap %u fail rc=%d, mask it\n",
+                   pdev->domain, &pdev->sbdf, type, cap, rc);
+
+            if ( capability->cleanup )
+            {
+                rc = capability->cleanup(pdev, true);
+                if ( rc )
+                {
+                    printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n",
+                           pdev->domain, &pdev->sbdf, type, cap, rc);
+                    if ( !is_hardware_domain(pdev->domain) )
+                        return rc;
+                }
+            }
+
+            if ( !is_ext )
+                rc = vpci_capability_hide(pdev, cap);
+            else
+                rc = vpci_ext_capability_hide(pdev, cap);
+            if ( rc )
+            {
+                printk(XENLOG_ERR "%pd %pp: hide %s cap %u fail rc=%d\n",
+                       pdev->domain, &pdev->sbdf, type, cap, rc);
+                return rc;
+            }
+        }
+    }
+
+    return 0;
+}
+
+void vpci_cleanup_capabilities(struct pci_dev *pdev)
+{
+    for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ )
+    {
+        const vpci_capability_t *capability = &__start_vpci_array[i];
+        const unsigned int cap = capability->id;
+        unsigned int pos = 0;
+
+        if ( !capability->cleanup )
+            continue;
+
+        if ( !capability->is_ext )
+            pos = pci_find_cap_offset(pdev->sbdf, cap);
+        else if ( is_hardware_domain(pdev->domain) )
+            pos = pci_find_ext_capability(pdev, cap);
+        if ( pos )
+        {
+            int rc = capability->cleanup(pdev, false);
+
+            if ( rc )
+                printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n",
+                       pdev->domain, &pdev->sbdf,
+                       capability->is_ext ? "extended" : "legacy", cap, rc);
+        }
+    }
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- a/xen/drivers/vpci/private.h
+++ b/xen/drivers/vpci/private.h
@@ -9,6 +9,20 @@ typedef uint32_t vpci_read_t(const struc
 typedef void vpci_write_t(const struct pci_dev *pdev, unsigned int reg,
                           uint32_t val, void *data);
 
+/* Internal struct to store the emulated PCI registers. */
+struct vpci_register {
+    vpci_read_t *read;
+    vpci_write_t *write;
+    unsigned int size;
+    unsigned int offset;
+    void *private;
+    struct list_head node;
+    uint32_t ro_mask;
+    uint32_t rw1c_mask;
+    uint32_t rsvdp_mask;
+    uint32_t rsvdz_mask;
+};
+
 typedef struct {
     unsigned int id;
     bool is_ext;
@@ -30,6 +44,9 @@ typedef struct {
 #define REGISTER_VPCI_EXTCAP(name, finit, fclean) \
     REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true)
 
+int vpci_init_capabilities(struct pci_dev *pdev);
+void vpci_cleanup_capabilities(struct pci_dev *pdev);
+
 /* Add/remove a register handler. */
 int __must_check vpci_add_register_mask(struct vpci *vpci,
                                         vpci_read_t *read_handler,
@@ -47,6 +64,10 @@ int __must_check vpci_add_register(struc
 int vpci_remove_registers(struct vpci *vpci, unsigned int start,
                           unsigned int size);
 
+struct vpci_register *vpci_get_register(const struct vpci *vpci,
+                                        unsigned int offset,
+                                        unsigned int size);
+
 /* Helper to return the value passed in data. */
 uint32_t cf_check vpci_read_val(
     const struct pci_dev *pdev, unsigned int reg, void *data);
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -22,24 +22,7 @@
 #include <xen/sched.h>
 #include <xen/vmap.h>
 
-/* Internal struct to store the emulated PCI registers. */
-struct vpci_register {
-    vpci_read_t *read;
-    vpci_write_t *write;
-    unsigned int size;
-    unsigned int offset;
-    void *private;
-    struct list_head node;
-    uint32_t ro_mask;
-    uint32_t rw1c_mask;
-    uint32_t rsvdp_mask;
-    uint32_t rsvdz_mask;
-};
-
 #ifdef __XEN__
-extern const vpci_capability_t __start_vpci_array[];
-extern const vpci_capability_t __end_vpci_array[];
-#define NUM_VPCI_INIT (__end_vpci_array - __start_vpci_array)
 
 #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
 static int assign_virtual_sbdf(struct pci_dev *pdev)
@@ -84,9 +67,9 @@ static int assign_virtual_sbdf(struct pc
 
 #endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */
 
-static struct vpci_register *vpci_get_register(const struct vpci *vpci,
-                                               unsigned int offset,
-                                               unsigned int size)
+struct vpci_register *vpci_get_register(const struct vpci *vpci,
+                                        unsigned int offset,
+                                        unsigned int size)
 {
     struct vpci_register *r;
 
@@ -104,209 +87,6 @@ static struct vpci_register *vpci_get_re
     return NULL;
 }
 
-static struct vpci_register *vpci_get_previous_cap_register(
-    const struct vpci *vpci, unsigned int offset)
-{
-    unsigned int next;
-    struct vpci_register *r;
-
-    if ( offset < 0x40 )
-    {
-        ASSERT_UNREACHABLE();
-        return NULL;
-    }
-
-    for ( r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1); r;
-          r = next >= 0x40 ? vpci_get_register(vpci,
-                                               next + PCI_CAP_LIST_NEXT, 1)
-                           : NULL )
-    {
-        next = (unsigned int)(uintptr_t)r->private;
-        ASSERT(next == (uintptr_t)r->private);
-        if ( next == offset )
-            break;
-    }
-
-    return r;
-}
-
-static int vpci_capability_hide(const struct pci_dev *pdev, unsigned int cap)
-{
-    const unsigned int offset = pci_find_cap_offset(pdev->sbdf, cap);
-    struct vpci_register *prev_r, *next_r;
-    struct vpci *vpci = pdev->vpci;
-
-    if ( !offset )
-    {
-        ASSERT_UNREACHABLE();
-        return 0;
-    }
-
-    spin_lock(&vpci->lock);
-    prev_r = vpci_get_previous_cap_register(vpci, offset);
-    next_r = vpci_get_register(vpci, offset + PCI_CAP_LIST_NEXT, 1);
-    if ( !prev_r || !next_r )
-    {
-        spin_unlock(&vpci->lock);
-        return -ENODEV;
-    }
-
-    prev_r->private = next_r->private;
-    /*
-     * Not calling vpci_remove_registers() here is to avoid redoing
-     * the register search.
-     */
-    list_del(&next_r->node);
-    spin_unlock(&vpci->lock);
-    xfree(next_r);
-
-    if ( !is_hardware_domain(pdev->domain) )
-        return vpci_remove_registers(vpci, offset + PCI_CAP_LIST_ID, 1);
-
-    return 0;
-}
-
-static struct vpci_register *vpci_get_previous_ext_cap_register(
-    const struct vpci *vpci, unsigned int offset)
-{
-    unsigned int pos = PCI_CFG_SPACE_SIZE;
-    struct vpci_register *r;
-
-    if ( offset <= PCI_CFG_SPACE_SIZE )
-    {
-        ASSERT_UNREACHABLE();
-        return NULL;
-    }
-
-    for ( r = vpci_get_register(vpci, pos, 4); r;
-          r = pos > PCI_CFG_SPACE_SIZE ? vpci_get_register(vpci, pos, 4)
-                                       : NULL )
-    {
-        uint32_t header = (uint32_t)(uintptr_t)r->private;
-
-        ASSERT(header == (uintptr_t)r->private);
-
-        pos = PCI_EXT_CAP_NEXT(header);
-        if ( pos == offset )
-            break;
-    }
-
-    return r;
-}
-
-static int vpci_ext_capability_hide(
-    const struct pci_dev *pdev, unsigned int cap)
-{
-    const unsigned int offset = pci_find_ext_capability(pdev, cap);
-    struct vpci_register *r, *prev_r;
-    struct vpci *vpci = pdev->vpci;
-    uint32_t header, pre_header;
-
-    if ( offset < PCI_CFG_SPACE_SIZE )
-    {
-        ASSERT_UNREACHABLE();
-        return 0;
-    }
-
-    spin_lock(&vpci->lock);
-    r = vpci_get_register(vpci, offset, 4);
-    if ( !r )
-    {
-        spin_unlock(&vpci->lock);
-        return -ENODEV;
-    }
-
-    header = (uint32_t)(uintptr_t)r->private;
-    if ( offset == PCI_CFG_SPACE_SIZE )
-    {
-        if ( PCI_EXT_CAP_NEXT(header) <= PCI_CFG_SPACE_SIZE )
-            r->private = (void *)0;
-        else
-            /*
-             * The first extended capability (0x100) can not be removed from
-             * the linked list, so instead mask its capability ID to return 0
-             * hopefully forcing OSes to skip it.
-             */
-            r->private = (void *)(uintptr_t)(header & ~PCI_EXT_CAP_ID(header));
-
-        spin_unlock(&vpci->lock);
-        return 0;
-    }
-
-    prev_r = vpci_get_previous_ext_cap_register(vpci, offset);
-    if ( !prev_r )
-    {
-        spin_unlock(&vpci->lock);
-        return -ENODEV;
-    }
-
-    pre_header = (uint32_t)(uintptr_t)prev_r->private;
-    pre_header &= ~PCI_EXT_CAP_NEXT_MASK;
-    pre_header |= header & PCI_EXT_CAP_NEXT_MASK;
-    prev_r->private = (void *)(uintptr_t)pre_header;
-
-    list_del(&r->node);
-    spin_unlock(&vpci->lock);
-    xfree(r);
-
-    return 0;
-}
-
-static int vpci_init_capabilities(struct pci_dev *pdev)
-{
-    for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ )
-    {
-        const vpci_capability_t *capability = &__start_vpci_array[i];
-        const unsigned int cap = capability->id;
-        const bool is_ext = capability->is_ext;
-        unsigned int pos = 0;
-        int rc;
-
-        if ( !is_ext )
-            pos = pci_find_cap_offset(pdev->sbdf, cap);
-        else if ( is_hardware_domain(pdev->domain) )
-            pos = pci_find_ext_capability(pdev, cap);
-
-        if ( !pos )
-            continue;
-
-        rc = capability->init(pdev);
-        if ( rc )
-        {
-            const char *type = is_ext ? "extended" : "legacy";
-
-            printk(XENLOG_WARNING
-                   "%pd %pp: init %s cap %u fail rc=%d, mask it\n",
-                   pdev->domain, &pdev->sbdf, type, cap, rc);
-
-            if ( capability->cleanup )
-            {
-                rc = capability->cleanup(pdev, true);
-                if ( rc )
-                {
-                    printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n",
-                           pdev->domain, &pdev->sbdf, type, cap, rc);
-                    if ( !is_hardware_domain(pdev->domain) )
-                        return rc;
-                }
-            }
-
-            if ( !is_ext )
-                rc = vpci_capability_hide(pdev, cap);
-            else
-                rc = vpci_ext_capability_hide(pdev, cap);
-            if ( rc )
-            {
-                printk(XENLOG_ERR "%pd %pp: hide %s cap %u fail rc=%d\n",
-                       pdev->domain, &pdev->sbdf, type, cap, rc);
-                return rc;
-            }
-        }
-    }
-
-    return 0;
-}
-
 void vpci_deassign_device(struct pci_dev *pdev)
 {
     unsigned int i;
@@ -322,29 +102,7 @@ void vpci_deassign_device(struct pci_dev
                     &pdev->domain->vpci_dev_assigned_map);
 #endif
 
-    for ( i = 0; i < NUM_VPCI_INIT; i++ )
-    {
-        const vpci_capability_t *capability = &__start_vpci_array[i];
-        const unsigned int cap = capability->id;
-        unsigned int pos = 0;
-
-        if ( !capability->cleanup )
-            continue;
-
-        if ( !capability->is_ext )
-            pos = pci_find_cap_offset(pdev->sbdf, cap);
-        else if ( is_hardware_domain(pdev->domain) )
-            pos = pci_find_ext_capability(pdev, cap);
-        if ( pos )
-        {
-            int rc = capability->cleanup(pdev, false);
-
-            if ( rc )
-                printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n",
-                       pdev->domain, &pdev->sbdf,
-                       capability->is_ext ? "extended" : "legacy", cap, rc);
-        }
-    }
+    vpci_cleanup_capabilities(pdev);
 
     spin_lock(&pdev->vpci->lock);
     while ( !list_empty(&pdev->vpci->handlers) )



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v4 4/5] vPCI: move capability-list init
  2026-02-10 10:51 [PATCH v4 0/5] (v)PCI: extended capability handling Jan Beulich
                   ` (2 preceding siblings ...)
  2026-02-10 10:54 ` [PATCH v4 3/5] vPCI: move vpci_init_capabilities() to a separate file Jan Beulich
@ 2026-02-10 10:54 ` Jan Beulich
  2026-02-18 20:54   ` Stewart Hildebrand
  2026-02-20  8:59   ` Roger Pau Monné
  2026-02-10 10:55 ` [PATCH v4 5/5] vPCI: re-init extended-capabilities when MMCFG availability changed Jan Beulich
  4 siblings, 2 replies; 21+ messages in thread
From: Jan Beulich @ 2026-02-10 10:54 UTC (permalink / raw)
  To: xen-devel@lists.xenproject.org; +Cc: Roger Pau Monné, Stewart Hildebrand

... both for when the functions are invoked and where they live in source.
Don't invoke them directly in vpci_init_header(), but instead first thing
in vpci_init_capabilities().

Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: New.

--- a/xen/drivers/vpci/cap.c
+++ b/xen/drivers/vpci/cap.c
@@ -159,15 +159,150 @@ static int vpci_ext_capability_hide(
     return 0;
 }
 
+static int vpci_init_capability_list(struct pci_dev *pdev)
+{
+    int rc;
+    bool mask_cap_list = false;
+    bool is_hwdom = is_hardware_domain(pdev->domain);
+
+    if ( pci_conf_read16(pdev->sbdf, PCI_STATUS) & PCI_STATUS_CAP_LIST )
+    {
+        /* Only expose capabilities to the guest that vPCI can handle. */
+        unsigned int next, ttl = 48;
+        static const unsigned int supported_caps[] = {
+            PCI_CAP_ID_MSI,
+            PCI_CAP_ID_MSIX,
+        };
+        /*
+         * For dom0, we should expose all capabilities instead of a fixed
+         * capabilities array, so setting n to 0 here is to get the next
+         * capability position directly in pci_find_next_cap_ttl.
+         */
+        const unsigned int n = is_hwdom ? 0 : ARRAY_SIZE(supported_caps);
+
+        next = pci_find_next_cap_ttl(pdev->sbdf, PCI_CAPABILITY_LIST,
+                                     supported_caps, n, &ttl);
+
+        rc = vpci_add_register(pdev->vpci, vpci_read_val,
+                               is_hwdom ? vpci_hw_write8 : NULL,
+                               PCI_CAPABILITY_LIST, 1,
+                               (void *)(uintptr_t)next);
+        if ( rc )
+            return rc;
+
+        next &= ~3;
+
+        if ( !next && !is_hwdom )
+            /*
+             * If we don't have any supported capabilities to expose to the
+             * guest, mask the PCI_STATUS_CAP_LIST bit in the status
+             * register.
+             */
+            mask_cap_list = true;
+
+        while ( next && ttl )
+        {
+            unsigned int pos = next;
+
+            next = pci_find_next_cap_ttl(pdev->sbdf,
+                                         pos + PCI_CAP_LIST_NEXT,
+                                         supported_caps, n, &ttl);
+
+            if ( !is_hwdom )
+            {
+                rc = vpci_add_register(pdev->vpci, vpci_hw_read8, NULL,
+                                       pos + PCI_CAP_LIST_ID, 1, NULL);
+                if ( rc )
+                    return rc;
+            }
+
+            rc = vpci_add_register(pdev->vpci, vpci_read_val,
+                                   is_hwdom ? vpci_hw_write8 : NULL,
+                                   pos + PCI_CAP_LIST_NEXT, 1,
+                                   (void *)(uintptr_t)next);
+            if ( rc )
+                return rc;
+
+            next &= ~3;
+        }
+    }
+
+    /* Return early for the hw domain, no masking of PCI_STATUS. */
+    if ( is_hwdom )
+        return 0;
+
+    /* Utilize rsvdp_mask to hide PCI_STATUS_CAP_LIST from the guest. */
+    return vpci_add_register_mask(pdev->vpci, vpci_hw_read16, vpci_hw_write16,
+                                  PCI_STATUS, 2, NULL,
+                                  PCI_STATUS_RO_MASK &
+                                    ~(mask_cap_list ? PCI_STATUS_CAP_LIST : 0),
+                                  PCI_STATUS_RW1C_MASK,
+                                  mask_cap_list ? PCI_STATUS_CAP_LIST : 0,
+                                  PCI_STATUS_RSVDZ_MASK);
+}
+
+static int vpci_init_ext_capability_list(const struct pci_dev *pdev)
+{
+    unsigned int pos = PCI_CFG_SPACE_SIZE;
+
+    if ( !pdev->ext_cfg )
+        return 0;
+
+    if ( !is_hardware_domain(pdev->domain) )
+        /* Extended capabilities read as zero, write ignore for DomU */
+        return vpci_add_register(pdev->vpci, vpci_read_val, NULL,
+                                 pos, 4, (void *)0);
+
+    do
+    {
+        uint32_t header = pci_conf_read32(pdev->sbdf, pos);
+        int rc;
+
+        if ( header == 0xffffffffU )
+        {
+            printk(XENLOG_WARNING
+                   "%pd %pp: broken extended cap list, offset %#x\n",
+                   pdev->domain, &pdev->sbdf, pos);
+            return 0;
+        }
+
+        rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL,
+                               pos, 4, (void *)(uintptr_t)header);
+        if ( rc == -EEXIST )
+        {
+            printk(XENLOG_WARNING
+                   "%pd %pp: overlap in extended cap list, offset %#x\n",
+                   pdev->domain, &pdev->sbdf, pos);
+            return 0;
+        }
+
+        if ( rc )
+            return rc;
+
+        pos = PCI_EXT_CAP_NEXT(header);
+    } while ( pos >= PCI_CFG_SPACE_SIZE );
+
+    return 0;
+}
+
 int vpci_init_capabilities(struct pci_dev *pdev)
 {
+    int rc;
+
+    rc = vpci_init_capability_list(pdev);
+    if ( rc )
+        return rc;
+
+    rc = vpci_init_ext_capability_list(pdev);
+    if ( rc )
+        return rc;
+
     for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ )
     {
         const vpci_capability_t *capability = &__start_vpci_array[i];
         const unsigned int cap = capability->id;
         const bool is_ext = capability->is_ext;
         unsigned int pos = 0;
-        int rc;
 
         if ( !is_ext )
             pos = pci_find_cap_offset(pdev->sbdf, cap);
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -744,132 +744,6 @@ static int bar_add_rangeset(const struct
     return !bar->mem ? -ENOMEM : 0;
 }
 
-static int vpci_init_capability_list(struct pci_dev *pdev)
-{
-    int rc;
-    bool mask_cap_list = false;
-    bool is_hwdom = is_hardware_domain(pdev->domain);
-
-    if ( pci_conf_read16(pdev->sbdf, PCI_STATUS) & PCI_STATUS_CAP_LIST )
-    {
-        /* Only expose capabilities to the guest that vPCI can handle. */
-        unsigned int next, ttl = 48;
-        static const unsigned int supported_caps[] = {
-            PCI_CAP_ID_MSI,
-            PCI_CAP_ID_MSIX,
-        };
-        /*
-         * For dom0, we should expose all capabilities instead of a fixed
-         * capabilities array, so setting n to 0 here is to get the next
-         * capability position directly in pci_find_next_cap_ttl.
-         */
-        const unsigned int n = is_hwdom ? 0 : ARRAY_SIZE(supported_caps);
-
-        next = pci_find_next_cap_ttl(pdev->sbdf, PCI_CAPABILITY_LIST,
-                                     supported_caps, n, &ttl);
-
-        rc = vpci_add_register(pdev->vpci, vpci_read_val,
-                               is_hwdom ? vpci_hw_write8 : NULL,
-                               PCI_CAPABILITY_LIST, 1,
-                               (void *)(uintptr_t)next);
-        if ( rc )
-            return rc;
-
-        next &= ~3;
-
-        if ( !next && !is_hwdom )
-            /*
-             * If we don't have any supported capabilities to expose to the
-             * guest, mask the PCI_STATUS_CAP_LIST bit in the status
-             * register.
-             */
-            mask_cap_list = true;
-
-        while ( next && ttl )
-        {
-            unsigned int pos = next;
-
-            next = pci_find_next_cap_ttl(pdev->sbdf,
-                                         pos + PCI_CAP_LIST_NEXT,
-                                         supported_caps, n, &ttl);
-
-            if ( !is_hwdom )
-            {
-                rc = vpci_add_register(pdev->vpci, vpci_hw_read8, NULL,
-                                       pos + PCI_CAP_LIST_ID, 1, NULL);
-                if ( rc )
-                    return rc;
-            }
-
-            rc = vpci_add_register(pdev->vpci, vpci_read_val,
-                                   is_hwdom ? vpci_hw_write8 : NULL,
-                                   pos + PCI_CAP_LIST_NEXT, 1,
-                                   (void *)(uintptr_t)next);
-            if ( rc )
-                return rc;
-
-            next &= ~3;
-        }
-    }
-
-    /* Return early for the hw domain, no masking of PCI_STATUS. */
-    if ( is_hwdom )
-        return 0;
-
-    /* Utilize rsvdp_mask to hide PCI_STATUS_CAP_LIST from the guest. */
-    return vpci_add_register_mask(pdev->vpci, vpci_hw_read16, vpci_hw_write16,
-                                  PCI_STATUS, 2, NULL,
-                                  PCI_STATUS_RO_MASK &
-                                    ~(mask_cap_list ? PCI_STATUS_CAP_LIST : 0),
-                                  PCI_STATUS_RW1C_MASK,
-                                  mask_cap_list ? PCI_STATUS_CAP_LIST : 0,
-                                  PCI_STATUS_RSVDZ_MASK);
-}
-
-static int vpci_init_ext_capability_list(const struct pci_dev *pdev)
-{
-    unsigned int pos = PCI_CFG_SPACE_SIZE;
-
-    if ( !pdev->ext_cfg )
-        return 0;
-
-    if ( !is_hardware_domain(pdev->domain) )
-        /* Extended capabilities read as zero, write ignore for DomU */
-        return vpci_add_register(pdev->vpci, vpci_read_val, NULL,
-                                 pos, 4, (void *)0);
-
-    do
-    {
-        uint32_t header = pci_conf_read32(pdev->sbdf, pos);
-        int rc;
-
-        if ( header == 0xffffffffU )
-        {
-            printk(XENLOG_WARNING
-                   "%pd %pp: broken extended cap list, offset %#x\n",
-                   pdev->domain, &pdev->sbdf, pos);
-            return 0;
-        }
-
-        rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL,
-                               pos, 4, (void *)(uintptr_t)header);
-        if ( rc == -EEXIST )
-        {
-            printk(XENLOG_WARNING
-                   "%pd %pp: overlap in extended cap list, offset %#x\n",
-                   pdev->domain, &pdev->sbdf, pos);
-            return 0;
-        }
-
-        if ( rc )
-            return rc;
-
-        pos = PCI_EXT_CAP_NEXT(header);
-    } while ( pos >= PCI_CFG_SPACE_SIZE );
-
-    return 0;
-}
-
 int vpci_init_header(struct pci_dev *pdev)
 {
     uint16_t cmd;
@@ -918,14 +792,6 @@ int vpci_init_header(struct pci_dev *pde
     if ( rc )
         return rc;
 
-    rc = vpci_init_capability_list(pdev);
-    if ( rc )
-        return rc;
-
-    rc = vpci_init_ext_capability_list(pdev);
-    if ( rc )
-        return rc;
-
     if ( pdev->ignore_bars )
         return 0;
 



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v4 5/5] vPCI: re-init extended-capabilities when MMCFG availability changed
  2026-02-10 10:51 [PATCH v4 0/5] (v)PCI: extended capability handling Jan Beulich
                   ` (3 preceding siblings ...)
  2026-02-10 10:54 ` [PATCH v4 4/5] vPCI: move capability-list init Jan Beulich
@ 2026-02-10 10:55 ` Jan Beulich
  2026-02-19 22:21   ` Stewart Hildebrand
  2026-02-20  9:28   ` Roger Pau Monné
  4 siblings, 2 replies; 21+ messages in thread
From: Jan Beulich @ 2026-02-10 10:55 UTC (permalink / raw)
  To: xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Roger Pau Monné, Stewart Hildebrand

When Dom0 informs us about MMCFG usability, this may change whether
extended capabilities are available (accessible) for devices. Zap what
might be on record, and re-initialize things.

No synchronization is added for the case where devices may already be in
use. That'll need sorting when (a) DomU support was added and (b) DomU-s
may run already while Dom0 / hwdom still boots (dom0less, Hyperlaunch).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
vpci_reinit_ext_capabilities()'es return value isn't checked, as it
doesn't feel quite right to fail the hypercall because of this. At the
same time it also doesn't feel quite right to have the function return
"void". Thoughts?
---
v4: Make sure ->cleanup() and ->init() are invoked.
v3: New.

--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -8,6 +8,8 @@
 #include <xen/guest_access.h>
 #include <xen/iocap.h>
 #include <xen/serial.h>
+#include <xen/vpci.h>
+
 #include <asm/current.h>
 #include <asm/io_apic.h>
 #include <asm/msi.h>
@@ -169,7 +171,10 @@ int cf_check physdev_check_pci_extcfg(st
 
     ASSERT(pdev->seg == info->segment);
     if ( pdev->bus >= info->start_bus && pdev->bus <= info->end_bus )
+    {
         pci_check_extcfg(pdev);
+        vpci_reinit_ext_capabilities(pdev);
+    }
 
     return 0;
 }
--- a/xen/drivers/vpci/cap.c
+++ b/xen/drivers/vpci/cap.c
@@ -285,13 +285,16 @@ static int vpci_init_ext_capability_list
     return 0;
 }
 
-int vpci_init_capabilities(struct pci_dev *pdev)
+int vpci_init_capabilities(struct pci_dev *pdev, bool ext_only)
 {
     int rc;
 
-    rc = vpci_init_capability_list(pdev);
-    if ( rc )
-        return rc;
+    if ( !ext_only )
+    {
+        rc = vpci_init_capability_list(pdev);
+        if ( rc )
+            return rc;
+    }
 
     rc = vpci_init_ext_capability_list(pdev);
     if ( rc )
@@ -305,7 +308,7 @@ int vpci_init_capabilities(struct pci_de
         unsigned int pos = 0;
 
         if ( !is_ext )
-            pos = pci_find_cap_offset(pdev->sbdf, cap);
+            pos = !ext_only ? pci_find_cap_offset(pdev->sbdf, cap) : 0;
         else if ( is_hardware_domain(pdev->domain) )
             pos = pci_find_ext_capability(pdev, cap);
 
@@ -349,7 +352,7 @@ int vpci_init_capabilities(struct pci_de
     return 0;
 }
 
-void vpci_cleanup_capabilities(struct pci_dev *pdev)
+void vpci_cleanup_capabilities(struct pci_dev *pdev, bool ext_only)
 {
     for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ )
     {
@@ -361,7 +364,7 @@ void vpci_cleanup_capabilities(struct pc
             continue;
 
         if ( !capability->is_ext )
-            pos = pci_find_cap_offset(pdev->sbdf, cap);
+            pos = !ext_only ? pci_find_cap_offset(pdev->sbdf, cap) : 0;
         else if ( is_hardware_domain(pdev->domain) )
             pos = pci_find_ext_capability(pdev, cap);
         if ( pos )
@@ -376,6 +379,20 @@ void vpci_cleanup_capabilities(struct pc
     }
 }
 
+int vpci_reinit_ext_capabilities(struct pci_dev *pdev)
+{
+    if ( !pdev->vpci )
+        return 0;
+
+    vpci_cleanup_capabilities(pdev, true);
+
+    if ( vpci_remove_registers(pdev->vpci, PCI_CFG_SPACE_SIZE,
+                               PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) )
+        ASSERT_UNREACHABLE();
+
+    return vpci_init_capabilities(pdev, true);
+}
+
 /*
  * Local variables:
  * mode: C
--- a/xen/drivers/vpci/private.h
+++ b/xen/drivers/vpci/private.h
@@ -44,8 +44,8 @@ typedef struct {
 #define REGISTER_VPCI_EXTCAP(name, finit, fclean) \
     REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true)
 
-int vpci_init_capabilities(struct pci_dev *pdev);
-void vpci_cleanup_capabilities(struct pci_dev *pdev);
+int vpci_init_capabilities(struct pci_dev *pdev, bool ext_only);
+void vpci_cleanup_capabilities(struct pci_dev *pdev, bool ext_only);
 
 /* Add/remove a register handler. */
 int __must_check vpci_add_register_mask(struct vpci *vpci,
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -102,7 +102,7 @@ void vpci_deassign_device(struct pci_dev
                     &pdev->domain->vpci_dev_assigned_map);
 #endif
 
-    vpci_cleanup_capabilities(pdev);
+    vpci_cleanup_capabilities(pdev, false);
 
     spin_lock(&pdev->vpci->lock);
     while ( !list_empty(&pdev->vpci->handlers) )
@@ -159,7 +159,7 @@ int vpci_assign_device(struct pci_dev *p
     if ( rc )
         goto out;
 
-    rc = vpci_init_capabilities(pdev);
+    rc = vpci_init_capabilities(pdev, false);
 
  out:
     if ( rc )
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -20,6 +20,7 @@
 #define VPCI_MAX_VIRT_DEV       (PCI_SLOT(~0) + 1)
 
 int __must_check vpci_init_header(struct pci_dev *pdev);
+int vpci_reinit_ext_capabilities(struct pci_dev *pdev);
 
 /* Assign vPCI to device by adding handlers. */
 int __must_check vpci_assign_device(struct pci_dev *pdev);
@@ -197,6 +198,11 @@ bool vpci_ecam_read(pci_sbdf_t sbdf, uns
 #else /* !CONFIG_HAS_VPCI */
 struct vpci_vcpu {};
 
+static inline int vpci_reinit_ext_capabilities(struct pci_dev *pdev)
+{
+    return 0;
+}
+
 static inline int vpci_assign_device(struct pci_dev *pdev)
 {
     return 0;



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 1/5] x86/PCI: avoid re-evaluation of extended config space accessibility
  2026-02-10 10:52 ` [PATCH v4 1/5] x86/PCI: avoid re-evaluation of extended config space accessibility Jan Beulich
@ 2026-02-10 15:59   ` Stewart Hildebrand
  0 siblings, 0 replies; 21+ messages in thread
From: Stewart Hildebrand @ 2026-02-10 15:59 UTC (permalink / raw)
  To: Jan Beulich, xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Roger Pau Monné

On 2/10/26 05:52, Jan Beulich wrote:
> When, during boot, we have already correctly determined availability of
> the MMCFG access method for a given bus range, there's then no need to
> invoke pci_check_extcfg() again for every of the devices. This in
> particular avoids ->ext_cfg to transiently indicate the wrong state.
> 
> Switch to using Xen style on lines being touched and immediately adjacent
> ones.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Acked-by: Roger Pau Monné <roger.pau@citrix.com>

Reviewed-by: Stewart Hildebrand <stewart.hildebrand@amd.com>


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/5] vPCI: introduce private header
  2026-02-10 10:53 ` [PATCH v4 2/5] vPCI: introduce private header Jan Beulich
@ 2026-02-17 22:04   ` Stewart Hildebrand
  2026-02-18  8:52     ` Jan Beulich
  2026-02-18 21:58   ` Stewart Hildebrand
  1 sibling, 1 reply; 21+ messages in thread
From: Stewart Hildebrand @ 2026-02-17 22:04 UTC (permalink / raw)
  To: Jan Beulich, xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Julien Grall, Stefano Stabellini, Anthony PERARD,
	Michal Orzel, Roger Pau Monné, Bertrand Marquis,
	Volodymyr Babchuk

On 2/10/26 05:53, Jan Beulich wrote:
> Before adding more private stuff to xen/vpci.h, split it up. In order to
> be able to include the private header first in a CU, the per-arch struct
> decls also need to move (to new asm/vpci.h files).
> 
> While adjusting the test harness'es Makefile, also switch the pre-existing
> header symlink-ing rule to a pattern one.
> 
> Apart from in the test harness code, things only move; no functional
> change intended.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> Subsequently, at least on x86 more stuff may want moving into asm/vpci.h.
> ---
> v4: New.
> 
> --- a/tools/tests/vpci/Makefile
> +++ b/tools/tests/vpci/Makefile
> @@ -14,8 +14,8 @@ else
>  	$(warning HOSTCC != CC, will not run test)
>  endif
>  
> -$(TARGET): vpci.c vpci.h list.h main.c emul.h
> -	$(CC) $(CFLAGS_xeninclude) -g -o $@ vpci.c main.c
> +$(TARGET): vpci.c vpci.h list.h private.h main.c emul.h
> +	$(CC) $(CFLAGS_xeninclude) -include emul.h -g -o $@ vpci.c main.c
>  
>  .PHONY: clean
>  clean:

Can you please add the generated private.h to be removed upon "make clean"?

Also, can you please add tools/tests/vpci/private.h to .gitignore?

> --- /dev/null
> +++ b/xen/arch/arm/include/asm/vpci.h
> @@ -0,0 +1,13 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#ifndef ARM_VPCI_H
> +#define ARM_VPCI_H
> +
> +/* Arch-specific MSI data for vPCI. */
> +struct vpci_arch_msi {
> +};
> +
> +/* Arch-specific MSI-X entry data for vPCI. */
> +struct vpci_arch_msix_entry {
> +};
> +
> +#endif /* ARM_VPCI_H */

Out of curiosity (not asking for any changes), why did you include an emacs
footer on the x86 header but not here?

Otherwise, the rest of the patch looks good to me.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/5] vPCI: introduce private header
  2026-02-17 22:04   ` Stewart Hildebrand
@ 2026-02-18  8:52     ` Jan Beulich
  0 siblings, 0 replies; 21+ messages in thread
From: Jan Beulich @ 2026-02-18  8:52 UTC (permalink / raw)
  To: Stewart Hildebrand
  Cc: Andrew Cooper, Julien Grall, Stefano Stabellini, Anthony PERARD,
	Michal Orzel, Roger Pau Monné, Bertrand Marquis,
	Volodymyr Babchuk, xen-devel@lists.xenproject.org

On 17.02.2026 23:04, Stewart Hildebrand wrote:
> On 2/10/26 05:53, Jan Beulich wrote:
>> Before adding more private stuff to xen/vpci.h, split it up. In order to
>> be able to include the private header first in a CU, the per-arch struct
>> decls also need to move (to new asm/vpci.h files).
>>
>> While adjusting the test harness'es Makefile, also switch the pre-existing
>> header symlink-ing rule to a pattern one.
>>
>> Apart from in the test harness code, things only move; no functional
>> change intended.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> Subsequently, at least on x86 more stuff may want moving into asm/vpci.h.
>> ---
>> v4: New.
>>
>> --- a/tools/tests/vpci/Makefile
>> +++ b/tools/tests/vpci/Makefile
>> @@ -14,8 +14,8 @@ else
>>  	$(warning HOSTCC != CC, will not run test)
>>  endif
>>  
>> -$(TARGET): vpci.c vpci.h list.h main.c emul.h
>> -	$(CC) $(CFLAGS_xeninclude) -g -o $@ vpci.c main.c
>> +$(TARGET): vpci.c vpci.h list.h private.h main.c emul.h
>> +	$(CC) $(CFLAGS_xeninclude) -include emul.h -g -o $@ vpci.c main.c
>>  
>>  .PHONY: clean
>>  clean:
> 
> Can you please add the generated private.h to be removed upon "make clean"?
> 
> Also, can you please add tools/tests/vpci/private.h to .gitignore?

Oops, yes, of course.

>> --- /dev/null
>> +++ b/xen/arch/arm/include/asm/vpci.h
>> @@ -0,0 +1,13 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +#ifndef ARM_VPCI_H
>> +#define ARM_VPCI_H
>> +
>> +/* Arch-specific MSI data for vPCI. */
>> +struct vpci_arch_msi {
>> +};
>> +
>> +/* Arch-specific MSI-X entry data for vPCI. */
>> +struct vpci_arch_msix_entry {
>> +};
>> +
>> +#endif /* ARM_VPCI_H */
> 
> Out of curiosity (not asking for any changes), why did you include an emacs
> footer on the x86 header but not here?
> 
> Otherwise, the rest of the patch looks good to me.

Well - I copied the original files and then reduced them to just what I
wanted to move here.

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 3/5] vPCI: move vpci_init_capabilities() to a separate file
  2026-02-10 10:54 ` [PATCH v4 3/5] vPCI: move vpci_init_capabilities() to a separate file Jan Beulich
@ 2026-02-18 15:50   ` Stewart Hildebrand
  2026-02-20  8:48   ` Roger Pau Monné
  1 sibling, 0 replies; 21+ messages in thread
From: Stewart Hildebrand @ 2026-02-18 15:50 UTC (permalink / raw)
  To: Jan Beulich, xen-devel@lists.xenproject.org; +Cc: Roger Pau Monné

On 2/10/26 05:54, Jan Beulich wrote:
> Let's keep capability handling together. Start with moving
> vpci_init_capabilities() and its helpers, plus splitting out of its
> cleanup counterpart.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Stewart Hildebrand <stewart.hildebrand@amd.com>

> ---
> vpci_get_register(), while now only used by cap.c, didn't look like it
> would want moving there.

OK. It seems potential opportunities for compiler optimizations would be small
anyway, and this is not a hot path.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 4/5] vPCI: move capability-list init
  2026-02-10 10:54 ` [PATCH v4 4/5] vPCI: move capability-list init Jan Beulich
@ 2026-02-18 20:54   ` Stewart Hildebrand
  2026-02-20  8:59   ` Roger Pau Monné
  1 sibling, 0 replies; 21+ messages in thread
From: Stewart Hildebrand @ 2026-02-18 20:54 UTC (permalink / raw)
  To: Jan Beulich, xen-devel@lists.xenproject.org; +Cc: Roger Pau Monné

On 2/10/26 05:54, Jan Beulich wrote:
> ... both for when the functions are invoked and where they live in source.
> Don't invoke them directly in vpci_init_header(), but instead first thing
> in vpci_init_capabilities().
> 
> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Stewart Hildebrand <stewart.hildebrand@amd.com>


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/5] vPCI: introduce private header
  2026-02-10 10:53 ` [PATCH v4 2/5] vPCI: introduce private header Jan Beulich
  2026-02-17 22:04   ` Stewart Hildebrand
@ 2026-02-18 21:58   ` Stewart Hildebrand
  2026-02-19  6:26     ` Jan Beulich
  1 sibling, 1 reply; 21+ messages in thread
From: Stewart Hildebrand @ 2026-02-18 21:58 UTC (permalink / raw)
  To: Jan Beulich, xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Julien Grall, Stefano Stabellini, Anthony PERARD,
	Michal Orzel, Roger Pau Monné, Bertrand Marquis,
	Volodymyr Babchuk

On 2/10/26 05:53, Jan Beulich wrote:
> --- a/xen/include/xen/vpci.h
> +++ b/xen/include/xen/vpci.h
> @@ -30,20 +19,6 @@ typedef struct {
>   */
>  #define VPCI_MAX_VIRT_DEV       (PCI_SLOT(~0) + 1)
>  
> -#define REGISTER_VPCI_CAPABILITY(cap, name, finit, fclean, ext) \
> -    static const vpci_capability_t name##_entry \
> -        __used_section(".data.rel.ro.vpci") = { \
> -        .id = (cap), \
> -        .init = (finit), \
> -        .cleanup = (fclean), \
> -        .is_ext = (ext), \
> -    }
> -
> -#define REGISTER_VPCI_CAP(name, finit, fclean) \
> -    REGISTER_VPCI_CAPABILITY(PCI_CAP_ID_##name, name, finit, fclean, false)
> -#define REGISTER_VPCI_EXTCAP(name, finit, fclean) \
> -    REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true)
> -
>  int __must_check vpci_init_header(struct pci_dev *pdev);
Nit: I suppose vpci_init_header() could also move to the new private header file


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/5] vPCI: introduce private header
  2026-02-18 21:58   ` Stewart Hildebrand
@ 2026-02-19  6:26     ` Jan Beulich
  0 siblings, 0 replies; 21+ messages in thread
From: Jan Beulich @ 2026-02-19  6:26 UTC (permalink / raw)
  To: Stewart Hildebrand
  Cc: Andrew Cooper, Julien Grall, Stefano Stabellini, Anthony PERARD,
	Michal Orzel, Roger Pau Monné, Bertrand Marquis,
	Volodymyr Babchuk, xen-devel@lists.xenproject.org

On 18.02.2026 22:58, Stewart Hildebrand wrote:
> On 2/10/26 05:53, Jan Beulich wrote:
>> --- a/xen/include/xen/vpci.h
>> +++ b/xen/include/xen/vpci.h
>> @@ -30,20 +19,6 @@ typedef struct {
>>   */
>>  #define VPCI_MAX_VIRT_DEV       (PCI_SLOT(~0) + 1)
>>  
>> -#define REGISTER_VPCI_CAPABILITY(cap, name, finit, fclean, ext) \
>> -    static const vpci_capability_t name##_entry \
>> -        __used_section(".data.rel.ro.vpci") = { \
>> -        .id = (cap), \
>> -        .init = (finit), \
>> -        .cleanup = (fclean), \
>> -        .is_ext = (ext), \
>> -    }
>> -
>> -#define REGISTER_VPCI_CAP(name, finit, fclean) \
>> -    REGISTER_VPCI_CAPABILITY(PCI_CAP_ID_##name, name, finit, fclean, false)
>> -#define REGISTER_VPCI_EXTCAP(name, finit, fclean) \
>> -    REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true)
>> -
>>  int __must_check vpci_init_header(struct pci_dev *pdev);
> Nit: I suppose vpci_init_header() could also move to the new private header file

Hmm, yes, will do. I wonder why I didn't, as I went by being pretty aggressive
in moving, to then move back what needs exposing to the outside.

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 5/5] vPCI: re-init extended-capabilities when MMCFG availability changed
  2026-02-10 10:55 ` [PATCH v4 5/5] vPCI: re-init extended-capabilities when MMCFG availability changed Jan Beulich
@ 2026-02-19 22:21   ` Stewart Hildebrand
  2026-02-20  7:13     ` Jan Beulich
  2026-02-20  9:28   ` Roger Pau Monné
  1 sibling, 1 reply; 21+ messages in thread
From: Stewart Hildebrand @ 2026-02-19 22:21 UTC (permalink / raw)
  To: Jan Beulich, xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Roger Pau Monné

On 2/10/26 05:55, Jan Beulich wrote:
> --- a/xen/drivers/vpci/cap.c
> +++ b/xen/drivers/vpci/cap.c
> @@ -376,6 +379,20 @@ void vpci_cleanup_capabilities(struct pc
>      }
>  }
>  
> +int vpci_reinit_ext_capabilities(struct pci_dev *pdev)
> +{
> +    if ( !pdev->vpci )
> +        return 0;
> +
> +    vpci_cleanup_capabilities(pdev, true);
In the case where pdev->ext_cfg transitions from true to false, it doesn't look
like this would actually result in the respective capability->cleanup() hook
being called, due to reliance on pci_find_ext_capability().


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 5/5] vPCI: re-init extended-capabilities when MMCFG availability changed
  2026-02-19 22:21   ` Stewart Hildebrand
@ 2026-02-20  7:13     ` Jan Beulich
  2026-02-20  9:08       ` Roger Pau Monné
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2026-02-20  7:13 UTC (permalink / raw)
  To: Stewart Hildebrand
  Cc: Andrew Cooper, Roger Pau Monné,
	xen-devel@lists.xenproject.org

On 19.02.2026 23:21, Stewart Hildebrand wrote:
> On 2/10/26 05:55, Jan Beulich wrote:
>> --- a/xen/drivers/vpci/cap.c
>> +++ b/xen/drivers/vpci/cap.c
>> @@ -376,6 +379,20 @@ void vpci_cleanup_capabilities(struct pc
>>      }
>>  }
>>  
>> +int vpci_reinit_ext_capabilities(struct pci_dev *pdev)
>> +{
>> +    if ( !pdev->vpci )
>> +        return 0;
>> +
>> +    vpci_cleanup_capabilities(pdev, true);
> In the case where pdev->ext_cfg transitions from true to false, it doesn't look
> like this would actually result in the respective capability->cleanup() hook
> being called, due to reliance on pci_find_ext_capability().

Hmm, indeed. Yet that's a problem with vpci_cleanup_capabilities(), not
with the call here. It may have been merely latent until no later than
b1543cf5751b ("PCI: don't look for ext-caps when there's no extended cfg
space"). The cleanup hooks themselves (it's only one right now) then
also may not access their respective capabilities anymore (nor even just
try to locate them).

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 3/5] vPCI: move vpci_init_capabilities() to a separate file
  2026-02-10 10:54 ` [PATCH v4 3/5] vPCI: move vpci_init_capabilities() to a separate file Jan Beulich
  2026-02-18 15:50   ` Stewart Hildebrand
@ 2026-02-20  8:48   ` Roger Pau Monné
  1 sibling, 0 replies; 21+ messages in thread
From: Roger Pau Monné @ 2026-02-20  8:48 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Stewart Hildebrand

On Tue, Feb 10, 2026 at 11:54:18AM +0100, Jan Beulich wrote:
> Let's keep capability handling together. Start with moving
> vpci_init_capabilities() and its helpers, plus splitting out of its
> cleanup counterpart.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Roger Pau Monné <roger.pau@citrix.com>

> ---
> vpci_get_register(), while now only used by cap.c, didn't look like it
> would want moving there.

I think it's best to keep the vpci_{get,add,remove}_register{,s}()
functions in the same file.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 4/5] vPCI: move capability-list init
  2026-02-10 10:54 ` [PATCH v4 4/5] vPCI: move capability-list init Jan Beulich
  2026-02-18 20:54   ` Stewart Hildebrand
@ 2026-02-20  8:59   ` Roger Pau Monné
  1 sibling, 0 replies; 21+ messages in thread
From: Roger Pau Monné @ 2026-02-20  8:59 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Stewart Hildebrand

On Tue, Feb 10, 2026 at 11:54:53AM +0100, Jan Beulich wrote:
> ... both for when the functions are invoked and where they live in source.
> Don't invoke them directly in vpci_init_header(), but instead first thing
> in vpci_init_capabilities().
> 
> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Thanks for doing the move.

Acked-by: Roger Pau Monné <roger.pau@citrix.com>

Roger.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 5/5] vPCI: re-init extended-capabilities when MMCFG availability changed
  2026-02-20  7:13     ` Jan Beulich
@ 2026-02-20  9:08       ` Roger Pau Monné
  0 siblings, 0 replies; 21+ messages in thread
From: Roger Pau Monné @ 2026-02-20  9:08 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stewart Hildebrand, Andrew Cooper, xen-devel@lists.xenproject.org

On Fri, Feb 20, 2026 at 08:13:58AM +0100, Jan Beulich wrote:
> On 19.02.2026 23:21, Stewart Hildebrand wrote:
> > On 2/10/26 05:55, Jan Beulich wrote:
> >> --- a/xen/drivers/vpci/cap.c
> >> +++ b/xen/drivers/vpci/cap.c
> >> @@ -376,6 +379,20 @@ void vpci_cleanup_capabilities(struct pc
> >>      }
> >>  }
> >>  
> >> +int vpci_reinit_ext_capabilities(struct pci_dev *pdev)
> >> +{
> >> +    if ( !pdev->vpci )
> >> +        return 0;
> >> +
> >> +    vpci_cleanup_capabilities(pdev, true);
> > In the case where pdev->ext_cfg transitions from true to false, it doesn't look
> > like this would actually result in the respective capability->cleanup() hook
> > being called, due to reliance on pci_find_ext_capability().
> 
> Hmm, indeed. Yet that's a problem with vpci_cleanup_capabilities(), not
> with the call here. It may have been merely latent until no later than
> b1543cf5751b ("PCI: don't look for ext-caps when there's no extended cfg
> space"). The cleanup hooks themselves (it's only one right now) then
> also may not access their respective capabilities anymore (nor even just
> try to locate them).

Cleanup hooks should be idempotent, so in principle there should be no
need to check whether the capability is present before attempting to
clean it up.  However cleanup_rebar() does check for the position of
the capability, and the MMCFG having disappeared would prevent
cleanup there.  At least that capability needs to be adjusted to cache
the position in the config space and the number of BARs, so that the
cleanup hook doesn't rely on PCI config space accesses to fetch any of
this.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 5/5] vPCI: re-init extended-capabilities when MMCFG availability changed
  2026-02-10 10:55 ` [PATCH v4 5/5] vPCI: re-init extended-capabilities when MMCFG availability changed Jan Beulich
  2026-02-19 22:21   ` Stewart Hildebrand
@ 2026-02-20  9:28   ` Roger Pau Monné
  2026-02-20 10:20     ` Jan Beulich
  1 sibling, 1 reply; 21+ messages in thread
From: Roger Pau Monné @ 2026-02-20  9:28 UTC (permalink / raw)
  To: Jan Beulich
  Cc: xen-devel@lists.xenproject.org, Andrew Cooper, Stewart Hildebrand

On Tue, Feb 10, 2026 at 11:55:34AM +0100, Jan Beulich wrote:
> When Dom0 informs us about MMCFG usability, this may change whether
> extended capabilities are available (accessible) for devices. Zap what
> might be on record, and re-initialize things.
> 
> No synchronization is added for the case where devices may already be in
> use. That'll need sorting when (a) DomU support was added and (b) DomU-s
> may run already while Dom0 / hwdom still boots (dom0less, Hyperlaunch).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> vpci_reinit_ext_capabilities()'es return value isn't checked, as it
> doesn't feel quite right to fail the hypercall because of this. At the
> same time it also doesn't feel quite right to have the function return
> "void". Thoughts?

For the non hardwware domain case we could deassign the device from
the domain?

And print a warning message for both cases.

> ---
> v4: Make sure ->cleanup() and ->init() are invoked.
> v3: New.
> 
> --- a/xen/arch/x86/physdev.c
> +++ b/xen/arch/x86/physdev.c
> @@ -8,6 +8,8 @@
>  #include <xen/guest_access.h>
>  #include <xen/iocap.h>
>  #include <xen/serial.h>
> +#include <xen/vpci.h>
> +
>  #include <asm/current.h>
>  #include <asm/io_apic.h>
>  #include <asm/msi.h>
> @@ -169,7 +171,10 @@ int cf_check physdev_check_pci_extcfg(st
>  
>      ASSERT(pdev->seg == info->segment);
>      if ( pdev->bus >= info->start_bus && pdev->bus <= info->end_bus )
> +    {
>          pci_check_extcfg(pdev);
> +        vpci_reinit_ext_capabilities(pdev);
> +    }
>  
>      return 0;
>  }
> --- a/xen/drivers/vpci/cap.c
> +++ b/xen/drivers/vpci/cap.c
> @@ -285,13 +285,16 @@ static int vpci_init_ext_capability_list
>      return 0;
>  }
>  
> -int vpci_init_capabilities(struct pci_dev *pdev)
> +int vpci_init_capabilities(struct pci_dev *pdev, bool ext_only)
>  {
>      int rc;
>  
> -    rc = vpci_init_capability_list(pdev);
> -    if ( rc )
> -        return rc;
> +    if ( !ext_only )
> +    {
> +        rc = vpci_init_capability_list(pdev);
> +        if ( rc )
> +            return rc;
> +    }
>  
>      rc = vpci_init_ext_capability_list(pdev);
>      if ( rc )
> @@ -305,7 +308,7 @@ int vpci_init_capabilities(struct pci_de
>          unsigned int pos = 0;
>  
>          if ( !is_ext )
> -            pos = pci_find_cap_offset(pdev->sbdf, cap);
> +            pos = !ext_only ? pci_find_cap_offset(pdev->sbdf, cap) : 0;
>          else if ( is_hardware_domain(pdev->domain) )
>              pos = pci_find_ext_capability(pdev, cap);
>  
> @@ -349,7 +352,7 @@ int vpci_init_capabilities(struct pci_de
>      return 0;
>  }
>  
> -void vpci_cleanup_capabilities(struct pci_dev *pdev)
> +void vpci_cleanup_capabilities(struct pci_dev *pdev, bool ext_only)
>  {
>      for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ )
>      {
> @@ -361,7 +364,7 @@ void vpci_cleanup_capabilities(struct pc
>              continue;
>  
>          if ( !capability->is_ext )
> -            pos = pci_find_cap_offset(pdev->sbdf, cap);
> +            pos = !ext_only ? pci_find_cap_offset(pdev->sbdf, cap) : 0;
>          else if ( is_hardware_domain(pdev->domain) )
>              pos = pci_find_ext_capability(pdev, cap);
>          if ( pos )
> @@ -376,6 +379,20 @@ void vpci_cleanup_capabilities(struct pc
>      }
>  }
>  
> +int vpci_reinit_ext_capabilities(struct pci_dev *pdev)
> +{
> +    if ( !pdev->vpci )
> +        return 0;
> +
> +    vpci_cleanup_capabilities(pdev, true);
> +
> +    if ( vpci_remove_registers(pdev->vpci, PCI_CFG_SPACE_SIZE,
> +                               PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) )
> +        ASSERT_UNREACHABLE();
> +
> +    return vpci_init_capabilities(pdev, true);

I wonder here, in the context here, where the device is already
assigned to a domain you likely need to take the vPCI lock to safely
perform (parts of?) the cleanup and reinit.  Otherwise you could free
capability data while it's being accessed by the handlers.

The only current extended capability (reBAR) doesn't have any internal
state to free on cleanup, so it's all safe.  But a cleanup like the
MSI(-X) ones would be racy, as they free the structure without holding
the vPCI lock.  I think we need to be careful, and possibly adjust the
cleanup functions so they can tolerate cleanup with possible
concurrent accesses.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 5/5] vPCI: re-init extended-capabilities when MMCFG availability changed
  2026-02-20  9:28   ` Roger Pau Monné
@ 2026-02-20 10:20     ` Jan Beulich
  2026-02-20 10:53       ` Roger Pau Monné
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2026-02-20 10:20 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: xen-devel@lists.xenproject.org, Andrew Cooper, Stewart Hildebrand

On 20.02.2026 10:28, Roger Pau Monné wrote:
> On Tue, Feb 10, 2026 at 11:55:34AM +0100, Jan Beulich wrote:
>> When Dom0 informs us about MMCFG usability, this may change whether
>> extended capabilities are available (accessible) for devices. Zap what
>> might be on record, and re-initialize things.
>>
>> No synchronization is added for the case where devices may already be in
>> use. That'll need sorting when (a) DomU support was added and (b) DomU-s
>> may run already while Dom0 / hwdom still boots (dom0less, Hyperlaunch).
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> vpci_reinit_ext_capabilities()'es return value isn't checked, as it
>> doesn't feel quite right to fail the hypercall because of this. At the
>> same time it also doesn't feel quite right to have the function return
>> "void". Thoughts?
> 
> For the non hardwware domain case we could deassign the device from
> the domain?

Will need to check. De-assigning is generally done only from domctl context,
I think. I'm also uncertain what other things may break (in Xen or the
toolstacks) if we take away a device in such a pretty much uncontrolled way.

> And print a warning message for both cases.

Can do, albeit I'm unsure what "both" refers to - I see only ...

>> --- a/xen/arch/x86/physdev.c
>> +++ b/xen/arch/x86/physdev.c
>> @@ -8,6 +8,8 @@
>>  #include <xen/guest_access.h>
>>  #include <xen/iocap.h>
>>  #include <xen/serial.h>
>> +#include <xen/vpci.h>
>> +
>>  #include <asm/current.h>
>>  #include <asm/io_apic.h>
>>  #include <asm/msi.h>
>> @@ -169,7 +171,10 @@ int cf_check physdev_check_pci_extcfg(st
>>  
>>      ASSERT(pdev->seg == info->segment);
>>      if ( pdev->bus >= info->start_bus && pdev->bus <= info->end_bus )
>> +    {
>>          pci_check_extcfg(pdev);
>> +        vpci_reinit_ext_capabilities(pdev);
>> +    }

... this.

>> @@ -376,6 +379,20 @@ void vpci_cleanup_capabilities(struct pc
>>      }
>>  }
>>  
>> +int vpci_reinit_ext_capabilities(struct pci_dev *pdev)
>> +{
>> +    if ( !pdev->vpci )
>> +        return 0;
>> +
>> +    vpci_cleanup_capabilities(pdev, true);
>> +
>> +    if ( vpci_remove_registers(pdev->vpci, PCI_CFG_SPACE_SIZE,
>> +                               PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) )
>> +        ASSERT_UNREACHABLE();
>> +
>> +    return vpci_init_capabilities(pdev, true);
> 
> I wonder here, in the context here, where the device is already
> assigned to a domain you likely need to take the vPCI lock to safely
> perform (parts of?) the cleanup and reinit.  Otherwise you could free
> capability data while it's being accessed by the handlers.

The lock isn't recursive, so I fear we'd deadlock if it was taken here.
Furthermore this falls into "DomU support needs dealing with"; right
now we assume Dom0 tells us about its final MCFG verdict ahead of
putting devices in use. Once we need to consider devices already in
use, I think we would further need to pause the owning domain. Also ...

> The only current extended capability (reBAR) doesn't have any internal
> state to free on cleanup, so it's all safe.  But a cleanup like the
> MSI(-X) ones would be racy, as they free the structure without holding
> the vPCI lock.  I think we need to be careful, and possibly adjust the
> cleanup functions so they can tolerate cleanup with possible
> concurrent accesses.

... to cover such. (For something like MSI(-X) it might then further be
necessary to mask/disable interrupts, but hopefully we'll never have to
deal with extended capabilities that would require this.)

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 5/5] vPCI: re-init extended-capabilities when MMCFG availability changed
  2026-02-20 10:20     ` Jan Beulich
@ 2026-02-20 10:53       ` Roger Pau Monné
  0 siblings, 0 replies; 21+ messages in thread
From: Roger Pau Monné @ 2026-02-20 10:53 UTC (permalink / raw)
  To: Jan Beulich
  Cc: xen-devel@lists.xenproject.org, Andrew Cooper, Stewart Hildebrand

On Fri, Feb 20, 2026 at 11:20:52AM +0100, Jan Beulich wrote:
> On 20.02.2026 10:28, Roger Pau Monné wrote:
> > On Tue, Feb 10, 2026 at 11:55:34AM +0100, Jan Beulich wrote:
> >> When Dom0 informs us about MMCFG usability, this may change whether
> >> extended capabilities are available (accessible) for devices. Zap what
> >> might be on record, and re-initialize things.
> >>
> >> No synchronization is added for the case where devices may already be in
> >> use. That'll need sorting when (a) DomU support was added and (b) DomU-s
> >> may run already while Dom0 / hwdom still boots (dom0less, Hyperlaunch).
> >>
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> ---
> >> vpci_reinit_ext_capabilities()'es return value isn't checked, as it
> >> doesn't feel quite right to fail the hypercall because of this. At the
> >> same time it also doesn't feel quite right to have the function return
> >> "void". Thoughts?
> > 
> > For the non hardwware domain case we could deassign the device from
> > the domain?
> 
> Will need to check. De-assigning is generally done only from domctl context,
> I think. I'm also uncertain what other things may break (in Xen or the
> toolstacks) if we take away a device in such a pretty much uncontrolled way.
> 
> > And print a warning message for both cases.
> 
> Can do, albeit I'm unsure what "both" refers to - I see only ...

Oh, I mean print a warning message for both the hardware domain and
the domU case.

> >> --- a/xen/arch/x86/physdev.c
> >> +++ b/xen/arch/x86/physdev.c
> >> @@ -8,6 +8,8 @@
> >>  #include <xen/guest_access.h>
> >>  #include <xen/iocap.h>
> >>  #include <xen/serial.h>
> >> +#include <xen/vpci.h>
> >> +
> >>  #include <asm/current.h>
> >>  #include <asm/io_apic.h>
> >>  #include <asm/msi.h>
> >> @@ -169,7 +171,10 @@ int cf_check physdev_check_pci_extcfg(st
> >>  
> >>      ASSERT(pdev->seg == info->segment);
> >>      if ( pdev->bus >= info->start_bus && pdev->bus <= info->end_bus )
> >> +    {
> >>          pci_check_extcfg(pdev);
> >> +        vpci_reinit_ext_capabilities(pdev);
> >> +    }
> 
> ... this.
> 
> >> @@ -376,6 +379,20 @@ void vpci_cleanup_capabilities(struct pc
> >>      }
> >>  }
> >>  
> >> +int vpci_reinit_ext_capabilities(struct pci_dev *pdev)
> >> +{
> >> +    if ( !pdev->vpci )
> >> +        return 0;
> >> +
> >> +    vpci_cleanup_capabilities(pdev, true);
> >> +
> >> +    if ( vpci_remove_registers(pdev->vpci, PCI_CFG_SPACE_SIZE,
> >> +                               PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) )
> >> +        ASSERT_UNREACHABLE();
> >> +
> >> +    return vpci_init_capabilities(pdev, true);
> > 
> > I wonder here, in the context here, where the device is already
> > assigned to a domain you likely need to take the vPCI lock to safely
> > perform (parts of?) the cleanup and reinit.  Otherwise you could free
> > capability data while it's being accessed by the handlers.
> 
> The lock isn't recursive, so I fear we'd deadlock if it was taken here.

Yeah, that's why I've put the "parts of" remark above, some of those
functions already take the lock themselves.

> Furthermore this falls into "DomU support needs dealing with"; right
> now we assume Dom0 tells us about its final MCFG verdict ahead of
> putting devices in use. Once we need to consider devices already in
> use, I think we would further need to pause the owning domain. Also ...

Agreed.  So that this is clear, you might add an ASSERT() that the
device is not assigned to a non-hardware domain when this reinit
happens?

> > The only current extended capability (reBAR) doesn't have any internal
> > state to free on cleanup, so it's all safe.  But a cleanup like the
> > MSI(-X) ones would be racy, as they free the structure without holding
> > the vPCI lock.  I think we need to be careful, and possibly adjust the
> > cleanup functions so they can tolerate cleanup with possible
> > concurrent accesses.
> 
> ... to cover such. (For something like MSI(-X) it might then further be
> necessary to mask/disable interrupts, but hopefully we'll never have to
> deal with extended capabilities that would require this.)

Indeed.  Something like MSI-X would be extra fun to deal with, but
sooner or later we will need to handle it if we ever plan to support
hot-unplug from domUs.

In fact, the current cleanup_msi{,x}() helpers should already remove any
interrupt bindings and free any mapped pIRQs, otherwise we rely on
the PVH hardware domain having cleaned up the device properly before it
being assigned to another domain?  (Not that you need to do it here,
just realized from your comment)

Thanks, Roger


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2026-02-20 10:54 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-10 10:51 [PATCH v4 0/5] (v)PCI: extended capability handling Jan Beulich
2026-02-10 10:52 ` [PATCH v4 1/5] x86/PCI: avoid re-evaluation of extended config space accessibility Jan Beulich
2026-02-10 15:59   ` Stewart Hildebrand
2026-02-10 10:53 ` [PATCH v4 2/5] vPCI: introduce private header Jan Beulich
2026-02-17 22:04   ` Stewart Hildebrand
2026-02-18  8:52     ` Jan Beulich
2026-02-18 21:58   ` Stewart Hildebrand
2026-02-19  6:26     ` Jan Beulich
2026-02-10 10:54 ` [PATCH v4 3/5] vPCI: move vpci_init_capabilities() to a separate file Jan Beulich
2026-02-18 15:50   ` Stewart Hildebrand
2026-02-20  8:48   ` Roger Pau Monné
2026-02-10 10:54 ` [PATCH v4 4/5] vPCI: move capability-list init Jan Beulich
2026-02-18 20:54   ` Stewart Hildebrand
2026-02-20  8:59   ` Roger Pau Monné
2026-02-10 10:55 ` [PATCH v4 5/5] vPCI: re-init extended-capabilities when MMCFG availability changed Jan Beulich
2026-02-19 22:21   ` Stewart Hildebrand
2026-02-20  7:13     ` Jan Beulich
2026-02-20  9:08       ` Roger Pau Monné
2026-02-20  9:28   ` Roger Pau Monné
2026-02-20 10:20     ` Jan Beulich
2026-02-20 10:53       ` Roger Pau Monné

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.