From: Tushar Dave <tdave@nvidia.com>
To: qemu-devel@nongnu.org
Cc: alwilliamson@nvidia.com, jgg@nvidia.com, skolothumtho@nvidia.com,
qemu-arm@nongnu.org, peter.maydell@linaro.org, mst@redhat.com,
marcel.apfelbaum@gmail.com, devel@edk2.groups.io
Subject: [RFC PATCH 4/8] hw/pci: pack remaining BARs and update bridge windows
Date: Fri, 8 May 2026 13:37:13 -0500 [thread overview]
Message-ID: <20260508183717.193630-5-tdave@nvidia.com> (raw)
In-Reply-To: <20260508183717.193630-1-tdave@nvidia.com>
Extend the fixed BAR allocator to handle remaining 64-bit prefetchable
BARs after fixed BAR placement.
For each bus with fixed BAR devices, collect fixed and unassigned BARs,
compute available MMIO64 holes considering both local fixed BAR anchors
and globally claimed regions, and select an appropriate region to pack
remaining BARs.
Remaining BARs are sorted by size and packed into the selected hole
using a greedy placement strategy. Fixed BAR placement is preserved,
and all allocations are tracked via the global claim list.
After BAR placement, update the PCI bridge prefetchable window to cover
both fixed and dynamically assigned BAR ranges, ensuring firmware sees
a consistent MMIO layout.
This implements the second phase of the allocator that does dynamic BAR
placement and bridge window sizing for buses with fixed BAR constraints.
Signed-off-by: Tushar Dave <tdave@nvidia.com>
---
hw/pci/pci-resource.c | 404 +++++++++++++++++++++++++++++++++++++++++-
hw/pci/pci-resource.h | 17 ++
2 files changed, 420 insertions(+), 1 deletion(-)
diff --git a/hw/pci/pci-resource.c b/hw/pci/pci-resource.c
index 5e9a78ec16..de98924aa6 100644
--- a/hw/pci/pci-resource.c
+++ b/hw/pci/pci-resource.c
@@ -7,6 +7,7 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
+#include "qemu/bitops.h"
#include "qemu/range.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bridge.h"
@@ -158,6 +159,404 @@ static void pci_program_prefetch_bars(PCIDevice *dev, PhysBAR *pbars)
}
}
+/*
+ * Program the 64-bit prefetchable memory window of the bridge above @bus
+ * to cover the inclusive range [base, limit].
+ *
+ * Per the PCI-to-PCI Bridge Architecture Spec, the base/limit registers
+ * encode address bits [31:20] in register bits [15:4]; the low 20 bits of
+ * the base are implied 0 and the low 20 bits of the limit are implied 1s.
+ * Consequently a base that is not 1 MiB aligned is rounded DOWN and a
+ * limit that is not (1 MiB - 1) aligned is rounded UP, so the programmed
+ * window always fully covers [base, limit].
+ *
+ * Must only be called for a bus that has a parent bridge (asserted).
+ */
+static void pci_update_prefetch_window(PCIBus *bus, uint64_t base, uint64_t limit)
+{
+ PCIDevice *bridge = pci_bridge_get_device(bus);
+ uint32_t reg_base, reg_limit;
+
+ assert(bridge);
+
+ /* Address bits [31:20] -> register bits [15:4]. */
+ reg_base = (uint32_t)(extract64(base, 20, 12) << 4);
+ reg_limit = (uint32_t)(extract64(limit, 20, 12) << 4);
+ /* PCI_PREF_RANGE_TYPE_64 advertises 64-bit decoding in bits [3:0]. */
+ pci_host_config_write_common(bridge,
+ PCI_PREF_MEMORY_BASE,
+ pci_config_size(bridge),
+ reg_base | PCI_PREF_RANGE_TYPE_64,
+ 2);
+ pci_host_config_write_common(bridge,
+ PCI_PREF_BASE_UPPER32,
+ pci_config_size(bridge),
+ (uint32_t)(base >> 32),
+ 4);
+ pci_host_config_write_common(bridge,
+ PCI_PREF_MEMORY_LIMIT,
+ pci_config_size(bridge),
+ reg_limit | PCI_PREF_RANGE_TYPE_64,
+ 2);
+ pci_host_config_write_common(bridge,
+ PCI_PREF_LIMIT_UPPER32,
+ pci_config_size(bridge),
+ (uint32_t)(limit >> 32),
+ 4);
+}
+
+/*
+ * Return true iff @r is a populated (non-zero size) 64-bit prefetchable
+ * memory BAR, i.e. a BAR this allocator is responsible for placing.
+ */
+static inline bool is_64bit_pref_bar(PCIIORegion *r)
+{
+    bool is_mem = !(r->type & PCI_BASE_ADDRESS_SPACE_IO);
+    bool is_64 = r->type & PCI_BASE_ADDRESS_MEM_TYPE_64;
+    bool is_pref = r->type & PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+    return r->size && is_mem && is_64 && is_pref;
+}
+
+/*
+ * GCompareFunc for g_array_sort(): order AddressIntervals by ascending
+ * start address. Returns <0, 0 or >0 in the usual qsort convention.
+ */
+static int compare_intervals(gconstpointer a, gconstpointer b)
+{
+    const AddressInterval *ia = a;
+    const AddressInterval *ib = b;
+
+    /* QEMU coding style: braces on all conditional bodies. */
+    if (ia->start < ib->start) {
+        return -1;
+    }
+    if (ia->start > ib->start) {
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * GCompareFunc for g_array_sort(): order BarEntry records by DESCENDING
+ * size, so the largest (and most alignment-constrained, since PCI BAR
+ * sizes are powers of two) BARs are packed first.
+ */
+static int compare_bar_size_desc(gconstpointer a, gconstpointer b)
+{
+    const BarEntry *ea = a;
+    const BarEntry *eb = b;
+
+    /* QEMU coding style: braces on all conditional bodies. */
+    if (ea->size > eb->size) {
+        return -1;
+    }
+    if (ea->size < eb->size) {
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Categorize each hole in @holes relative to the anchor (fixed BAR) span:
+ *  - leftmost_hole: index of a hole entirely below the first anchor, -1 if none
+ *  - rightmost_hole: index of a hole entirely above the last anchor, -1 if none
+ *  - middle_holes: indices of all other holes (between/overlapping anchors)
+ *
+ * @holes is produced in ascending order by compute_real_holes(), so if
+ * several holes lie below the first anchor, leftmost_hole ends up as the
+ * one closest to it (last writer wins); likewise rightmost_hole keeps the
+ * highest hole above the anchors.
+ *
+ * NOTE(review): indexes fixed_bars element len - 1, so @fixed_bars must be
+ * sorted and non-empty -- confirm all callers guarantee this.
+ * Caller owns result.middle_holes and must g_array_free() it.
+ */
+static CategorizedHoles categorize_holes(GArray *holes, GArray *fixed_bars)
+{
+    CategorizedHoles result = {
+        .leftmost_hole = -1,
+        .middle_holes = g_array_new(false, false, sizeof(int)),
+        .rightmost_hole = -1
+    };
+
+    /* Get anchor boundaries */
+    uint64_t first_anchor_start = g_array_index(fixed_bars, AddressInterval, 0).start;
+    uint64_t last_anchor_end = g_array_index(fixed_bars, AddressInterval,
+                                             fixed_bars->len - 1).end;
+    /* Categorize each hole */
+    for (guint h = 0; h < holes->len; h++) {
+        AddressInterval *hole = &g_array_index(holes, AddressInterval, h);
+
+        if (hole->end < first_anchor_start) {
+            result.leftmost_hole = h; /* Before all anchors */
+        } else if (hole->start > last_anchor_end) {
+            result.rightmost_hole = h; /* After all anchors */
+        } else {
+            g_array_append_val(result.middle_holes, h); /* Between anchors */
+        }
+    }
+    return result;
+}
+
+/*
+ * Compute REAL holes considering both local anchors and global claims.
+ * This returns actual free space that can be used for packing.
+ * Strategy: Collect all obstacles (local fixed BARs + global claims from
+ * other buses), then compute gaps between them.
+ *
+ * All intervals are inclusive [start, end]. Overlapping or duplicate
+ * obstacles are tolerated: the scan cursor only ever moves forward
+ * (MAX(scan, end + 1)), so local anchors that were also recorded in the
+ * global claim list during phase 1 do no harm.
+ *
+ * Returns a newly allocated GArray of AddressInterval in ascending order;
+ * caller must g_array_free() it.
+ *
+ * NOTE(review): claimed->end + 1 would wrap if a claim ends at
+ * UINT64_MAX -- presumably unreachable for MMIO windows, but confirm.
+ */
+static GArray* compute_real_holes(GArray *fixed_bars, uint64_t mmio_start, uint64_t mmio_end)
+{
+    GArray *holes = g_array_new(false, false, sizeof(AddressInterval));
+    GArray *claimed_regions = g_array_new(false, false, sizeof(AddressInterval));
+    uint64_t scan;
+
+    /* Add local fixed BARs (anchors) as claimed regions */
+    for (guint i = 0; i < fixed_bars->len; i++) {
+        AddressInterval *anchor = &g_array_index(fixed_bars, AddressInterval, i);
+        g_array_append_val(claimed_regions, *anchor);
+    }
+
+    /* Add global claims from ALL buses (including other buses) */
+    if (fixed_claim_regions) {
+        for (guint i = 0; i < fixed_claim_regions->len; i++) {
+            FixedClaim *claim = &g_array_index(fixed_claim_regions, FixedClaim, i);
+            /* Only consider claims within our MMIO window */
+            if (claim->start <= mmio_end && claim->end >= mmio_start) {
+                AddressInterval region = {
+                    .start = claim->start,
+                    .end = claim->end
+                };
+                g_array_append_val(claimed_regions, region);
+            }
+        }
+    }
+
+    /* Handle case with no claimed regions: the whole window is one hole */
+    if (claimed_regions->len == 0) {
+        AddressInterval hole = { .start = mmio_start, .end = mmio_end };
+        g_array_append_val(holes, hole);
+        g_array_free(claimed_regions, true);
+        return holes;
+    }
+
+    /* Sort claimed regions by start address */
+    g_array_sort(claimed_regions, compare_intervals);
+
+    /* Compute holes between all claimed regions */
+    scan = mmio_start;
+    for (guint i = 0; i < claimed_regions->len; i++) {
+        AddressInterval *claimed = &g_array_index(claimed_regions, AddressInterval, i);
+
+        /* Free space before this claimed region */
+        if (scan < claimed->start) {
+            AddressInterval hole = { .start = scan, .end = claimed->start - 1 };
+            g_array_append_val(holes, hole);
+        }
+
+        /* Move scan cursor past this claimed region */
+        scan = MAX(scan, claimed->end + 1);
+    }
+
+    /* Free space after last claimed region */
+    if (scan <= mmio_end) {
+        AddressInterval hole = { .start = scan, .end = mmio_end };
+        g_array_append_val(holes, hole);
+    }
+
+    g_array_free(claimed_regions, true);
+    return holes;
+}
+
+/*
+ * Greedily place and program each BAR in @bars within the inclusive range
+ * [pack_start, pack_end], in array order (callers pre-sort by descending
+ * size). Each BAR is naturally aligned: PCI BAR sizes are powers of two,
+ * so ROUND_UP(cursor, size) aligns the BAR to its own size.
+ *
+ * On success returns true and stores the inclusive span actually consumed
+ * in *out_min_addr / *out_max_addr. Returns false as soon as a BAR does
+ * not fit; BARs placed before the failure have already been programmed
+ * (callers treat this as fatal and exit, so no rollback is attempted).
+ *
+ * NOTE(review): out_min_addr/out_max_addr are only written on the success
+ * path, and stay untouched when @bars is empty or on failure -- callers
+ * must not read them in those cases.
+ */
+static bool pack_bars_into_region(GArray *bars, uint64_t pack_start, uint64_t pack_end,
+                                  uint64_t *out_min_addr, uint64_t *out_max_addr)
+{
+    uint64_t pack_cursor = pack_start;
+    uint64_t min_addr = UINT64_MAX;
+    uint64_t max_addr = 0;
+
+    for (guint i = 0; i < bars->len; i++) {
+        BarEntry *e = &g_array_index(bars, BarEntry, i);
+        PCIIORegion *r = &e->dev->io_regions[e->bar_idx];
+
+        /* Natural alignment: BAR sizes are powers of two. */
+        uint64_t aligned_addr = ROUND_UP(pack_cursor, r->size);
+        uint64_t bar_start = aligned_addr;
+        uint64_t bar_end = bar_start + r->size - 1;
+
+        if (bar_end > pack_end) {
+            return false; /* Doesn't fit */
+        }
+
+        /* Build a one-entry PhysBAR table for this device's BAR only. */
+        PhysBAR pbars_array[PCI_ROM_SLOT];
+        memset(pbars_array, 0, sizeof(pbars_array));
+        pbars_array[e->bar_idx].addr = bar_start;
+        pbars_array[e->bar_idx].end = bar_end;
+        pbars_array[e->bar_idx].flags = IORESOURCE_PREFETCH;
+
+        pci_program_prefetch_bars(e->dev, pbars_array);
+
+        min_addr = MIN(min_addr, bar_start);
+        max_addr = MAX(max_addr, bar_end);
+        pack_cursor = bar_end + 1;
+    }
+
+    *out_min_addr = min_addr;
+    *out_max_addr = max_addr;
+    return true;
+}
+
+/*
+ * Record the bus's overall BAR span [min_addr, max_addr] in the global
+ * claim list (bar = -1 marks a bridge-window claim rather than a device
+ * BAR) and program the bridge's prefetchable window to cover it.
+ * Silently a no-op on a bus with no parent bridge (e.g. the root bus).
+ */
+static void finalize_bridge_window(PCIBus *bus, uint64_t min_addr, uint64_t max_addr)
+{
+    PCIDevice *bridge_dev = pci_bridge_get_device(bus);
+
+    if (bridge_dev) {
+        fixed_claim_regions_add(min_addr, max_addr, bridge_dev, -1);
+        pci_update_prefetch_window(bus, min_addr, max_addr);
+    }
+}
+
+/*
+ * Scan every device on @bus and split its 64-bit prefetchable BARs
+ * (expansion ROM excluded: only slots 0..PCI_ROM_SLOT-1 are walked)
+ * into two caller-provided lists:
+ *  - @fixed_bars: AddressInterval spans of BARs placed at a user-fixed
+ *    address during phase 1 (the "anchors")
+ *  - @remaining_bars: BarEntry records for BARs still needing placement,
+ *    including non-fixed BARs of devices that do have other fixed BARs
+ *
+ * Returns true iff at least one device on @bus was recorded in
+ * pctx->had_fixed during phase 1; phase 2 only processes such buses.
+ */
+static bool pci_bus_phase2_fill_bar_lists(PCIBus *bus, PciProgramCtx *pctx,
+                                          GArray *fixed_bars, GArray *remaining_bars)
+{
+    AddressInterval interval;
+    BarEntry bentry;
+    PCIDevice *d;
+    PCIIORegion *r;
+    bool bus_has_fixed = false;
+    bool device_has_fixed;
+    int devfn, i;
+
+    for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+        d = bus->devices[devfn];
+        if (!d) {
+            continue;
+        }
+        device_has_fixed = g_hash_table_contains(pctx->had_fixed, d);
+        if (device_has_fixed) {
+            bus_has_fixed = true;
+        }
+        for (i = 0; i < PCI_ROM_SLOT; i++) {
+            r = &d->io_regions[i];
+            if (!is_64bit_pref_bar(r)) {
+                continue;
+            }
+            if (device_has_fixed && d->fixed_bar_addrs &&
+                d->fixed_bar_addrs[i] != PCI_BAR_UNMAPPED) {
+                /* Anchor: address chosen by the user, placed in phase 1. */
+                interval.start = d->fixed_bar_addrs[i];
+                interval.end = d->fixed_bar_addrs[i] + r->size - 1;
+                g_array_append_val(fixed_bars, interval);
+            } else {
+                /* Still unplaced: queue for dynamic packing. */
+                bentry.dev = d;
+                bentry.bar_idx = i;
+                bentry.size = r->size;
+                g_array_append_val(remaining_bars, bentry);
+            }
+        }
+    }
+    return bus_has_fixed;
+}
+
+/*
+ * Find a mmio64 hole, pack unassigned BARs and program the bridge.
+ *
+ * Hole preference order: (1) the largest hole between the fixed-BAR
+ * anchors that can hold all remaining BARs (keeps the bridge window
+ * compact), then (2) the hole above all anchors, then (3) the hole below.
+ * Fatal (error_report + exit(1)) if no single hole is large enough or if
+ * packing fails despite the size check.
+ *
+ * Takes ownership of @fixed_bars and @remaining_bars: both are freed on
+ * every path out of this function, including the fatal ones.
+ *
+ * NOTE(review): remaining_demand sums raw BAR sizes without alignment
+ * padding, so a hole that passes the >= demand check can still fail in
+ * pack_bars_into_region(); no other hole is retried in that case.
+ */
+static void
+pci_bus_phase2_hole_pack_and_update_bridge(PCIBus *bus, GArray *fixed_bars,
+                                           GArray *remaining_bars,
+                                           uint64_t mmio_start,
+                                           uint64_t mmio_end)
+{
+    GArray *holes;
+    FixedClaim *claim;
+    CategorizedHoles cat;
+    AddressInterval *holep, *selected;
+    int selected_hole, largest_middle, h_idx;
+    guint c, mid_i, f;
+    uint64_t bus_min_addr, bus_max_addr, remaining_demand;
+    uint64_t leftmost_anchor, rightmost_anchor_end, valid_start, valid_end;
+    uint64_t largest_size, hole_size, pack_start, pack_end;
+
+    /* Anchors ascending by address; BARs descending by size (largest first). */
+    g_array_sort(fixed_bars, compare_intervals);
+    g_array_sort(remaining_bars, compare_bar_size_desc);
+
+    /* Total bytes still to place (raw sizes, no alignment padding). */
+    remaining_demand = 0;
+    for (c = 0; c < remaining_bars->len; c++) {
+        remaining_demand += g_array_index(remaining_bars, BarEntry, c).size;
+    }
+
+    leftmost_anchor = g_array_index(fixed_bars, AddressInterval, 0).start;
+    rightmost_anchor_end = g_array_index(fixed_bars, AddressInterval,
+                                         fixed_bars->len - 1).end;
+
+    /*
+     * Shrink the search window so it does not reach past foreign claims
+     * lying wholly outside the anchor span; compute_real_holes() would
+     * also exclude those claims, but trimming here keeps the leftmost/
+     * rightmost holes adjacent to this bus's anchors.
+     */
+    valid_start = mmio_start;
+    valid_end = mmio_end;
+
+    if (fixed_claim_regions) {
+        for (c = 0; c < fixed_claim_regions->len; c++) {
+            claim = &g_array_index(fixed_claim_regions, FixedClaim, c);
+            if (claim->end < leftmost_anchor && claim->end >= valid_start) {
+                valid_start = claim->end + 1;
+            }
+            if (claim->start > rightmost_anchor_end && claim->start <= valid_end) {
+                valid_end = claim->start - 1;
+            }
+        }
+    }
+
+    holes = compute_real_holes(fixed_bars, valid_start, valid_end);
+    cat = categorize_holes(holes, fixed_bars);
+
+    selected_hole = -1;
+    pack_start = 0;
+    pack_end = 0;
+
+    /* Preference 1: largest middle hole that can hold the whole demand. */
+    if (cat.middle_holes->len > 0) {
+        largest_middle = -1;
+        largest_size = 0;
+        for (mid_i = 0; mid_i < cat.middle_holes->len; mid_i++) {
+            h_idx = g_array_index(cat.middle_holes, int, mid_i);
+            holep = &g_array_index(holes, AddressInterval, h_idx);
+            hole_size = holep->end - holep->start + 1;
+            if (hole_size >= remaining_demand && hole_size > largest_size) {
+                largest_size = hole_size;
+                largest_middle = h_idx;
+            }
+        }
+        if (largest_middle >= 0) {
+            selected_hole = largest_middle;
+        }
+    }
+    /* Preference 2: hole above all anchors. */
+    if (selected_hole < 0 && cat.rightmost_hole >= 0) {
+        holep = &g_array_index(holes, AddressInterval, cat.rightmost_hole);
+        hole_size = holep->end - holep->start + 1;
+        if (hole_size >= remaining_demand) {
+            selected_hole = cat.rightmost_hole;
+        }
+    }
+    /* Preference 3: hole below all anchors. */
+    if (selected_hole < 0 && cat.leftmost_hole >= 0) {
+        holep = &g_array_index(holes, AddressInterval, cat.leftmost_hole);
+        hole_size = holep->end - holep->start + 1;
+        if (hole_size >= remaining_demand) {
+            selected_hole = cat.leftmost_hole;
+        }
+    }
+    g_array_free(cat.middle_holes, true);
+    if (selected_hole < 0) {
+        error_report("bus [%02x] insufficient contiguous space for "
+                     "remaining_demand=0x%"PRIx64,
+                     pci_bus_num(bus), remaining_demand);
+        g_array_free(holes, true);
+        g_array_free(fixed_bars, true);
+        g_array_free(remaining_bars, true);
+        exit(1);
+    }
+    selected = &g_array_index(holes, AddressInterval, selected_hole);
+    pack_start = selected->start;
+    pack_end = selected->end;
+    g_array_free(holes, true);
+    if (!pack_bars_into_region(remaining_bars, pack_start, pack_end,
+                               &bus_min_addr, &bus_max_addr)) {
+        error_report("bus [%02x] failed to pack BARs", pci_bus_num(bus));
+        g_array_free(fixed_bars, true);
+        g_array_free(remaining_bars, true);
+        exit(1);
+    }
+    /* Grow the bridge window to cover the fixed anchors as well. */
+    for (f = 0; f < fixed_bars->len; f++) {
+        holep = &g_array_index(fixed_bars, AddressInterval, f);
+        bus_min_addr = MIN(bus_min_addr, holep->start);
+        bus_max_addr = MAX(bus_max_addr, holep->end);
+    }
+    finalize_bridge_window(bus, bus_min_addr, bus_max_addr);
+    g_array_free(fixed_bars, true);
+    g_array_free(remaining_bars, true);
+}
+
+/*
+ * Phase 2 per-bus callback (pci_for_each_bus): on buses that contain at
+ * least one fixed-BAR device, pack the remaining 64-bit prefetchable BARs
+ * around the fixed anchors and program the parent bridge's prefetchable
+ * window. Buses with no fixed-BAR devices are skipped here (phase 3).
+ *
+ * If everything on the bus is already fixed, just size the bridge window
+ * to span the anchors; otherwise hand both lists (ownership included) to
+ * pci_bus_phase2_hole_pack_and_update_bridge().
+ */
+static void pci_bus_phase2_pack_remaining_bars(PCIBus *bus, void *opaque)
+{
+    PciProgramCtx *pctx = (PciProgramCtx *)opaque;
+    GArray *fixed_bars, *remaining_bars;
+    uint64_t mmio_start, mmio_end, bus_min_addr, bus_max_addr;
+    bool bus_has_fixed;
+
+    mmio_start = pctx->mmio64_base;
+    mmio_end = pctx->mmio64_base + pctx->mmio64_size - 1;
+    fixed_bars = g_array_new(false, false, sizeof(AddressInterval));
+    remaining_bars = g_array_new(false, false, sizeof(BarEntry));
+    bus_has_fixed = pci_bus_phase2_fill_bar_lists(bus, pctx, fixed_bars,
+                                                  remaining_bars);
+    if (!bus_has_fixed) {
+        g_array_free(fixed_bars, true);
+        g_array_free(remaining_bars, true);
+        return;
+    }
+    if (remaining_bars->len == 0) {
+        /* All BARs fixed: bridge window just spans the anchors. */
+        if (fixed_bars->len > 0) {
+            g_array_sort(fixed_bars, compare_intervals);
+            bus_min_addr = g_array_index(fixed_bars, AddressInterval, 0).start;
+            bus_max_addr = g_array_index(fixed_bars, AddressInterval,
+                                         fixed_bars->len - 1).end;
+            finalize_bridge_window(bus, bus_min_addr, bus_max_addr);
+        }
+        g_array_free(fixed_bars, true);
+        g_array_free(remaining_bars, true);
+        return;
+    }
+    /* Callee frees fixed_bars and remaining_bars on all paths. */
+    pci_bus_phase2_hole_pack_and_update_bridge(bus, fixed_bars, remaining_bars,
+                                               mmio_start, mmio_end);
+}
/* Phase 1: claim and program fixed BARs for one device (per-device callback) */
static void pci_dev_claim_and_program_fixed_bars(PCIBus *bus, PCIDevice *dev, void *opaque)
{
@@ -247,7 +646,10 @@ void pci_fixed_bar_allocator(PCIBus *root, const PciFixedBarMmioParams *mmio)
/* Phase 1: program all fixed BARs and claim them */
pci_for_each_bus(bus, pci_bus_claim_and_program_fixed_bars, &pctx);
- /* TODOs: Phases 2–3, program remaining BARs, bridge window refresh etc,. */
+ /* Phase 2: pack remaining 64-bit prefetchable BARs and size parent bridge window */
+ pci_for_each_bus(bus, pci_bus_phase2_pack_remaining_bars, &pctx);
+
+ /* Phase 3: buses with no fixed-BAR devices; final bridge pass: follow-up */
/* Cleanup */
g_hash_table_destroy(pctx.had_fixed);
diff --git a/hw/pci/pci-resource.h b/hw/pci/pci-resource.h
index cc4d6f71cb..5155a7cefa 100644
--- a/hw/pci/pci-resource.h
+++ b/hw/pci/pci-resource.h
@@ -47,6 +47,23 @@ typedef struct FixedClaim {
int bar;
} FixedClaim;
+/* Inclusive address range [start, end], used for BAR spans and holes. */
+typedef struct {
+    uint64_t start;
+    uint64_t end;
+} AddressInterval;
+
+/* A 64-bit prefetchable BAR still awaiting dynamic placement. */
+typedef struct {
+    PCIDevice *dev;
+    int bar_idx;          /* Index into dev->io_regions */
+    uint64_t size;        /* BAR size in bytes (power of two per PCI spec) */
+} BarEntry;
+
+/* Holes classified relative to the sorted fixed-BAR anchor span. */
+typedef struct {
+    int leftmost_hole;    /* Index of hole before first anchor, or -1 */
+    GArray *middle_holes; /* Array of hole indices between anchors */
+    int rightmost_hole;   /* Index of hole after last anchor, or -1 */
+} CategorizedHoles;
+
typedef struct {
hwaddr mmio64_base;
hwaddr mmio64_size;
--
2.34.1
next prev parent reply other threads:[~2026-05-08 20:43 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-08 18:37 [RFC PATCH 0/8] hw/arm/virt, hw/pci: PCI pre-enumeration and fixed BAR allocation Tushar Dave
2026-05-08 18:37 ` [RFC PATCH 1/8] hw/pci: add fixed-bars property to allow fixed BAR addresses Tushar Dave
2026-05-08 18:37 ` [RFC PATCH 2/8] hw/pci: enumerate PCI bus and program bridge bus numbers Tushar Dave
2026-05-08 18:37 ` [RFC PATCH 3/8] hw/pci: introduce allocator for fixed BAR placement Tushar Dave
2026-05-08 18:37 ` Tushar Dave [this message]
2026-05-08 18:37 ` [RFC PATCH 5/8] hw/pci: allocate remaining BARs for buses without fixed BARs Tushar Dave
2026-05-08 18:37 ` [RFC PATCH 6/8] hw/pci: finalize bridge prefetch windows after BAR allocation Tushar Dave
2026-05-08 18:37 ` [RFC PATCH 7/8] hw/arm/virt: add pcie-mmio-window machine property Tushar Dave
2026-05-08 18:37 ` [RFC PATCH 8/8] hw/arm/virt: add pci-pre-enum " Tushar Dave
2026-05-11 7:46 ` [RFC PATCH 0/8] hw/arm/virt, hw/pci: PCI pre-enumeration and fixed BAR allocation Peter Maydell
2026-05-11 12:26 ` Jason Gunthorpe
2026-05-11 18:38 ` Mohamed Mediouni
2026-05-11 20:28 ` Jason Gunthorpe
2026-05-11 9:09 ` Michael S. Tsirkin
2026-05-11 18:10 ` Tushar Dave
2026-05-11 22:09 ` Michael S. Tsirkin
2026-05-11 11:43 ` [edk2-devel] " Ard Biesheuvel
2026-05-12 17:25 ` Tushar Dave
2026-05-12 23:06 ` Alex Williamson
2026-05-12 23:12 ` Michael S. Tsirkin
2026-05-12 23:57 ` Alex Williamson
2026-05-13 11:36 ` Jason Gunthorpe
2026-05-13 14:25 ` Ard Biesheuvel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260508183717.193630-5-tdave@nvidia.com \
--to=tdave@nvidia.com \
--cc=alwilliamson@nvidia.com \
--cc=devel@edk2.groups.io \
--cc=jgg@nvidia.com \
--cc=marcel.apfelbaum@gmail.com \
--cc=mst@redhat.com \
--cc=peter.maydell@linaro.org \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=skolothumtho@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.