* [PATCH v4 1/4] vpci: Use pervcpu ranges for BAR mapping
2026-04-06 19:11 [PATCH v4 0/4] vpci: allow 32-bit BAR writes with memory decoding enabled Stewart Hildebrand
@ 2026-04-06 19:11 ` Stewart Hildebrand
2026-04-16 14:26 ` Jan Beulich
2026-04-16 15:29 ` Jan Beulich
2026-04-06 19:11 ` [PATCH v4 2/4] vpci: allow queueing of mapping operations Stewart Hildebrand
` (2 subsequent siblings)
3 siblings, 2 replies; 14+ messages in thread
From: Stewart Hildebrand @ 2026-04-06 19:11 UTC (permalink / raw)
To: xen-devel
Cc: Mykyta Poturai, Andrew Cooper, Anthony PERARD, Michal Orzel,
Jan Beulich, Julien Grall, Roger Pau Monné,
Stefano Stabellini, Stewart Hildebrand, Mykyta Poturai
From: Mykyta Poturai <Mykyta_Poturai@epam.com>
There is no need to store ranges for each PCI device, as they are only
used during the mapping/unmapping process and can be reused for each
device. This also allows to avoid the need to allocate and destroy
rangesets for each device.
Move the rangesets from struct vpci_bar to struct vpci_vcpu and perform
(de-)allocation with vcpu (de-)allocation. Introduce RANGESET_DESTROY()
macro to free a rangeset and set the pointer to NULL.
Amends: 622bdd962822 ("vpci/header: handle p2m range sets per BAR")
Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com>
Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
---
It seems a bit awkward to introduce various vpci vcpu alloc/dealloc
functions here only to undo most of it in the next patch. Thoughts on
folding the next patch into this one?
v3->v4:
* no change
v2->v3:
* new patch in this series, borrowed from [1]
* add Amends tag
* remove unused variable i due to rebasing over 998060dd9101 ("vPCI:
move vpci_init_capabilities() to a separate file")
* enclose entire struct vpci_vcpu inside #ifdef __XEN__
* s/bar_mem/mem/
* use ARRAY_SIZE
* put init/destroy in functions
* only allocate for domains with vPCI and idle domain
* replace 'if ( !mem ) continue;' with ASSERT
v1->v2 (in SR-IOV series [1]):
* new patch
[1] https://lore.kernel.org/xen-devel/cover.1772806036.git.mykyta_poturai@epam.com/T/#t
---
xen/common/domain.c | 5 +++
xen/drivers/vpci/header.c | 67 ++++++++++++++------------------------
xen/drivers/vpci/vpci.c | 36 +++++++++++++++++---
xen/include/xen/rangeset.h | 7 ++++
xen/include/xen/vpci.h | 10 ++++--
5 files changed, 75 insertions(+), 50 deletions(-)
diff --git a/xen/common/domain.c b/xen/common/domain.c
index bb9e210c2895..5ef7db8f0960 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -455,6 +455,8 @@ static int vcpu_teardown(struct vcpu *v)
*/
static void vcpu_destroy(struct vcpu *v)
{
+ vpci_vcpu_destroy(v);
+
free_vcpu_struct(v);
}
@@ -512,6 +514,9 @@ struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
if ( arch_vcpu_create(v) != 0 )
goto fail_sched;
+ if ( vpci_vcpu_init(v) )
+ goto fail_sched;
+
d->vcpu[vcpu_id] = v;
if ( vcpu_id != 0 )
{
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index a760d8c32fd6..89dce932f3b1 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -196,6 +196,7 @@ bool vpci_process_pending(struct vcpu *v)
for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
{
struct vpci_bar *bar = &header->bars[i];
+ struct rangeset *mem = v->vpci.mem[i];
struct map_data data = {
.d = v->domain,
.map = v->vpci.cmd & PCI_COMMAND_MEMORY,
@@ -203,10 +204,10 @@ bool vpci_process_pending(struct vcpu *v)
};
int rc;
- if ( rangeset_is_empty(bar->mem) )
+ if ( rangeset_is_empty(mem) )
continue;
- rc = rangeset_consume_ranges(bar->mem, map_range, &data);
+ rc = rangeset_consume_ranges(mem, map_range, &data);
if ( rc == -ERESTART )
{
@@ -224,8 +225,8 @@ bool vpci_process_pending(struct vcpu *v)
/* Clean all the rangesets */
for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
- if ( !rangeset_is_empty(header->bars[i].mem) )
- rangeset_purge(header->bars[i].mem);
+ if ( !rangeset_is_empty(v->vpci.mem[i]) )
+ rangeset_purge(v->vpci.mem[i]);
v->vpci.pdev = NULL;
@@ -260,13 +261,14 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
{
struct vpci_bar *bar = &header->bars[i];
+ struct rangeset *mem = current->vpci.mem[i];
struct map_data data = { .d = d, .map = true, .bar = bar };
- if ( rangeset_is_empty(bar->mem) )
+ if ( rangeset_is_empty(mem) )
continue;
- while ( (rc = rangeset_consume_ranges(bar->mem, map_range,
- &data)) == -ERESTART )
+ while ( (rc = rangeset_consume_ranges(mem, map_range, &data)) ==
+ -ERESTART )
{
/*
* It's safe to drop and reacquire the lock in this context
@@ -331,13 +333,13 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
{
struct vpci_bar *bar = &header->bars[i];
+ struct rangeset *mem = current->vpci.mem[i];
unsigned long start = PFN_DOWN(bar->addr);
unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
unsigned long start_guest = PFN_DOWN(bar->guest_addr);
unsigned long end_guest = PFN_DOWN(bar->guest_addr + bar->size - 1);
- if ( !bar->mem )
- continue;
+ ASSERT(mem);
if ( !MAPPABLE_BAR(bar) ||
(rom_only ? bar->type != VPCI_BAR_ROM
@@ -354,7 +356,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
continue;
}
- ASSERT(rangeset_is_empty(bar->mem));
+ ASSERT(rangeset_is_empty(mem));
/*
* Make sure that the guest set address has the same page offset
@@ -369,7 +371,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
return -EINVAL;
}
- rc = rangeset_add_range(bar->mem, start_guest, end_guest);
+ rc = rangeset_add_range(mem, start_guest, end_guest);
if ( rc )
{
printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
@@ -380,12 +382,12 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
/* Check for overlap with the already setup BAR ranges. */
for ( j = 0; j < i; j++ )
{
- struct vpci_bar *prev_bar = &header->bars[j];
+ struct rangeset *prev_mem = current->vpci.mem[j];
- if ( rangeset_is_empty(prev_bar->mem) )
+ if ( rangeset_is_empty(prev_mem) )
continue;
- rc = rangeset_remove_range(prev_bar->mem, start_guest, end_guest);
+ rc = rangeset_remove_range(prev_mem, start_guest, end_guest);
if ( rc )
{
gprintk(XENLOG_WARNING,
@@ -395,7 +397,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
}
}
- rc = pci_sanitize_bar_memory(bar->mem);
+ rc = pci_sanitize_bar_memory(mem);
if ( rc )
{
gprintk(XENLOG_WARNING,
@@ -412,14 +414,14 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
vmsix_table_size(pdev->vpci, i) - 1);
- for ( j = 0; j < ARRAY_SIZE(header->bars); j++ )
+ for ( j = 0; j < ARRAY_SIZE(current->vpci.mem); j++ )
{
- const struct vpci_bar *bar = &header->bars[j];
+ struct rangeset *mem = current->vpci.mem[j];
- if ( rangeset_is_empty(bar->mem) )
+ if ( rangeset_is_empty(mem) )
continue;
- rc = rangeset_remove_range(bar->mem, start, end);
+ rc = rangeset_remove_range(mem, start, end);
if ( rc )
{
gprintk(XENLOG_WARNING,
@@ -469,8 +471,9 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
for ( j = 0; j < ARRAY_SIZE(header->bars); j++)
{
const struct vpci_bar *bar = &header->bars[j];
+ struct rangeset *mem = current->vpci.mem[j];
- if ( !rangeset_overlaps_range(bar->mem, start, end) ||
+ if ( !rangeset_overlaps_range(mem, start, end) ||
/*
* If only the ROM enable bit is toggled check against
* other BARs in the same device for overlaps, but not
@@ -481,7 +484,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
bar->type == VPCI_BAR_ROM) )
continue;
- rc = rangeset_remove_range(bar->mem, start, end);
+ rc = rangeset_remove_range(mem, start, end);
if ( rc )
{
gprintk(XENLOG_WARNING,
@@ -732,18 +735,6 @@ static void cf_check rom_write(
}
}
-static int bar_add_rangeset(const struct pci_dev *pdev, struct vpci_bar *bar,
- unsigned int i)
-{
- char str[32];
-
- snprintf(str, sizeof(str), "%pp:BAR%u", &pdev->sbdf, i);
-
- bar->mem = rangeset_new(pdev->domain, str, RANGESETF_no_print);
-
- return !bar->mem ? -ENOMEM : 0;
-}
-
int vpci_init_header(struct pci_dev *pdev)
{
uint16_t cmd;
@@ -853,10 +844,6 @@ int vpci_init_header(struct pci_dev *pdev)
else
bars[i].type = VPCI_BAR_MEM32;
- rc = bar_add_rangeset(pdev, &bars[i], i);
- if ( rc )
- goto fail;
-
rc = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size,
(i == num_bars - 1) ? PCI_BAR_LAST : 0);
if ( rc < 0 )
@@ -909,12 +896,6 @@ int vpci_init_header(struct pci_dev *pdev)
4, rom);
if ( rc )
rom->type = VPCI_BAR_EMPTY;
- else
- {
- rc = bar_add_rangeset(pdev, rom, num_bars);
- if ( rc )
- goto fail;
- }
}
else if ( !is_hwdom )
{
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 0ac9ec8b0475..8e6343653078 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -24,6 +24,37 @@
#ifdef __XEN__
+void vpci_vcpu_destroy(struct vcpu *v)
+{
+ unsigned int i;
+
+ if ( !has_vpci(v->domain) && !is_idle_domain(v->domain) )
+ return;
+
+ for ( i = 0; i < ARRAY_SIZE(v->vpci.mem); i++ )
+ RANGESET_DESTROY(v->vpci.mem[i]);
+}
+
+int vpci_vcpu_init(struct vcpu *v)
+{
+ unsigned int i;
+
+ if ( !has_vpci(v->domain) && !is_idle_domain(v->domain) )
+ return 0;
+
+ for ( i = 0; i < ARRAY_SIZE(v->vpci.mem); i++ )
+ {
+ char str[32];
+
+ snprintf(str, sizeof(str), "%pv:BAR%u", v, i);
+ v->vpci.mem[i] = rangeset_new(v->domain, str, RANGESETF_no_print);
+ if ( !v->vpci.mem[i] )
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
static int assign_virtual_sbdf(struct pci_dev *pdev)
{
@@ -89,8 +120,6 @@ struct vpci_register *vpci_get_register(const struct vpci *vpci,
void vpci_deassign_device(struct pci_dev *pdev)
{
- unsigned int i;
-
ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
if ( !has_vpci(pdev->domain) || !pdev->vpci )
@@ -116,9 +145,6 @@ void vpci_deassign_device(struct pci_dev *pdev)
}
spin_unlock(&pdev->vpci->lock);
- for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ )
- rangeset_destroy(pdev->vpci->header.bars[i].mem);
-
xfree(pdev->vpci);
pdev->vpci = NULL;
}
diff --git a/xen/include/xen/rangeset.h b/xen/include/xen/rangeset.h
index 817505badf6f..f01e00ec9234 100644
--- a/xen/include/xen/rangeset.h
+++ b/xen/include/xen/rangeset.h
@@ -40,6 +40,13 @@ struct rangeset *rangeset_new(
void rangeset_destroy(
struct rangeset *r);
+/* Destroy a rangeset, and zero the pointer to it. */
+#define RANGESET_DESTROY(r) \
+ ({ \
+ rangeset_destroy(r); \
+ (r) = NULL; \
+ })
+
/*
* Set a limit on the number of ranges that may exist in set @r.
* NOTE: This must be called while @r is empty.
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 877aa391d178..b55bacbe6e01 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -19,6 +19,9 @@
*/
#define VPCI_MAX_VIRT_DEV (PCI_SLOT(~0) + 1)
+void vpci_vcpu_destroy(struct vcpu *v);
+int vpci_vcpu_init(struct vcpu *v);
+
/* Assign vPCI to device by adding handlers. */
int __must_check vpci_assign_device(struct pci_dev *pdev);
@@ -54,7 +57,6 @@ struct vpci {
uint64_t guest_addr;
uint64_t size;
uint64_t resizable_sizes;
- struct rangeset *mem;
enum {
VPCI_BAR_EMPTY,
VPCI_BAR_IO,
@@ -152,14 +154,15 @@ struct vpci {
#endif
};
+#ifdef __XEN__
struct vpci_vcpu {
/* Per-vcpu structure to store state while {un}mapping of PCI BARs. */
const struct pci_dev *pdev;
+ struct rangeset *mem[ARRAY_SIZE(((struct vpci_header *)NULL)->bars)];
uint16_t cmd;
bool rom_only : 1;
};
-#ifdef __XEN__
void vpci_dump_msi(void);
/* Arch-specific vPCI MSI helpers. */
@@ -204,6 +207,9 @@ bool vpci_ecam_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int len,
#else /* !CONFIG_HAS_VPCI */
struct vpci_vcpu {};
+static inline void vpci_vcpu_destroy(struct vcpu *v) { }
+static inline int vpci_vcpu_init(struct vcpu *v) { return 0; }
+
static inline int vpci_reinit_ext_capabilities(struct pci_dev *pdev)
{
return 0;
--
2.53.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH v4 1/4] vpci: Use pervcpu ranges for BAR mapping
2026-04-06 19:11 ` [PATCH v4 1/4] vpci: Use pervcpu ranges for BAR mapping Stewart Hildebrand
@ 2026-04-16 14:26 ` Jan Beulich
2026-04-16 15:29 ` Jan Beulich
1 sibling, 0 replies; 14+ messages in thread
From: Jan Beulich @ 2026-04-16 14:26 UTC (permalink / raw)
To: Stewart Hildebrand, Mykyta Poturai
Cc: Andrew Cooper, Anthony PERARD, Michal Orzel, Julien Grall,
Roger Pau Monné, Stefano Stabellini, xen-devel
On 06.04.2026 21:11, Stewart Hildebrand wrote:
> From: Mykyta Poturai <Mykyta_Poturai@epam.com>
>
> There is no need to store ranges for each PCI device, as they are only
> used during the mapping/unmapping process and can be reused for each
> device. This also allows to avoid the need to allocate and destroy
> rangesets for each device.
>
> Move the rangesets from struct vpci_bar to struct vpci_vcpu and perform
> (de-)allocation with vcpu (de-)allocation. Introduce RANGESET_DESTROY()
> macro to free a rangeset and set the pointer to NULL.
I'm struggling some with this description. On a typical system I might
expect far more vCPU-s to be there than there are PCI devices. In which
case "There is no need to ..." and "This also allows to avoid ..." feels
like an attempt to mislead readers: We may end up with bigger memory
footprint, when my reading of the description suggests you're trying to
hint at a reduction.
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -455,6 +455,8 @@ static int vcpu_teardown(struct vcpu *v)
> */
> static void vcpu_destroy(struct vcpu *v)
> {
> + vpci_vcpu_destroy(v);
> +
> free_vcpu_struct(v);
> }
Can't the resources be released much earlier, somewhere during
domain_relinquish_resources()?
> --- a/xen/include/xen/rangeset.h
> +++ b/xen/include/xen/rangeset.h
> @@ -40,6 +40,13 @@ struct rangeset *rangeset_new(
> void rangeset_destroy(
> struct rangeset *r);
>
> +/* Destroy a rangeset, and zero the pointer to it. */
> +#define RANGESET_DESTROY(r) \
> + ({ \
> + rangeset_destroy(r); \
> + (r) = NULL; \
> + })
Please note the subtle but important difference (in ordering of operations)
from e.g. XFREE() or FREE_XENHEAP_PAGES().
Jan
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v4 1/4] vpci: Use pervcpu ranges for BAR mapping
2026-04-06 19:11 ` [PATCH v4 1/4] vpci: Use pervcpu ranges for BAR mapping Stewart Hildebrand
2026-04-16 14:26 ` Jan Beulich
@ 2026-04-16 15:29 ` Jan Beulich
2026-04-24 7:49 ` Roger Pau Monné
1 sibling, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2026-04-16 15:29 UTC (permalink / raw)
To: Stewart Hildebrand, Mykyta Poturai
Cc: Andrew Cooper, Anthony PERARD, Michal Orzel, Julien Grall,
Roger Pau Monné, Stefano Stabellini, xen-devel
On 06.04.2026 21:11, Stewart Hildebrand wrote:
> From: Mykyta Poturai <Mykyta_Poturai@epam.com>
>
> There is no need to store ranges for each PCI device, as they are only
> used during the mapping/unmapping process and can be reused for each
> device. This also allows to avoid the need to allocate and destroy
> rangesets for each device.
>
> Move the rangesets from struct vpci_bar to struct vpci_vcpu and perform
> (de-)allocation with vcpu (de-)allocation. Introduce RANGESET_DESTROY()
> macro to free a rangeset and set the pointer to NULL.
>
> Amends: 622bdd962822 ("vpci/header: handle p2m range sets per BAR")
> Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com>
> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
> ---
> It seems a bit awkward to introduce various vpci vcpu alloc/dealloc
> functions here only to undo most of it in the next patch. Thoughts on
> folding the next patch into this one?
>
> v3->v4:
> * no change
There are some complexities here due to the patch being part of two series.
Once Mykyta re-submits the SR-IOV series, we'll have two (likely diverging)
v4-s. Already here ...
> v2->v3:
> * new patch in this series, borrowed from [1]
> * add Amends tag
> * remove unused variable i due to rebasing over 998060dd9101 ("vPCI:
> move vpci_init_capabilities() to a separate file")
> * enclose entire struct vpci_vcpu inside #ifdef __XEN__
> * s/bar_mem/mem/
> * use ARRAY_SIZE
> * put init/destroy in functions
> * only allocate for domains with vPCI and idle domain
> * replace 'if ( !mem ) continue;' with ASSERT
... the v3 there has one more item on this ChangeLog list ("* synced with
BAR write with memory decoding enabled series[1]"), albeit maybe (now that I
read it again) this merely is the counterpart of the first bullet point here.
It would be clearer if there the other series' title was supplied verbatim
and in quotes.
Jan
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v4 1/4] vpci: Use pervcpu ranges for BAR mapping
2026-04-16 15:29 ` Jan Beulich
@ 2026-04-24 7:49 ` Roger Pau Monné
0 siblings, 0 replies; 14+ messages in thread
From: Roger Pau Monné @ 2026-04-24 7:49 UTC (permalink / raw)
To: Jan Beulich
Cc: Stewart Hildebrand, Mykyta Poturai, Andrew Cooper, Anthony PERARD,
Michal Orzel, Julien Grall, Stefano Stabellini, xen-devel
On Thu, Apr 16, 2026 at 05:29:26PM +0200, Jan Beulich wrote:
> On 06.04.2026 21:11, Stewart Hildebrand wrote:
> > From: Mykyta Poturai <Mykyta_Poturai@epam.com>
> >
> > There is no need to store ranges for each PCI device, as they are only
> > used during the mapping/unmapping process and can be reused for each
> > device. This also allows to avoid the need to allocate and destroy
> > rangesets for each device.
> >
> > Move the rangesets from struct vpci_bar to struct vpci_vcpu and perform
> > (de-)allocation with vcpu (de-)allocation. Introduce RANGESET_DESTROY()
> > macro to free a rangeset and set the pointer to NULL.
> >
> > Amends: 622bdd962822 ("vpci/header: handle p2m range sets per BAR")
> > Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com>
> > Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
> > ---
> > It seems a bit awkward to introduce various vpci vcpu alloc/dealloc
> > functions here only to undo most of it in the next patch. Thoughts on
> > folding the next patch into this one?
> >
> > v3->v4:
> > * no change
>
> There are some complexities here due to the patch being part of two series.
> Once Mykyta re-submits the SR-IOV series, we'll have two (likely diverging)
> v4-s. Already here ...
>
> > v2->v3:
> > * new patch in this series, borrowed from [1]
> > * add Amends tag
> > * remove unused variable i due to rebasing over 998060dd9101 ("vPCI:
> > move vpci_init_capabilities() to a separate file")
> > * enclose entire struct vpci_vcpu inside #ifdef __XEN__
> > * s/bar_mem/mem/
> > * use ARRAY_SIZE
> > * put init/destroy in functions
> > * only allocate for domains with vPCI and idle domain
> > * replace 'if ( !mem ) continue;' with ASSERT
>
> ... the v3 there has one more item on this ChangeLog list ("* synced with
> BAR write with memory decoding enabled series[1]"), albeit maybe (now that I
> read it again) this merely is the counterpart of the first bullet point here.
> It would be clearer if there the other series' title was supplied verbatim
> and in quotes.
Indeed. I reviewed the one from Mykyta part of the SR-IOV series.
I see both Jan and myself made similar comments, I'm also a bit
concerned about the claim that moving to per-vCPU ranges is better
than using per-pdev ranges, see the comments on the SR-IOV series.
Overall however it seems to me for the SR-IOV series we will need to
allocate mapping tasks on demand, so we will neither have per-vCPU or
per-pdev per-allocated tasks (that include the rangeset). I think we
need a pre-series that introduce the map queueing plus the on-demand
allocation of tasks in a clean way (iow: there's no need to introduce
per-vCPU tasks if they need to be replaced with runtime allocation
tasks for the SR-IOV support).
Then both this series and the SR-IOV one should be rebased over that
work.
Thanks, Roger.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v4 2/4] vpci: allow queueing of mapping operations
2026-04-06 19:11 [PATCH v4 0/4] vpci: allow 32-bit BAR writes with memory decoding enabled Stewart Hildebrand
2026-04-06 19:11 ` [PATCH v4 1/4] vpci: Use pervcpu ranges for BAR mapping Stewart Hildebrand
@ 2026-04-06 19:11 ` Stewart Hildebrand
2026-04-09 15:17 ` Mykyta Poturai
2026-04-16 14:59 ` Jan Beulich
2026-04-06 19:11 ` [PATCH v4 3/4] vpci: allow BAR map/unmap without affecting memory decoding bit Stewart Hildebrand
2026-04-06 19:11 ` [PATCH v4 4/4] vpci: allow 32-bit BAR writes with memory decoding enabled Stewart Hildebrand
3 siblings, 2 replies; 14+ messages in thread
From: Stewart Hildebrand @ 2026-04-06 19:11 UTC (permalink / raw)
To: xen-devel
Cc: Stewart Hildebrand, Andrew Cooper, Anthony PERARD, Michal Orzel,
Jan Beulich, Julien Grall, Roger Pau Monné,
Stefano Stabellini, Mykyta Poturai
Introduce vPCI BAR mapping task queue. Store information needed to
map/unmap BARs in struct vpci_map_task. Allow queueing of BAR map/unmap
operations in a list, thus making it possible to perform multiple p2m
operations associated with single PCI device.
This is preparatory work for further changes that need to perform
multiple unmap/map operations before returning to guest.
At the moment, only a single operation will be queued. However, when
multiple operations are queued, there is a check in modify_bars() to
skip BARs already in the requested state that will no longer be
accurate. Remove this check in preparation of upcoming changes.
Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
---
apply_map() and vpci_process_map_task() are very similar. Should we try
to combine them into a single function?
I concede that the dynamic allocation/deallocation of struct
vpci_map_task is not ideal. However, to support SR-IOV, there will be a
need to queue many mapping operations (one per VF), and statically
pre-allocating that much would seem wasteful. Only the hardware and/or
control domain would need to queue many operations, and only when
configuring SR-IOV.
v3->v4:
* switch back to dynamically allocated queue elements
v2->v3:
* base on ("vpci: Use pervcpu ranges for BAR mapping") from [1]
* rework with fixed array of map/unmap slots
[1] https://lore.kernel.org/xen-devel/cover.1772806036.git.mykyta_poturai@epam.com/T/#t
v1->v2:
* new patch
---
xen/common/domain.c | 5 +-
xen/drivers/vpci/header.c | 227 ++++++++++++++++++++++++++-----------
xen/drivers/vpci/vpci.c | 30 +----
xen/include/xen/rangeset.h | 7 --
xen/include/xen/vpci.h | 21 ++--
5 files changed, 179 insertions(+), 111 deletions(-)
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 5ef7db8f0960..b1931be9870b 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -455,8 +455,6 @@ static int vcpu_teardown(struct vcpu *v)
*/
static void vcpu_destroy(struct vcpu *v)
{
- vpci_vcpu_destroy(v);
-
free_vcpu_struct(v);
}
@@ -514,8 +512,7 @@ struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
if ( arch_vcpu_create(v) != 0 )
goto fail_sched;
- if ( vpci_vcpu_init(v) )
- goto fail_sched;
+ vpci_vcpu_init(v);
d->vcpu[vcpu_id] = v;
if ( vcpu_id != 0 )
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index 89dce932f3b1..146915e28c50 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -23,6 +23,7 @@
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/softirq.h>
+#include <xen/xvmalloc.h>
#include <xsm/xsm.h>
@@ -35,7 +36,7 @@
struct map_data {
struct domain *d;
- const struct vpci_bar *bar;
+ const struct vpci_bar_map *bar;
bool map;
};
@@ -174,32 +175,20 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
ASSERT_UNREACHABLE();
}
-bool vpci_process_pending(struct vcpu *v)
+static int vpci_process_map_task(const struct pci_dev *pdev,
+ struct vpci_map_task *task)
{
- const struct pci_dev *pdev = v->vpci.pdev;
- struct vpci_header *header = NULL;
unsigned int i;
- if ( !pdev )
- return false;
-
- read_lock(&v->domain->pci_lock);
-
- if ( !pdev->vpci || (v->domain != pdev->domain) )
- {
- v->vpci.pdev = NULL;
- read_unlock(&v->domain->pci_lock);
- return false;
- }
+ ASSERT(rw_is_locked(&pdev->domain->pci_lock));
- header = &pdev->vpci->header;
- for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
+ for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
{
- struct vpci_bar *bar = &header->bars[i];
- struct rangeset *mem = v->vpci.mem[i];
+ struct vpci_bar_map *bar = &task->bars[i];
+ struct rangeset *mem = bar->mem;
struct map_data data = {
- .d = v->domain,
- .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
+ .d = pdev->domain,
+ .map = task->cmd & PCI_COMMAND_MEMORY,
.bar = bar,
};
int rc;
@@ -210,58 +199,116 @@ bool vpci_process_pending(struct vcpu *v)
rc = rangeset_consume_ranges(mem, map_range, &data);
if ( rc == -ERESTART )
- {
- read_unlock(&v->domain->pci_lock);
- return true;
- }
+ return rc;
if ( rc )
{
spin_lock(&pdev->vpci->lock);
/* Disable memory decoding unconditionally on failure. */
- modify_decoding(pdev, v->vpci.cmd & ~PCI_COMMAND_MEMORY,
- false);
+ modify_decoding(pdev, task->cmd & ~PCI_COMMAND_MEMORY, false);
spin_unlock(&pdev->vpci->lock);
- /* Clean all the rangesets */
- for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
- if ( !rangeset_is_empty(v->vpci.mem[i]) )
- rangeset_purge(v->vpci.mem[i]);
+ if ( !is_hardware_domain(pdev->domain) )
+ domain_crash(pdev->domain);
+
+ return rc;
+ }
+ }
+
+ spin_lock(&pdev->vpci->lock);
+ modify_decoding(pdev, task->cmd, task->rom_only);
+ spin_unlock(&pdev->vpci->lock);
+
+ return 0;
+}
+
+static void destroy_map_task(struct vpci_map_task *task)
+{
+ unsigned int i;
+
+ if ( !task )
+ {
+ ASSERT_UNREACHABLE();
+ return;
+ }
+
+ for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
+ rangeset_destroy(task->bars[i].mem);
+
+ xvfree(task);
+}
+
+static void clear_map_queue(struct vcpu *v)
+{
+ struct vpci_map_task *task;
+
+ while ( (task = list_first_entry_or_null(&v->vpci.task_queue,
+ struct vpci_map_task,
+ next)) != NULL )
+ {
+ list_del(&task->next);
+ destroy_map_task(task);
+ }
+}
+
+bool vpci_process_pending(struct vcpu *v)
+{
+ const struct pci_dev *pdev = v->vpci.pdev;
+ struct vpci_map_task *task;
- v->vpci.pdev = NULL;
+ if ( !pdev )
+ return false;
+ read_lock(&v->domain->pci_lock);
+
+ if ( !pdev->vpci || (v->domain != pdev->domain) )
+ {
+ clear_map_queue(v);
+ v->vpci.pdev = NULL;
+ read_unlock(&v->domain->pci_lock);
+ return false;
+ }
+
+ while ( (task = list_first_entry_or_null(&v->vpci.task_queue,
+ struct vpci_map_task,
+ next)) != NULL )
+ {
+ int rc = vpci_process_map_task(pdev, task);
+
+ if ( rc == -ERESTART )
+ {
read_unlock(&v->domain->pci_lock);
+ return true;
+ }
- if ( !is_hardware_domain(v->domain) )
- domain_crash(v->domain);
+ list_del(&task->next);
+ destroy_map_task(task);
- return false;
+ if ( rc )
+ {
+ clear_map_queue(v);
+ break;
}
}
v->vpci.pdev = NULL;
- spin_lock(&pdev->vpci->lock);
- modify_decoding(pdev, v->vpci.cmd, v->vpci.rom_only);
- spin_unlock(&pdev->vpci->lock);
-
read_unlock(&v->domain->pci_lock);
return false;
}
static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
- uint16_t cmd)
+ struct vpci_map_task *task)
{
- struct vpci_header *header = &pdev->vpci->header;
int rc = 0;
unsigned int i;
ASSERT(rw_is_write_locked(&d->pci_lock));
- for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
+ for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
{
- struct vpci_bar *bar = &header->bars[i];
- struct rangeset *mem = current->vpci.mem[i];
+ struct vpci_bar_map *bar = &task->bars[i];
+ struct rangeset *mem = bar->mem;
struct map_data data = { .d = d, .map = true, .bar = bar };
if ( rangeset_is_empty(mem) )
@@ -281,15 +328,52 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
}
}
if ( !rc )
- modify_decoding(pdev, cmd, false);
+ modify_decoding(pdev, task->cmd, false);
return rc;
}
-static void defer_map(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
+static struct vpci_map_task *alloc_map_task(const struct pci_dev *pdev,
+ uint16_t cmd, bool rom_only)
+{
+ struct vpci_map_task *task;
+ unsigned int i;
+
+ task = xvzalloc(struct vpci_map_task);
+
+ if ( !task )
+ return NULL;
+
+ for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
+ {
+ if ( !MAPPABLE_BAR(&pdev->vpci->header.bars[i]) )
+ continue;
+
+ task->bars[i].mem = rangeset_new(pdev->domain, NULL,
+ RANGESETF_no_print);
+
+ if ( !task->bars[i].mem )
+ {
+ destroy_map_task(task);
+ return NULL;
+ }
+
+ task->bars[i].addr = pdev->vpci->header.bars[i].addr;
+ task->bars[i].guest_addr = pdev->vpci->header.bars[i].guest_addr;
+ }
+
+ task->cmd = cmd;
+ task->rom_only = rom_only;
+
+ return task;
+}
+
+static void defer_map(const struct pci_dev *pdev, struct vpci_map_task *task)
{
struct vcpu *curr = current;
+ ASSERT(!curr->vpci.pdev || curr->vpci.pdev == pdev);
+
/*
* FIXME: when deferring the {un}map the state of the device should not
* be trusted. For example the enable bit is toggled after the device
@@ -297,8 +381,8 @@ static void defer_map(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
* started for the same device if the domain is not well-behaved.
*/
curr->vpci.pdev = pdev;
- curr->vpci.cmd = cmd;
- curr->vpci.rom_only = rom_only;
+ list_add_tail(&task->next, &curr->vpci.task_queue);
+
/*
* Raise a scheduler softirq in order to prevent the guest from resuming
* execution with pending mapping operations, to trigger the invocation
@@ -313,11 +397,17 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
struct pci_dev *tmp;
const struct domain *d;
const struct vpci_msix *msix = pdev->vpci->msix;
+ struct vpci_map_task *task;
unsigned int i, j;
int rc;
ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
+ task = alloc_map_task(pdev, cmd, rom_only);
+
+ if ( !task )
+ return -ENOMEM;
+
/*
* Create a rangeset per BAR that represents the current device memory
* region and compare it against all the currently active BAR memory
@@ -333,19 +423,18 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
{
struct vpci_bar *bar = &header->bars[i];
- struct rangeset *mem = current->vpci.mem[i];
+ struct rangeset *mem = task->bars[i].mem;
unsigned long start = PFN_DOWN(bar->addr);
unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
unsigned long start_guest = PFN_DOWN(bar->guest_addr);
unsigned long end_guest = PFN_DOWN(bar->guest_addr + bar->size - 1);
- ASSERT(mem);
+ if ( !mem )
+ continue;
if ( !MAPPABLE_BAR(bar) ||
(rom_only ? bar->type != VPCI_BAR_ROM
- : (bar->type == VPCI_BAR_ROM && !header->rom_enabled)) ||
- /* Skip BARs already in the requested state. */
- bar->enabled == !!(cmd & PCI_COMMAND_MEMORY) )
+ : (bar->type == VPCI_BAR_ROM && !header->rom_enabled)) )
continue;
if ( !pci_check_bar(pdev, _mfn(start), _mfn(end)) )
@@ -368,7 +457,8 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
gprintk(XENLOG_G_WARNING,
"%pp: can't map BAR%u - offset mismatch: %#lx vs %#lx\n",
&pdev->sbdf, i, bar->guest_addr, bar->addr);
- return -EINVAL;
+ rc = -EINVAL;
+ goto fail;
}
rc = rangeset_add_range(mem, start_guest, end_guest);
@@ -376,13 +466,13 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
{
printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
start_guest, end_guest, rc);
- return rc;
+ goto fail;
}
/* Check for overlap with the already setup BAR ranges. */
for ( j = 0; j < i; j++ )
{
- struct rangeset *prev_mem = current->vpci.mem[j];
+ struct rangeset *prev_mem = task->bars[j].mem;
if ( rangeset_is_empty(prev_mem) )
continue;
@@ -393,7 +483,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
gprintk(XENLOG_WARNING,
"%pp: failed to remove overlapping range [%lx, %lx]: %d\n",
&pdev->sbdf, start_guest, end_guest, rc);
- return rc;
+ goto fail;
}
}
@@ -403,7 +493,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
gprintk(XENLOG_WARNING,
"%pp: failed to sanitize BAR#%u memory: %d\n",
&pdev->sbdf, i, rc);
- return rc;
+ goto fail;
}
}
@@ -414,9 +504,9 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
vmsix_table_size(pdev->vpci, i) - 1);
- for ( j = 0; j < ARRAY_SIZE(current->vpci.mem); j++ )
+ for ( j = 0; j < ARRAY_SIZE(task->bars); j++ )
{
- struct rangeset *mem = current->vpci.mem[j];
+ struct rangeset *mem = task->bars[j].mem;
if ( rangeset_is_empty(mem) )
continue;
@@ -427,7 +517,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
gprintk(XENLOG_WARNING,
"%pp: failed to remove MSIX table [%lx, %lx]: %d\n",
&pdev->sbdf, start, end, rc);
- return rc;
+ goto fail;
}
}
}
@@ -471,7 +561,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
for ( j = 0; j < ARRAY_SIZE(header->bars); j++)
{
const struct vpci_bar *bar = &header->bars[j];
- struct rangeset *mem = current->vpci.mem[j];
+ struct rangeset *mem = task->bars[j].mem;
if ( !rangeset_overlaps_range(mem, start, end) ||
/*
@@ -490,7 +580,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
gprintk(XENLOG_WARNING,
"%pp: failed to remove [%lx, %lx]: %d\n",
&pdev->sbdf, start, end, rc);
- return rc;
+ goto fail;
}
}
}
@@ -513,12 +603,19 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
* will always be to establish mappings and process all the BARs.
*/
ASSERT((cmd & PCI_COMMAND_MEMORY) && !rom_only);
- return apply_map(pdev->domain, pdev, cmd);
+ rc = apply_map(pdev->domain, pdev, task);
+ destroy_map_task(task);
+ return rc;
}
- defer_map(pdev, cmd, rom_only);
+ defer_map(pdev, task);
return 0;
+
+ fail:
+ destroy_map_task(task);
+
+ return rc;
}
static void cf_check cmd_write(
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 8e6343653078..ce9fb5b357ff 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -24,35 +24,9 @@
#ifdef __XEN__
-void vpci_vcpu_destroy(struct vcpu *v)
+void vpci_vcpu_init(struct vcpu *v)
{
- unsigned int i;
-
- if ( !has_vpci(v->domain) && !is_idle_domain(v->domain) )
- return;
-
- for ( i = 0; i < ARRAY_SIZE(v->vpci.mem); i++ )
- RANGESET_DESTROY(v->vpci.mem[i]);
-}
-
-int vpci_vcpu_init(struct vcpu *v)
-{
- unsigned int i;
-
- if ( !has_vpci(v->domain) && !is_idle_domain(v->domain) )
- return 0;
-
- for ( i = 0; i < ARRAY_SIZE(v->vpci.mem); i++ )
- {
- char str[32];
-
- snprintf(str, sizeof(str), "%pv:BAR%u", v, i);
- v->vpci.mem[i] = rangeset_new(v->domain, str, RANGESETF_no_print);
- if ( !v->vpci.mem[i] )
- return -ENOMEM;
- }
-
- return 0;
+ INIT_LIST_HEAD(&v->vpci.task_queue);
}
#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
diff --git a/xen/include/xen/rangeset.h b/xen/include/xen/rangeset.h
index f01e00ec9234..817505badf6f 100644
--- a/xen/include/xen/rangeset.h
+++ b/xen/include/xen/rangeset.h
@@ -40,13 +40,6 @@ struct rangeset *rangeset_new(
void rangeset_destroy(
struct rangeset *r);
-/* Destroy a rangeset, and zero the pointer to it. */
-#define RANGESET_DESTROY(r) \
- ({ \
- rangeset_destroy(r); \
- (r) = NULL; \
- })
-
/*
* Set a limit on the number of ranges that may exist in set @r.
* NOTE: This must be called while @r is empty.
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index b55bacbe6e01..e34f7abe6da2 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -19,8 +19,7 @@
*/
#define VPCI_MAX_VIRT_DEV (PCI_SLOT(~0) + 1)
-void vpci_vcpu_destroy(struct vcpu *v);
-int vpci_vcpu_init(struct vcpu *v);
+void vpci_vcpu_init(struct vcpu *v);
/* Assign vPCI to device by adding handlers. */
int __must_check vpci_assign_device(struct pci_dev *pdev);
@@ -155,14 +154,23 @@ struct vpci {
};
#ifdef __XEN__
-struct vpci_vcpu {
+struct vpci_map_task {
/* Per-vcpu structure to store state while {un}mapping of PCI BARs. */
- const struct pci_dev *pdev;
- struct rangeset *mem[ARRAY_SIZE(((struct vpci_header *)NULL)->bars)];
+ struct list_head next;
+ struct vpci_bar_map {
+ uint64_t addr;
+ uint64_t guest_addr;
+ struct rangeset *mem;
+ } bars[ARRAY_SIZE(((struct vpci_header *)NULL)->bars)];
uint16_t cmd;
bool rom_only : 1;
};
+struct vpci_vcpu {
+ const struct pci_dev *pdev;
+ struct list_head task_queue;
+};
+
void vpci_dump_msi(void);
/* Arch-specific vPCI MSI helpers. */
@@ -207,8 +215,7 @@ bool vpci_ecam_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int len,
#else /* !CONFIG_HAS_VPCI */
struct vpci_vcpu {};
-static inline void vpci_vcpu_destroy(struct vcpu *v) { }
-static inline int vpci_vcpu_init(struct vcpu *v) { return 0; }
+static inline void vpci_vcpu_init(struct vcpu *v) { }
static inline int vpci_reinit_ext_capabilities(struct pci_dev *pdev)
{
--
2.53.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH v4 2/4] vpci: allow queueing of mapping operations
2026-04-06 19:11 ` [PATCH v4 2/4] vpci: allow queueing of mapping operations Stewart Hildebrand
@ 2026-04-09 15:17 ` Mykyta Poturai
2026-04-16 14:59 ` Jan Beulich
1 sibling, 0 replies; 14+ messages in thread
From: Mykyta Poturai @ 2026-04-09 15:17 UTC (permalink / raw)
To: Stewart Hildebrand, xen-devel@lists.xenproject.org
Cc: Andrew Cooper, Anthony PERARD, Michal Orzel, Jan Beulich,
Julien Grall, Roger Pau Monné, Stefano Stabellini
On 4/6/26 22:11, Stewart Hildebrand wrote:
> Introduce vPCI BAR mapping task queue. Store information needed to
> map/unmap BARs in struct vpci_map_task. Allow queueing of BAR map/unmap
> operations in a list, thus making it possible to perform multiple p2m
> operations associated with single PCI device.
>
> This is preparatory work for further changes that need to perform
> multiple unmap/map operations before returning to guest.
>
> At the moment, only a single operation will be queued. However, when
> multiple operations are queued, there is a check in modify_bars() to
> skip BARs already in the requested state that will no longer be
> accurate. Remove this check in preparation of upcoming changes.
>
> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
> ---
> apply_map() and vpci_process_map_task() are very similar. Should we try
> to combine them into a single function?
>
> I concede that the dynamic allocation/deallocation of struct
> vpci_map_task is not ideal. However, to support SR-IOV, there will be a
> need to queue many mapping operations (one per VF), and statically
> pre-allocating that much would seem wasteful. Only the hardware and/or
> control domain would need to queue many operations, and only when
> configuring SR-IOV.
>
> v3->v4:
> * switch back to dynamically allocated queue elements
>
> v2->v3:
> * base on ("vpci: Use pervcpu ranges for BAR mapping") from [1]
> * rework with fixed array of map/unmap slots
>
> [1] https://lore.kernel.org/xen-devel/cover.1772806036.git.mykyta_poturai@epam.com/T/#t
>
> v1->v2:
> * new patch
It seems like this patch is modifying a lot of the same code as the
previous one. Maybe it will be a good idea to squash them into a single one?
--
Mykyta
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v4 2/4] vpci: allow queueing of mapping operations
2026-04-06 19:11 ` [PATCH v4 2/4] vpci: allow queueing of mapping operations Stewart Hildebrand
2026-04-09 15:17 ` Mykyta Poturai
@ 2026-04-16 14:59 ` Jan Beulich
1 sibling, 0 replies; 14+ messages in thread
From: Jan Beulich @ 2026-04-16 14:59 UTC (permalink / raw)
To: Stewart Hildebrand
Cc: Andrew Cooper, Anthony PERARD, Michal Orzel, Julien Grall,
Roger Pau Monné, Stefano Stabellini, Mykyta Poturai,
xen-devel
On 06.04.2026 21:11, Stewart Hildebrand wrote:
> Introduce vPCI BAR mapping task queue. Store information needed to
> map/unmap BARs in struct vpci_map_task. Allow queueing of BAR map/unmap
> operations in a list, thus making it possible to perform multiple p2m
> operations associated with single PCI device.
>
> This is preparatory work for further changes that need to perform
> multiple unmap/map operations before returning to guest.
>
> At the moment, only a single operation will be queued. However, when
> multiple operations are queued, there is a check in modify_bars() to
> skip BARs already in the requested state that will no longer be
> accurate. Remove this check in preparation of upcoming changes.
>
> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
> ---
> apply_map() and vpci_process_map_task() are very similar. Should we try
> to combine them into a single function?
>
> I concede that the dynamic allocation/deallocation of struct
> vpci_map_task is not ideal. However, to support SR-IOV, there will be a
> need to queue many mapping operations (one per VF), and statically
> pre-allocating that much would seem wasteful. Only the hardware and/or
> control domain would need to queue many operations, and only when
> configuring SR-IOV.
The address ranges are pretty regular though across the VFs, so I wonder
whether getting away with less than one instance per VF (perhaps one per
PF) wouldn't be possible by adjusting what data is being put into the
"task" structure.
> --- a/xen/include/xen/vpci.h
> +++ b/xen/include/xen/vpci.h
> @@ -19,8 +19,7 @@
> */
> #define VPCI_MAX_VIRT_DEV (PCI_SLOT(~0) + 1)
>
> -void vpci_vcpu_destroy(struct vcpu *v);
> -int vpci_vcpu_init(struct vcpu *v);
> +void vpci_vcpu_init(struct vcpu *v);
>
> /* Assign vPCI to device by adding handlers. */
> int __must_check vpci_assign_device(struct pci_dev *pdev);
> @@ -155,14 +154,23 @@ struct vpci {
> };
>
> #ifdef __XEN__
> -struct vpci_vcpu {
> +struct vpci_map_task {
> /* Per-vcpu structure to store state while {un}mapping of PCI BARs. */
This comment needs adjusting then, too, doesn't it?
Jan
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v4 3/4] vpci: allow BAR map/unmap without affecting memory decoding bit
2026-04-06 19:11 [PATCH v4 0/4] vpci: allow 32-bit BAR writes with memory decoding enabled Stewart Hildebrand
2026-04-06 19:11 ` [PATCH v4 1/4] vpci: Use pervcpu ranges for BAR mapping Stewart Hildebrand
2026-04-06 19:11 ` [PATCH v4 2/4] vpci: allow queueing of mapping operations Stewart Hildebrand
@ 2026-04-06 19:11 ` Stewart Hildebrand
2026-04-24 8:38 ` Roger Pau Monné
2026-04-06 19:11 ` [PATCH v4 4/4] vpci: allow 32-bit BAR writes with memory decoding enabled Stewart Hildebrand
3 siblings, 1 reply; 14+ messages in thread
From: Stewart Hildebrand @ 2026-04-06 19:11 UTC (permalink / raw)
To: xen-devel; +Cc: Stewart Hildebrand, Roger Pau Monné
Introduce 'bool map' and allow invoking modify_bars() without changing
the memory decoding bit. This will allow hardware domain to reposition
BARs without affecting the memory decoding bit.
Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
---
This also lays some groundwork to allow domUs to toggle the guest view
without affecting hardware, a step toward addressing the FIXME in [1].
[1] https://lore.kernel.org/xen-devel/20250814160358.95543-4-roger.pau@citrix.com/
v3->v4:
* rebase on dynamically allocated map queue
v2->v3:
* use bool
* switch to task->map in more places
v1->v2:
* new patch
---
xen/drivers/vpci/header.c | 43 +++++++++++++++++++++------------------
xen/include/xen/vpci.h | 1 +
2 files changed, 24 insertions(+), 20 deletions(-)
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index 146915e28c50..20fe380552f4 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -117,11 +117,12 @@ static int cf_check map_range(
* BAR's enable bit has changed with the memory decoding bit already enabled.
* If rom_only is not set then it's the memory decoding bit that changed.
*/
-static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
- bool rom_only)
+static void modify_decoding(const struct pci_dev *pdev,
+ struct vpci_map_task *task)
{
struct vpci_header *header = &pdev->vpci->header;
- bool map = cmd & PCI_COMMAND_MEMORY;
+ bool rom_only = task->rom_only;
+ bool map = task->map;
unsigned int i;
for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
@@ -168,7 +169,7 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
if ( !rom_only )
{
- pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd);
+ pci_conf_write16(pdev->sbdf, PCI_COMMAND, task->cmd);
header->bars_mapped = map;
}
else
@@ -188,7 +189,7 @@ static int vpci_process_map_task(const struct pci_dev *pdev,
struct rangeset *mem = bar->mem;
struct map_data data = {
.d = pdev->domain,
- .map = task->cmd & PCI_COMMAND_MEMORY,
+ .map = task->map,
.bar = bar,
};
int rc;
@@ -203,9 +204,11 @@ static int vpci_process_map_task(const struct pci_dev *pdev,
if ( rc )
{
- spin_lock(&pdev->vpci->lock);
/* Disable memory decoding unconditionally on failure. */
- modify_decoding(pdev, task->cmd & ~PCI_COMMAND_MEMORY, false);
+ task->cmd &= ~PCI_COMMAND_MEMORY;
+ task->map = false;
+ spin_lock(&pdev->vpci->lock);
+ modify_decoding(pdev, task);
spin_unlock(&pdev->vpci->lock);
if ( !is_hardware_domain(pdev->domain) )
@@ -216,7 +219,7 @@ static int vpci_process_map_task(const struct pci_dev *pdev,
}
spin_lock(&pdev->vpci->lock);
- modify_decoding(pdev, task->cmd, task->rom_only);
+ modify_decoding(pdev, task);
spin_unlock(&pdev->vpci->lock);
return 0;
@@ -328,13 +331,14 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
}
}
if ( !rc )
- modify_decoding(pdev, task->cmd, false);
+ modify_decoding(pdev, task);
return rc;
}
static struct vpci_map_task *alloc_map_task(const struct pci_dev *pdev,
- uint16_t cmd, bool rom_only)
+ uint16_t cmd, bool rom_only,
+ bool map)
{
struct vpci_map_task *task;
unsigned int i;
@@ -364,6 +368,7 @@ static struct vpci_map_task *alloc_map_task(const struct pci_dev *pdev,
task->cmd = cmd;
task->rom_only = rom_only;
+ task->map = map;
return task;
}
@@ -391,7 +396,8 @@ static void defer_map(const struct pci_dev *pdev, struct vpci_map_task *task)
raise_softirq(SCHEDULE_SOFTIRQ);
}
-static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
+static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only,
+ bool map)
{
struct vpci_header *header = &pdev->vpci->header;
struct pci_dev *tmp;
@@ -403,7 +409,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
- task = alloc_map_task(pdev, cmd, rom_only);
+ task = alloc_map_task(pdev, cmd, rom_only, map);
if ( !task )
return -ENOMEM;
@@ -602,7 +608,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
* be called iff the memory decoding bit is enabled, thus the operation
* will always be to establish mappings and process all the BARs.
*/
- ASSERT((cmd & PCI_COMMAND_MEMORY) && !rom_only);
+ ASSERT(map && !rom_only);
rc = apply_map(pdev->domain, pdev, task);
destroy_map_task(task);
return rc;
@@ -646,7 +652,7 @@ static void cf_check cmd_write(
* memory decoding bit has not been changed, so leave everything as-is,
* hoping the guest will realize and try again.
*/
- modify_bars(pdev, cmd, false);
+ modify_bars(pdev, cmd, false, cmd & PCI_COMMAND_MEMORY);
else
pci_conf_write16(pdev->sbdf, reg, cmd);
}
@@ -810,11 +816,8 @@ static void cf_check rom_write(
header->rom_enabled = new_enabled;
pci_conf_write32(pdev->sbdf, reg, val);
}
- /*
- * Pass PCI_COMMAND_MEMORY or 0 to signal a map/unmap request, note that
- * this fabricated command is never going to be written to the register.
- */
- else if ( modify_bars(pdev, new_enabled ? PCI_COMMAND_MEMORY : 0, true) )
+ /* Note that the command value 0 will never be written to the register */
+ else if ( modify_bars(pdev, 0, true, new_enabled) )
/*
* No memory has been added or removed from the p2m (because the actual
* p2m changes are deferred in defer_map) and the ROM enable bit has
@@ -1004,7 +1007,7 @@ int vpci_init_header(struct pci_dev *pdev)
goto fail;
}
- return (cmd & PCI_COMMAND_MEMORY) ? modify_bars(pdev, cmd, false) : 0;
+ return (cmd & PCI_COMMAND_MEMORY) ? modify_bars(pdev, cmd, false, true) : 0;
fail:
pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd);
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index e34f7abe6da2..e6d40827a43a 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -164,6 +164,7 @@ struct vpci_map_task {
} bars[ARRAY_SIZE(((struct vpci_header *)NULL)->bars)];
uint16_t cmd;
bool rom_only : 1;
+ bool map : 1;
};
struct vpci_vcpu {
--
2.53.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH v4 3/4] vpci: allow BAR map/unmap without affecting memory decoding bit
2026-04-06 19:11 ` [PATCH v4 3/4] vpci: allow BAR map/unmap without affecting memory decoding bit Stewart Hildebrand
@ 2026-04-24 8:38 ` Roger Pau Monné
0 siblings, 0 replies; 14+ messages in thread
From: Roger Pau Monné @ 2026-04-24 8:38 UTC (permalink / raw)
To: Stewart Hildebrand; +Cc: xen-devel
On Mon, Apr 06, 2026 at 03:11:57PM -0400, Stewart Hildebrand wrote:
> Introduce 'bool map' and allow invoking modify_bars() without changing
> the memory decoding bit. This will allow hardware domain to reposition
> BARs without affecting the memory decoding bit.
>
> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
> ---
> This also lays some groundwork to allow domUs to toggle the guest view
> without affecting hardware, a step toward addressing the FIXME in [1].
>
> [1] https://lore.kernel.org/xen-devel/20250814160358.95543-4-roger.pau@citrix.com/
The deferred toggling of the command register bits seems overly
complex IMO. It would be much simpler if domUs simply don't get to
play with the real decoding bits. For the hardware domain we could
toggle the decoding bits ahead of the mapping operations, so that we
don't have to defer the toggling after mappings have been sorted.
Long term we should aim to remove the cmd field from vpci_map_task
struct.
I don't mind if you want to pick the patch in [1] in your series, as
we could then drop the cmd field and possibly simplify some of the
logic in this patch by no longer requiring the deferred write to the
command register?
> v3->v4:
> * rebase on dynamically allocated map queue
>
> v2->v3:
> * use bool
> * switch to task->map in more places
>
> v1->v2:
> * new patch
> ---
> xen/drivers/vpci/header.c | 43 +++++++++++++++++++++------------------
> xen/include/xen/vpci.h | 1 +
> 2 files changed, 24 insertions(+), 20 deletions(-)
>
> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> index 146915e28c50..20fe380552f4 100644
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -117,11 +117,12 @@ static int cf_check map_range(
> * BAR's enable bit has changed with the memory decoding bit already enabled.
> * If rom_only is not set then it's the memory decoding bit that changed.
> */
> -static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
> - bool rom_only)
> +static void modify_decoding(const struct pci_dev *pdev,
> + struct vpci_map_task *task)
task wants to be const here, unless further patches require modifying
task inside of modify_decoding().
Thanks, Roger.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v4 4/4] vpci: allow 32-bit BAR writes with memory decoding enabled
2026-04-06 19:11 [PATCH v4 0/4] vpci: allow 32-bit BAR writes with memory decoding enabled Stewart Hildebrand
` (2 preceding siblings ...)
2026-04-06 19:11 ` [PATCH v4 3/4] vpci: allow BAR map/unmap without affecting memory decoding bit Stewart Hildebrand
@ 2026-04-06 19:11 ` Stewart Hildebrand
2026-04-21 13:34 ` Jan Beulich
2026-04-24 8:50 ` Roger Pau Monné
3 siblings, 2 replies; 14+ messages in thread
From: Stewart Hildebrand @ 2026-04-06 19:11 UTC (permalink / raw)
To: xen-devel; +Cc: Stewart Hildebrand, Roger Pau Monné
Currently, Xen vPCI refuses BAR writes if the BAR is mapped in p2m. If
firmware initializes a 32-bit BAR to a bad address, Linux may try to
write a new address to the 32-bit BAR without disabling memory decoding.
Since Xen refuses such writes, the BAR (and thus PCI device) will be
non-functional.
Allow the hardware domain to issue 32-bit BAR writes with memory
decoding enabled. This increases the compatibility of PVH dom0 with more
hardware.
Note that Linux aims at disabling memory decoding before writing 64-bit
BARs. Continue to refuse 64-bit BAR writes in Xen while those BARs are
mapped for now to avoid mapping half-updated BARs in p2m.
Resolves: https://gitlab.com/xen-project/xen/-/issues/197
Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
---
v3->v4:
* rebase on dynamically allocated map queue
v2->v3:
* minor tweaks for fixed number of map/unmap slots
v1->v2:
* rework on top of queued BAR map/unmap operation machinery
RFC->v1:
* keep memory decoding enabled in hardware
* allow write while memory decoding is enabled for 32-bit BARs only
* rework BAR mapping machinery to support unmap-then-map operation
---
xen/drivers/vpci/header.c | 32 +++++++++++++++++++++++---------
1 file changed, 23 insertions(+), 9 deletions(-)
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index 20fe380552f4..dc4f585b4e40 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -670,6 +670,7 @@ static void cf_check bar_write(
{
struct vpci_bar *bar = data;
bool hi = false;
+ uint16_t cmd = 0;
ASSERT(is_hardware_domain(pdev->domain));
@@ -683,19 +684,29 @@ static void cf_check bar_write(
val &= PCI_BASE_ADDRESS_MEM_MASK;
/*
- * Xen only cares whether the BAR is mapped into the p2m, so allow BAR
- * writes as long as the BAR is not mapped into the p2m.
+ * Allow 64-bit BAR writes only when the BAR is not mapped in p2m. Always
+ * allow 32-bit BAR writes.
*/
if ( bar->enabled )
{
- /* If the value written is the current one avoid printing a warning. */
- if ( val != (uint32_t)(bar->addr >> (hi ? 32 : 0)) )
- gprintk(XENLOG_WARNING,
- "%pp: ignored BAR %zu write while mapped\n",
- &pdev->sbdf, bar - pdev->vpci->header.bars + hi);
- return;
- }
+ if ( bar->type == VPCI_BAR_MEM32 )
+ {
+ if ( val == bar->addr )
+ return;
+ cmd = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
+ modify_bars(pdev, cmd, false, false);
+ }
+ else
+ {
+ /* If the value written is the same avoid printing a warning. */
+ if ( val != (uint32_t)(bar->addr >> (hi ? 32 : 0)) )
+ gprintk(XENLOG_WARNING,
+ "%pp: ignored BAR %zu write while mapped\n",
+ &pdev->sbdf, bar - pdev->vpci->header.bars + hi);
+ return;
+ }
+ }
/*
* Update the cached address, so that when memory decoding is enabled
@@ -715,6 +726,9 @@ static void cf_check bar_write(
}
pci_conf_write32(pdev->sbdf, reg, val);
+
+ if ( bar->enabled )
+ modify_bars(pdev, cmd, false, true);
}
static void cf_check guest_mem_bar_write(const struct pci_dev *pdev,
--
2.53.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH v4 4/4] vpci: allow 32-bit BAR writes with memory decoding enabled
2026-04-06 19:11 ` [PATCH v4 4/4] vpci: allow 32-bit BAR writes with memory decoding enabled Stewart Hildebrand
@ 2026-04-21 13:34 ` Jan Beulich
2026-04-24 8:50 ` Roger Pau Monné
1 sibling, 0 replies; 14+ messages in thread
From: Jan Beulich @ 2026-04-21 13:34 UTC (permalink / raw)
To: Stewart Hildebrand; +Cc: Roger Pau Monné, xen-devel
On 06.04.2026 21:11, Stewart Hildebrand wrote:
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -670,6 +670,7 @@ static void cf_check bar_write(
> {
> struct vpci_bar *bar = data;
> bool hi = false;
> + uint16_t cmd = 0;
>
> ASSERT(is_hardware_domain(pdev->domain));
>
> @@ -683,19 +684,29 @@ static void cf_check bar_write(
> val &= PCI_BASE_ADDRESS_MEM_MASK;
>
> /*
> - * Xen only cares whether the BAR is mapped into the p2m, so allow BAR
> - * writes as long as the BAR is not mapped into the p2m.
> + * Allow 64-bit BAR writes only when the BAR is not mapped in p2m. Always
> + * allow 32-bit BAR writes.
> */
> if ( bar->enabled )
> {
> - /* If the value written is the current one avoid printing a warning. */
> - if ( val != (uint32_t)(bar->addr >> (hi ? 32 : 0)) )
> - gprintk(XENLOG_WARNING,
> - "%pp: ignored BAR %zu write while mapped\n",
> - &pdev->sbdf, bar - pdev->vpci->header.bars + hi);
> - return;
> - }
> + if ( bar->type == VPCI_BAR_MEM32 )
> + {
> + if ( val == bar->addr )
> + return;
>
> + cmd = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
> + modify_bars(pdev, cmd, false, false);
> + }
> + else
> + {
> + /* If the value written is the same avoid printing a warning. */
> + if ( val != (uint32_t)(bar->addr >> (hi ? 32 : 0)) )
> + gprintk(XENLOG_WARNING,
> + "%pp: ignored BAR %zu write while mapped\n",
> + &pdev->sbdf, bar - pdev->vpci->header.bars + hi);
> + return;
> + }
> + }
>
> /*
> * Update the cached address, so that when memory decoding is enabled
> @@ -715,6 +726,9 @@ static void cf_check bar_write(
> }
>
> pci_conf_write32(pdev->sbdf, reg, val);
> +
> + if ( bar->enabled )
> + modify_bars(pdev, cmd, false, true);
> }
>
> static void cf_check guest_mem_bar_write(const struct pci_dev *pdev,
While this all looks plausible, isn't something similar needed in rom_write()
then as well?
Jan
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v4 4/4] vpci: allow 32-bit BAR writes with memory decoding enabled
2026-04-06 19:11 ` [PATCH v4 4/4] vpci: allow 32-bit BAR writes with memory decoding enabled Stewart Hildebrand
2026-04-21 13:34 ` Jan Beulich
@ 2026-04-24 8:50 ` Roger Pau Monné
2026-05-04 5:52 ` Jan Beulich
1 sibling, 1 reply; 14+ messages in thread
From: Roger Pau Monné @ 2026-04-24 8:50 UTC (permalink / raw)
To: Stewart Hildebrand; +Cc: xen-devel
On Mon, Apr 06, 2026 at 03:11:58PM -0400, Stewart Hildebrand wrote:
> Currently, Xen vPCI refuses BAR writes if the BAR is mapped in p2m. If
> firmware initializes a 32-bit BAR to a bad address, Linux may try to
> write a new address to the 32-bit BAR without disabling memory decoding.
> Since Xen refuses such writes, the BAR (and thus PCI device) will be
> non-functional.
>
> Allow the hardware domain to issue 32-bit BAR writes with memory
> decoding enabled. This increases the compatibility of PVH dom0 with more
> hardware.
>
> Note that Linux aims at disabling memory decoding before writing 64-bit
> BARs. Continue to refuse 64-bit BAR writes in Xen while those BARs are
> mapped for now to avoid mapping half-updated BARs in p2m.
While Linux does the disabling for 64bit BARs, I wonder if at some we
will need to handle 64bit BAR writes anyway for other OSes, specially
with domU support.
> Resolves: https://gitlab.com/xen-project/xen/-/issues/197
> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
> ---
> v3->v4:
> * rebase on dynamically allocated map queue
>
> v2->v3:
> * minor tweaks for fixed number of map/unmap slots
>
> v1->v2:
> * rework on top of queued BAR map/unmap operation machinery
>
> RFC->v1:
> * keep memory decoding enabled in hardware
> * allow write while memory decoding is enabled for 32-bit BARs only
> * rework BAR mapping machinery to support unmap-then-map operation
> ---
> xen/drivers/vpci/header.c | 32 +++++++++++++++++++++++---------
> 1 file changed, 23 insertions(+), 9 deletions(-)
>
> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> index 20fe380552f4..dc4f585b4e40 100644
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -670,6 +670,7 @@ static void cf_check bar_write(
> {
> struct vpci_bar *bar = data;
> bool hi = false;
> + uint16_t cmd = 0;
>
> ASSERT(is_hardware_domain(pdev->domain));
>
> @@ -683,19 +684,29 @@ static void cf_check bar_write(
> val &= PCI_BASE_ADDRESS_MEM_MASK;
>
> /*
> - * Xen only cares whether the BAR is mapped into the p2m, so allow BAR
> - * writes as long as the BAR is not mapped into the p2m.
> + * Allow 64-bit BAR writes only when the BAR is not mapped in p2m. Always
> + * allow 32-bit BAR writes.
> */
> if ( bar->enabled )
> {
> - /* If the value written is the current one avoid printing a warning. */
> - if ( val != (uint32_t)(bar->addr >> (hi ? 32 : 0)) )
> - gprintk(XENLOG_WARNING,
> - "%pp: ignored BAR %zu write while mapped\n",
> - &pdev->sbdf, bar - pdev->vpci->header.bars + hi);
> - return;
> - }
> + if ( bar->type == VPCI_BAR_MEM32 )
> + {
> + if ( val == bar->addr )
> + return;
>
> + cmd = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
> + modify_bars(pdev, cmd, false, false);
> + }
> + else
> + {
> + /* If the value written is the same avoid printing a warning. */
> + if ( val != (uint32_t)(bar->addr >> (hi ? 32 : 0)) )
> + gprintk(XENLOG_WARNING,
> + "%pp: ignored BAR %zu write while mapped\n",
> + &pdev->sbdf, bar - pdev->vpci->header.bars + hi);
> + return;
> + }
> + }
>
> /*
> * Update the cached address, so that when memory decoding is enabled
> @@ -715,6 +726,9 @@ static void cf_check bar_write(
> }
>
> pci_conf_write32(pdev->sbdf, reg, val);
I don't think it matters a lot, but here we are changing the position
of the BAR in the host memory map while the mappings are still active.
> +
> + if ( bar->enabled )
> + modify_bars(pdev, cmd, false, true);
> }
>
> static void cf_check guest_mem_bar_write(const struct pci_dev *pdev,
As it's in context here - we possibly want to do something similar
with guest writes now that we can?
While we might not have a user-case for domUs ATM, I think it's vest
if we also fix that use case at the same time that dom0 is fixed
(unless there's something that prevents it).
Thanks, Roger.
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v4 4/4] vpci: allow 32-bit BAR writes with memory decoding enabled
2026-04-24 8:50 ` Roger Pau Monné
@ 2026-05-04 5:52 ` Jan Beulich
0 siblings, 0 replies; 14+ messages in thread
From: Jan Beulich @ 2026-05-04 5:52 UTC (permalink / raw)
To: Roger Pau Monné; +Cc: xen-devel, Stewart Hildebrand
On 24.04.2026 10:50, Roger Pau Monné wrote:
> On Mon, Apr 06, 2026 at 03:11:58PM -0400, Stewart Hildebrand wrote:
>> --- a/xen/drivers/vpci/header.c
>> +++ b/xen/drivers/vpci/header.c
>> @@ -670,6 +670,7 @@ static void cf_check bar_write(
>> {
>> struct vpci_bar *bar = data;
>> bool hi = false;
>> + uint16_t cmd = 0;
>>
>> ASSERT(is_hardware_domain(pdev->domain));
>>
>> @@ -683,19 +684,29 @@ static void cf_check bar_write(
>> val &= PCI_BASE_ADDRESS_MEM_MASK;
>>
>> /*
>> - * Xen only cares whether the BAR is mapped into the p2m, so allow BAR
>> - * writes as long as the BAR is not mapped into the p2m.
>> + * Allow 64-bit BAR writes only when the BAR is not mapped in p2m. Always
>> + * allow 32-bit BAR writes.
>> */
>> if ( bar->enabled )
>> {
>> - /* If the value written is the current one avoid printing a warning. */
>> - if ( val != (uint32_t)(bar->addr >> (hi ? 32 : 0)) )
>> - gprintk(XENLOG_WARNING,
>> - "%pp: ignored BAR %zu write while mapped\n",
>> - &pdev->sbdf, bar - pdev->vpci->header.bars + hi);
>> - return;
>> - }
>> + if ( bar->type == VPCI_BAR_MEM32 )
>> + {
>> + if ( val == bar->addr )
>> + return;
>>
>> + cmd = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
>> + modify_bars(pdev, cmd, false, false);
>> + }
>> + else
>> + {
>> + /* If the value written is the same avoid printing a warning. */
>> + if ( val != (uint32_t)(bar->addr >> (hi ? 32 : 0)) )
>> + gprintk(XENLOG_WARNING,
>> + "%pp: ignored BAR %zu write while mapped\n",
>> + &pdev->sbdf, bar - pdev->vpci->header.bars + hi);
>> + return;
>> + }
>> + }
>>
>> /*
>> * Update the cached address, so that when memory decoding is enabled
>> @@ -715,6 +726,9 @@ static void cf_check bar_write(
>> }
>>
>> pci_conf_write32(pdev->sbdf, reg, val);
>
> I don't think it matters a lot, but here we are changing the position
> of the BAR in the host memory map while the mappings are still active.
It would matter if the original address space could be re-used for another
purpose while those mappings are still there?
Jan
^ permalink raw reply [flat|nested] 14+ messages in thread