[PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC
@ 2026-02-09 11:34 Julian Vetter
  2026-02-09 13:16 ` Roger Pau Monné
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Julian Vetter @ 2026-02-09 11:34 UTC (permalink / raw)
  To: xen-devel; +Cc: Jan Beulich, Andrew Cooper, Roger Pau Monné, Julian Vetter

x2APIC guests with more than 128 vCPUs have APIC IDs above 255, but MSI
addresses and IO-APIC RTEs only provide an 8-bit destination field.
Without extended destination ID support, Linux limits the maximum usable
APIC ID to 255, refusing to bring up vCPUs beyond that limit. So,
advertise XEN_HVM_CPUID_EXT_DEST_ID in the HVM hypervisor CPUID leaf,
signalling that guests may use MSI address bits 11:5 and IO-APIC RTE
bits 55:49 as additional high destination ID bits. This expands the
destination ID from 8 to 15 bits.

Signed-off-by: Julian Vetter <julian.vetter@vates.tech>
---
 xen/arch/x86/cpuid.c                   |  9 +++++++++
 xen/arch/x86/hvm/irq.c                 | 11 ++++++++++-
 xen/arch/x86/hvm/vioapic.c             |  2 +-
 xen/arch/x86/hvm/vmsi.c                |  4 ++--
 xen/arch/x86/include/asm/hvm/hvm.h     |  4 ++--
 xen/arch/x86/include/asm/hvm/vioapic.h | 13 +++++++++++++
 xen/arch/x86/include/asm/msi.h         |  3 +++
 7 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index d85be20d86..fb17c71d74 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -148,6 +148,15 @@ static void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf,
         res->a |= XEN_HVM_CPUID_DOMID_PRESENT;
         res->c = d->domain_id;
 
+        /*
+         * Advertise extended destination ID support. This allows guests to use
+         * bits 11:5 of the MSI address and bits 55:49 of the IO-APIC RTE for
+         * additional destination ID bits, expanding the addressable APIC ID
+         * range from 8 to 15 bits. This is required for x2APIC guests with
+         * APIC IDs > 255.
+         */
+        res->a |= XEN_HVM_CPUID_EXT_DEST_ID;
+
         /*
          * Per-vCPU event channel upcalls are implemented and work
          * correctly with PIRQs routed over event channels.
diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
index 5f64361113..2cc14d37d4 100644
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -374,7 +374,16 @@ int hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq)
 int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data)
 {
     uint32_t tmp = (uint32_t) addr;
-    uint8_t  dest = (tmp & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+    /*
+     * Standard MSI destination: address bits 19:12 (8 bits).
+     * Extended MSI destination: address bits 11:5 (7 more bits).
+     * When XEN_HVM_CPUID_EXT_DEST_ID is advertised, the guest may use
+     * bits 11:5 for high destination ID bits, expanding to 15 bits total.
+     * For legacy guests these bits are 0, so this is backwards-compatible.
+     */
+    uint32_t dest =
+        (((tmp & MSI_ADDR_EXT_DEST_ID_MASK) >> MSI_ADDR_EXT_DEST_ID_SHIFT) << 8) |
+        ((tmp & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT);
     uint8_t  dest_mode = !!(tmp & MSI_ADDR_DESTMODE_MASK);
     uint8_t  delivery_mode = (data & MSI_DATA_DELIVERY_MODE_MASK)
         >> MSI_DATA_DELIVERY_MODE_SHIFT;
diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
index 7c725f9e47..263b1bd5cb 100644
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -411,7 +411,7 @@ static void ioapic_inj_irq(
 
 static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
 {
-    uint16_t dest = vioapic->redirtbl[pin].fields.dest_id;
+    uint32_t dest = VIOAPIC_RTE_DEST(vioapic->redirtbl[pin].bits);
     uint8_t dest_mode = vioapic->redirtbl[pin].fields.dest_mode;
     uint8_t delivery_mode = vioapic->redirtbl[pin].fields.delivery_mode;
     uint8_t vector = vioapic->redirtbl[pin].fields.vector;
diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index 27b1f089e2..dca191b4f1 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -66,7 +66,7 @@ static void vmsi_inj_irq(
 
 int vmsi_deliver(
     struct domain *d, int vector,
-    uint8_t dest, uint8_t dest_mode,
+    uint32_t dest, uint8_t dest_mode,
     uint8_t delivery_mode, uint8_t trig_mode)
 {
     struct vlapic *target;
@@ -125,7 +125,7 @@ void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci)
 }
 
 /* Return value, -1 : multi-dests, non-negative value: dest_vcpu_id */
-int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode)
+int hvm_girq_dest_2_vcpu_id(struct domain *d, uint32_t dest, uint8_t dest_mode)
 {
     int dest_vcpu_id = -1, w = 0;
     struct vcpu *v;
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index 7d9774df59..11256d5e67 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -295,11 +295,11 @@ uint64_t hvm_get_guest_time_fixed(const struct vcpu *v, uint64_t at_tsc);
 
 int vmsi_deliver(
     struct domain *d, int vector,
-    uint8_t dest, uint8_t dest_mode,
+    uint32_t dest, uint8_t dest_mode,
     uint8_t delivery_mode, uint8_t trig_mode);
 struct hvm_pirq_dpci;
 void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci);
-int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode);
+int hvm_girq_dest_2_vcpu_id(struct domain *d, uint32_t dest, uint8_t dest_mode);
 
 enum hvm_intblk
 hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack);
diff --git a/xen/arch/x86/include/asm/hvm/vioapic.h b/xen/arch/x86/include/asm/hvm/vioapic.h
index 68af6dce79..b49eb348d5 100644
--- a/xen/arch/x86/include/asm/hvm/vioapic.h
+++ b/xen/arch/x86/include/asm/hvm/vioapic.h
@@ -32,6 +32,19 @@
 #define VIOAPIC_EDGE_TRIG  0
 #define VIOAPIC_LEVEL_TRIG 1
 
+/*
+ * Extract the destination ID from a 64-bit IO-APIC RTE, including the
+ * extended bits (55:49) used when XEN_HVM_CPUID_EXT_DEST_ID is advertised.
+ */
+#define IO_APIC_REDIR_DEST_SHIFT        56
+#define IO_APIC_REDIR_DEST_MASK         0xffULL
+#define IO_APIC_REDIR_EXT_DEST_SHIFT    49
+#define IO_APIC_REDIR_EXT_DEST_MASK     0x7fULL
+
+#define VIOAPIC_RTE_DEST(rte) \
+    ((((rte) >> IO_APIC_REDIR_DEST_SHIFT) & IO_APIC_REDIR_DEST_MASK) | \
+     (((rte) >> IO_APIC_REDIR_EXT_DEST_SHIFT) & IO_APIC_REDIR_EXT_DEST_MASK) << 8)
+
 #define VIOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000U
 #define VIOAPIC_MEM_LENGTH            0x100
 
diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h
index 00059d4a3a..b7a132e5b5 100644
--- a/xen/arch/x86/include/asm/msi.h
+++ b/xen/arch/x86/include/asm/msi.h
@@ -54,6 +54,9 @@
 #define	 MSI_ADDR_DEST_ID_MASK		0x00ff000
 #define  MSI_ADDR_DEST_ID(dest)		(((dest) << MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK)
 
+#define MSI_ADDR_EXT_DEST_ID_SHIFT	5
+#define MSI_ADDR_EXT_DEST_ID_MASK	0x0000fe0
+
 /* MAX fixed pages reserved for mapping MSIX tables. */
 #define FIX_MSIX_MAX_PAGES              512
 
-- 
2.51.0



--
Julian Vetter | Vates Hypervisor & Kernel Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC
  2026-02-09 11:34 [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC Julian Vetter
@ 2026-02-09 13:16 ` Roger Pau Monné
  2026-02-19 12:44   ` Julian Vetter
  2026-02-09 13:40 ` Teddy Astie
  2026-02-09 14:10 ` Jan Beulich
  2 siblings, 1 reply; 10+ messages in thread
From: Roger Pau Monné @ 2026-02-09 13:16 UTC (permalink / raw)
  To: Julian Vetter; +Cc: xen-devel, Jan Beulich, Andrew Cooper

On Mon, Feb 09, 2026 at 11:34:18AM +0000, Julian Vetter wrote:
> x2APIC guests with more than 128 vCPUs have APIC IDs above 255, but MSI
> addresses and IO-APIC RTEs only provide an 8-bit destination field.
> Without extended destination ID support, Linux limits the maximum usable
> APIC ID to 255, refusing to bring up vCPUs beyond that limit. So,
> advertise XEN_HVM_CPUID_EXT_DEST_ID in the HVM hypervisor CPUID leaf,
> signalling that guests may use MSI address bits 11:5 and IO-APIC RTE
> bits 55:49 as additional high destination ID bits. This expands the
> destination ID from 8 to 15 bits.
> 
> Signed-off-by: Julian Vetter <julian.vetter@vates.tech>
> ---
>  xen/arch/x86/cpuid.c                   |  9 +++++++++
>  xen/arch/x86/hvm/irq.c                 | 11 ++++++++++-
>  xen/arch/x86/hvm/vioapic.c             |  2 +-
>  xen/arch/x86/hvm/vmsi.c                |  4 ++--
>  xen/arch/x86/include/asm/hvm/hvm.h     |  4 ++--
>  xen/arch/x86/include/asm/hvm/vioapic.h | 13 +++++++++++++
>  xen/arch/x86/include/asm/msi.h         |  3 +++
>  7 files changed, 40 insertions(+), 6 deletions(-)
> 
> diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
> index d85be20d86..fb17c71d74 100644
> --- a/xen/arch/x86/cpuid.c
> +++ b/xen/arch/x86/cpuid.c
> @@ -148,6 +148,15 @@ static void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf,
>          res->a |= XEN_HVM_CPUID_DOMID_PRESENT;
>          res->c = d->domain_id;
>  
> +        /*
> +         * Advertise extended destination ID support. This allows guests to use
> +         * bits 11:5 of the MSI address and bits 55:49 of the IO-APIC RTE for
> +         * additional destination ID bits, expanding the addressable APIC ID
> +         * range from 8 to 15 bits. This is required for x2APIC guests with
> +         * APIC IDs > 255.
> +         */
> +        res->a |= XEN_HVM_CPUID_EXT_DEST_ID;

This cannot be unilaterally advertised: you need a QEMU (or in general
any device model that manages PCI passthrough) to understand the
extended destination mode.  This requires the introduction of
a new XEN_DOMCTL_bind_pt_irq equivalent hypercall, that can take an
extended destination ID not limited to 256 values:

struct xen_domctl_bind_pt_irq {
[...]
             uint32_t gflags;
#define XEN_DOMCTL_VMSI_X86_DEST_ID_MASK 0x0000ff

When doing PCI passthrough, QEMU is the entity that decodes the MSI
address and data fields, and hence it would need to be extended (and
to negotiate with Xen) so that it can be determined whether the
Extended ID feature can be advertised.

It would be good to introduce a new XEN_DMOP_* set of hypercalls that
support Extended ID to do the PCI passthrough interrupt binding.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC
  2026-02-09 13:16 ` Roger Pau Monné
@ 2026-02-19 12:44   ` Julian Vetter
  2026-03-06 13:40     ` Roger Pau Monné
  0 siblings, 1 reply; 10+ messages in thread
From: Julian Vetter @ 2026-02-19 12:44 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: xen-devel, Jan Beulich, Andrew Cooper



On 2/9/26 14:16, Roger Pau Monné wrote:
> On Mon, Feb 09, 2026 at 11:34:18AM +0000, Julian Vetter wrote:
>> x2APIC guests with more than 128 vCPUs have APIC IDs above 255, but MSI
>> addresses and IO-APIC RTEs only provide an 8-bit destination field.
>> Without extended destination ID support, Linux limits the maximum usable
>> APIC ID to 255, refusing to bring up vCPUs beyond that limit. So,
>> advertise XEN_HVM_CPUID_EXT_DEST_ID in the HVM hypervisor CPUID leaf,
>> signalling that guests may use MSI address bits 11:5 and IO-APIC RTE
>> bits 55:49 as additional high destination ID bits. This expands the
>> destination ID from 8 to 15 bits.
>>
>> Signed-off-by: Julian Vetter <julian.vetter@vates.tech>
>> ---
>>   xen/arch/x86/cpuid.c                   |  9 +++++++++
>>   xen/arch/x86/hvm/irq.c                 | 11 ++++++++++-
>>   xen/arch/x86/hvm/vioapic.c             |  2 +-
>>   xen/arch/x86/hvm/vmsi.c                |  4 ++--
>>   xen/arch/x86/include/asm/hvm/hvm.h     |  4 ++--
>>   xen/arch/x86/include/asm/hvm/vioapic.h | 13 +++++++++++++
>>   xen/arch/x86/include/asm/msi.h         |  3 +++
>>   7 files changed, 40 insertions(+), 6 deletions(-)
>>
>> diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
>> index d85be20d86..fb17c71d74 100644
>> --- a/xen/arch/x86/cpuid.c
>> +++ b/xen/arch/x86/cpuid.c
>> @@ -148,6 +148,15 @@ static void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf,
>>           res->a |= XEN_HVM_CPUID_DOMID_PRESENT;
>>           res->c = d->domain_id;
>>
>> +        /*
>> +         * Advertise extended destination ID support. This allows guests to use
>> +         * bits 11:5 of the MSI address and bits 55:49 of the IO-APIC RTE for
>> +         * additional destination ID bits, expanding the addressable APIC ID
>> +         * range from 8 to 15 bits. This is required for x2APIC guests with
>> +         * APIC IDs > 255.
>> +         */
>> +        res->a |= XEN_HVM_CPUID_EXT_DEST_ID;
>
> This cannot be unilaterally advertised: you need a QEMU (or in general
> any device model that manages PCI passthrough) to understand the
> extended destination mode.  This requires the introduction of
> a new XEN_DOMCTL_bind_pt_irq equivalent hypercall, that can take an
> extended destination ID not limited to 256 values:
>
> struct xen_domctl_bind_pt_irq {
> [...]
>               uint32_t gflags;
> #define XEN_DOMCTL_VMSI_X86_DEST_ID_MASK 0x0000ff
>
> When doing PCI passthrough it's QEMU the entity that decodes the MSI
> address and data fields, and hence would need expanding (and
> negotiation with Xen) about whether the Extended ID feature can be
> advertised.
>
> It would be good to introduce a new XEN_DMOP_* set of hypercalls that
> support Extended ID to do the PCI passthrough interrupt binding.

Thank you for your feedback. But wouldn't it be enough if QEMU extracts
the additional bits from the gflags and passes them on to Xen? In
pt_irq_create_bind I already extract the additional bits. In QEMU, the
function msi_dest_id would just need to extract the additional bits
before calling xc_domain_update_msi_irq. The gflags argument of
xc_domain_update_msi_irq is 32 bits wide, so there is enough room to
pass the additional bits. What do you think?

Thank you
Julian

>
> Thanks, Roger.



--
Julian Vetter | Vates Hypervisor & Kernel Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC
  2026-02-19 12:44   ` Julian Vetter
@ 2026-03-06 13:40     ` Roger Pau Monné
  0 siblings, 0 replies; 10+ messages in thread
From: Roger Pau Monné @ 2026-03-06 13:40 UTC (permalink / raw)
  To: Julian Vetter; +Cc: xen-devel, Jan Beulich, Andrew Cooper

On Thu, Feb 19, 2026 at 12:44:47PM +0000, Julian Vetter wrote:
> 
> 
> On 2/9/26 14:16, Roger Pau Monné wrote:
> > On Mon, Feb 09, 2026 at 11:34:18AM +0000, Julian Vetter wrote:
> >> x2APIC guests with more than 128 vCPUs have APIC IDs above 255, but MSI
> >> addresses and IO-APIC RTEs only provide an 8-bit destination field.
> >> Without extended destination ID support, Linux limits the maximum usable
> >> APIC ID to 255, refusing to bring up vCPUs beyond that limit. So,
> >> advertise XEN_HVM_CPUID_EXT_DEST_ID in the HVM hypervisor CPUID leaf,
> >> signalling that guests may use MSI address bits 11:5 and IO-APIC RTE
> >> bits 55:49 as additional high destination ID bits. This expands the
> >> destination ID from 8 to 15 bits.
> >>
> >> Signed-off-by: Julian Vetter <julian.vetter@vates.tech>
> >> ---
> >>   xen/arch/x86/cpuid.c                   |  9 +++++++++
> >>   xen/arch/x86/hvm/irq.c                 | 11 ++++++++++-
> >>   xen/arch/x86/hvm/vioapic.c             |  2 +-
> >>   xen/arch/x86/hvm/vmsi.c                |  4 ++--
> >>   xen/arch/x86/include/asm/hvm/hvm.h     |  4 ++--
> >>   xen/arch/x86/include/asm/hvm/vioapic.h | 13 +++++++++++++
> >>   xen/arch/x86/include/asm/msi.h         |  3 +++
> >>   7 files changed, 40 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
> >> index d85be20d86..fb17c71d74 100644
> >> --- a/xen/arch/x86/cpuid.c
> >> +++ b/xen/arch/x86/cpuid.c
> >> @@ -148,6 +148,15 @@ static void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf,
> >>           res->a |= XEN_HVM_CPUID_DOMID_PRESENT;
> >>           res->c = d->domain_id;
> >>   
> >> +        /*
> >> +         * Advertise extended destination ID support. This allows guests to use
> >> +         * bits 11:5 of the MSI address and bits 55:49 of the IO-APIC RTE for
> >> +         * additional destination ID bits, expanding the addressable APIC ID
> >> +         * range from 8 to 15 bits. This is required for x2APIC guests with
> >> +         * APIC IDs > 255.
> >> +         */
> >> +        res->a |= XEN_HVM_CPUID_EXT_DEST_ID;
> > 
> > This cannot be unilaterally advertised: you need a QEMU (or in general
> > any device model that manages PCI passthrough) to understand the
> > extended destination mode.  This requires the introduction of
> > a new XEN_DOMCTL_bind_pt_irq equivalent hypercall, that can take an
> > extended destination ID not limited to 256 values:
> > 
> > struct xen_domctl_bind_pt_irq {
> > [...]
> >               uint32_t gflags;
> > #define XEN_DOMCTL_VMSI_X86_DEST_ID_MASK 0x0000ff
> > 
> > When doing PCI passthrough it's QEMU the entity that decodes the MSI
> > address and data fields, and hence would need expanding (and
> > negotiation with Xen) about whether the Extended ID feature can be
> > advertised.
> > 
> > It would be good to introduce a new XEN_DMOP_* set of hypercalls that
> > support Extended ID to do the PCI passthrough interrupt binding.
> 
> Thank you for your feedback. But wouldn't it be enough if QEMU extracts 
> the additional bits from the gflags and pass it on to XEN?

Possibly, you need to use the still unused 7 bits at the top of the
flags field AFAICT.

> In 
> pt_irq_create_bind I already extract the additional bits. In QEMU the 
> function msi_dest_id would just need to extract the additional bits 
> before calling xc_domain_update_msi_irq. The gflags argument in 
> xc_domain_update_msi_irq is 32Bits, so there is enough room to pass the 
> additional bits. What do you think?

It's possible.  However, there's still the question of how QEMU
signals to Xen that it implements the extended destination logic.  QEMU
and Xen are two separate components, and Xen cannot unilaterally
advertise support for Extended IDs if QEMU doesn't actually implement
it.  You need some kind of negotiation between the device model and
Xen.

It would IMO be way better if we could simply avoid having to parse
the MSI address and data fields in QEMU, and just forward them to Xen.
Then Xen could interpret them in whatever format it wants, and there
would be no negotiation needed between QEMU and Xen.

XEN_DOMCTL_{un}bind_pt_irq hypercalls have no reason to be domctls; it
would be much better if we introduced equivalent DM ops, as that would
remove the usage of two unstable hypercalls from QEMU and would
bring us closer to QEMU not being tied to the running Xen version.  Hence
my recommendation to take this opportunity to introduce a new pair of
DM ops to replace those domctls.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC
  2026-02-09 11:34 [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC Julian Vetter
  2026-02-09 13:16 ` Roger Pau Monné
@ 2026-02-09 13:40 ` Teddy Astie
  2026-02-19 13:08   ` Julian Vetter
  2026-02-09 14:10 ` Jan Beulich
  2 siblings, 1 reply; 10+ messages in thread
From: Teddy Astie @ 2026-02-09 13:40 UTC (permalink / raw)
  To: Julian Vetter, xen-devel; +Cc: Jan Beulich, Andrew Cooper, Roger Pau Monné

Hello,

Some comments, mostly code style, nothing functional.

Le 09/02/2026 à 12:36, Julian Vetter a écrit :
> x2APIC guests with more than 128 vCPUs have APIC IDs above 255, but MSI
> addresses and IO-APIC RTEs only provide an 8-bit destination field.
> Without extended destination ID support, Linux limits the maximum usable
> APIC ID to 255, refusing to bring up vCPUs beyond that limit. So,
> advertise XEN_HVM_CPUID_EXT_DEST_ID in the HVM hypervisor CPUID leaf,
> signalling that guests may use MSI address bits 11:5 and IO-APIC RTE
> bits 55:49 as additional high destination ID bits. This expands the
> destination ID from 8 to 15 bits.
>
> Signed-off-by: Julian Vetter <julian.vetter@vates.tech>
> ---
>   xen/arch/x86/cpuid.c                   |  9 +++++++++
>   xen/arch/x86/hvm/irq.c                 | 11 ++++++++++-
>   xen/arch/x86/hvm/vioapic.c             |  2 +-
>   xen/arch/x86/hvm/vmsi.c                |  4 ++--
>   xen/arch/x86/include/asm/hvm/hvm.h     |  4 ++--
>   xen/arch/x86/include/asm/hvm/vioapic.h | 13 +++++++++++++
>   xen/arch/x86/include/asm/msi.h         |  3 +++
>   7 files changed, 40 insertions(+), 6 deletions(-)
>
> diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
> index d85be20d86..fb17c71d74 100644
> --- a/xen/arch/x86/cpuid.c
> +++ b/xen/arch/x86/cpuid.c
> @@ -148,6 +148,15 @@ static void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf,
>           res->a |= XEN_HVM_CPUID_DOMID_PRESENT;
>           res->c = d->domain_id;
>
> +        /*
> +         * Advertise extended destination ID support. This allows guests to use
> +         * bits 11:5 of the MSI address and bits 55:49 of the IO-APIC RTE for
> +         * additional destination ID bits, expanding the addressable APIC ID
> +         * range from 8 to 15 bits. This is required for x2APIC guests with
> +         * APIC IDs > 255.
> +         */
> +        res->a |= XEN_HVM_CPUID_EXT_DEST_ID;
> +
>           /*
>            * Per-vCPU event channel upcalls are implemented and work
>            * correctly with PIRQs routed over event channels.
> diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
> index 5f64361113..2cc14d37d4 100644
> --- a/xen/arch/x86/hvm/irq.c
> +++ b/xen/arch/x86/hvm/irq.c
> @@ -374,7 +374,16 @@ int hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq)
>   int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data)
>   {
>       uint32_t tmp = (uint32_t) addr;
> -    uint8_t  dest = (tmp & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
> +    /*
> +     * Standard MSI destination: address bits 19:12 (8 bits).
> +     * Extended MSI destination: address bits 11:5 (7 more bits).
> +     * When XEN_HVM_CPUID_EXT_DEST_ID is advertised, the guest may use
> +     * bits 11:5 for high destination ID bits, expanding to 15 bits total.

As we always advertise XEN_HVM_CPUID_EXT_DEST_ID, I would rather say

 > As XEN_HVM_CPUID_EXT_DEST_ID is advertised, ...

> +     * For legacy guests these bits are 0, so this is backwards-compatible.

"Guests unaware of this feature set these bits to 0, ..."

> +     */
> +    uint32_t dest =
> +        (((tmp & MSI_ADDR_EXT_DEST_ID_MASK) >> MSI_ADDR_EXT_DEST_ID_SHIFT) << 8) |
> +        ((tmp & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT);

I wonder if we should introduce a macro like you did for IO-APIC
(VIOAPIC_RTE_DEST).

>       uint8_t  dest_mode = !!(tmp & MSI_ADDR_DESTMODE_MASK);
>       uint8_t  delivery_mode = (data & MSI_DATA_DELIVERY_MODE_MASK)
>           >> MSI_DATA_DELIVERY_MODE_SHIFT;
> diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
> index 7c725f9e47..263b1bd5cb 100644
> --- a/xen/arch/x86/hvm/vioapic.c
> +++ b/xen/arch/x86/hvm/vioapic.c
> @@ -411,7 +411,7 @@ static void ioapic_inj_irq(
>
>   static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
>   {
> -    uint16_t dest = vioapic->redirtbl[pin].fields.dest_id;
> +    uint32_t dest = VIOAPIC_RTE_DEST(vioapic->redirtbl[pin].bits);

I would rather introduce a new field in vioapic_redir_entry for the
extended dest part; and compute dest from that and dest_id.

>       uint8_t dest_mode = vioapic->redirtbl[pin].fields.dest_mode;
>       uint8_t delivery_mode = vioapic->redirtbl[pin].fields.delivery_mode;
>       uint8_t vector = vioapic->redirtbl[pin].fields.vector;
> diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
> index 27b1f089e2..dca191b4f1 100644
> --- a/xen/arch/x86/hvm/vmsi.c
> +++ b/xen/arch/x86/hvm/vmsi.c
> @@ -66,7 +66,7 @@ static void vmsi_inj_irq(
>
>   int vmsi_deliver(
>       struct domain *d, int vector,
> -    uint8_t dest, uint8_t dest_mode,
> +    uint32_t dest, uint8_t dest_mode,
>       uint8_t delivery_mode, uint8_t trig_mode)
>   {
>       struct vlapic *target;
> @@ -125,7 +125,7 @@ void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci)
>   }
>
>   /* Return value, -1 : multi-dests, non-negative value: dest_vcpu_id */
> -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode)
> +int hvm_girq_dest_2_vcpu_id(struct domain *d, uint32_t dest, uint8_t dest_mode)
>   {
>       int dest_vcpu_id = -1, w = 0;
>       struct vcpu *v;
> diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
> index 7d9774df59..11256d5e67 100644
> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -295,11 +295,11 @@ uint64_t hvm_get_guest_time_fixed(const struct vcpu *v, uint64_t at_tsc);
>
>   int vmsi_deliver(
>       struct domain *d, int vector,
> -    uint8_t dest, uint8_t dest_mode,
> +    uint32_t dest, uint8_t dest_mode,
>       uint8_t delivery_mode, uint8_t trig_mode);
>   struct hvm_pirq_dpci;
>   void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci);
> -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode);
> +int hvm_girq_dest_2_vcpu_id(struct domain *d, uint32_t dest, uint8_t dest_mode);
>
>   enum hvm_intblk
>   hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack);
> diff --git a/xen/arch/x86/include/asm/hvm/vioapic.h b/xen/arch/x86/include/asm/hvm/vioapic.h
> index 68af6dce79..b49eb348d5 100644
> --- a/xen/arch/x86/include/asm/hvm/vioapic.h
> +++ b/xen/arch/x86/include/asm/hvm/vioapic.h
> @@ -32,6 +32,19 @@
>   #define VIOAPIC_EDGE_TRIG  0
>   #define VIOAPIC_LEVEL_TRIG 1
>
> +/*
> + * Extract the destination ID from a 64-bit IO-APIC RTE, including the
> + * extended bits (55:49) used when XEN_HVM_CPUID_EXT_DEST_ID is advertised.
> + */
> +#define IO_APIC_REDIR_DEST_SHIFT        56
> +#define IO_APIC_REDIR_DEST_MASK         0xffULL
> +#define IO_APIC_REDIR_EXT_DEST_SHIFT    49
> +#define IO_APIC_REDIR_EXT_DEST_MASK     0x7fULL
> +
> +#define VIOAPIC_RTE_DEST(rte) \
> +    ((((rte) >> IO_APIC_REDIR_DEST_SHIFT) & IO_APIC_REDIR_DEST_MASK) | \
> +     (((rte) >> IO_APIC_REDIR_EXT_DEST_SHIFT) & IO_APIC_REDIR_EXT_DEST_MASK) << 8)
> +
>   #define VIOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000U
>   #define VIOAPIC_MEM_LENGTH            0x100
>
> diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h
> index 00059d4a3a..b7a132e5b5 100644
> --- a/xen/arch/x86/include/asm/msi.h
> +++ b/xen/arch/x86/include/asm/msi.h
> @@ -54,6 +54,9 @@
>   #define	 MSI_ADDR_DEST_ID_MASK		0x00ff000
>   #define  MSI_ADDR_DEST_ID(dest)		(((dest) << MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK)
>
> +#define MSI_ADDR_EXT_DEST_ID_SHIFT	5
> +#define MSI_ADDR_EXT_DEST_ID_MASK	0x0000fe0
> +
>   /* MAX fixed pages reserved for mapping MSIX tables. */
>   #define FIX_MSIX_MAX_PAGES              512
>



--
Teddy Astie | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC
  2026-02-09 13:40 ` Teddy Astie
@ 2026-02-19 13:08   ` Julian Vetter
  2026-02-19 13:28     ` Jan Beulich
  0 siblings, 1 reply; 10+ messages in thread
From: Julian Vetter @ 2026-02-19 13:08 UTC (permalink / raw)
  To: Teddy Astie, xen-devel; +Cc: Jan Beulich, Andrew Cooper, Roger Pau Monné

On 2/9/26 14:40, Teddy Astie wrote:
> Hello,
>
> Some comments, mostly code style, nothing functionnal.
>
> Le 09/02/2026 à 12:36, Julian Vetter a écrit :
>> x2APIC guests with more than 128 vCPUs have APIC IDs above 255, but MSI
>> addresses and IO-APIC RTEs only provide an 8-bit destination field.
>> Without extended destination ID support, Linux limits the maximum usable
>> APIC ID to 255, refusing to bring up vCPUs beyond that limit. So,
>> advertise XEN_HVM_CPUID_EXT_DEST_ID in the HVM hypervisor CPUID leaf,
>> signalling that guests may use MSI address bits 11:5 and IO-APIC RTE
>> bits 55:49 as additional high destination ID bits. This expands the
>> destination ID from 8 to 15 bits.
>>
>> Signed-off-by: Julian Vetter <julian.vetter@vates.tech>
>> ---
>>    xen/arch/x86/cpuid.c                   |  9 +++++++++
>>    xen/arch/x86/hvm/irq.c                 | 11 ++++++++++-
>>    xen/arch/x86/hvm/vioapic.c             |  2 +-
>>    xen/arch/x86/hvm/vmsi.c                |  4 ++--
>>    xen/arch/x86/include/asm/hvm/hvm.h     |  4 ++--
>>    xen/arch/x86/include/asm/hvm/vioapic.h | 13 +++++++++++++
>>    xen/arch/x86/include/asm/msi.h         |  3 +++
>>    7 files changed, 40 insertions(+), 6 deletions(-)
>>
>> diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
>> index d85be20d86..fb17c71d74 100644
>> --- a/xen/arch/x86/cpuid.c
>> +++ b/xen/arch/x86/cpuid.c
>> @@ -148,6 +148,15 @@ static void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf,
>>            res->a |= XEN_HVM_CPUID_DOMID_PRESENT;
>>            res->c = d->domain_id;
>>
>> +        /*
>> +         * Advertise extended destination ID support. This allows guests to use
>> +         * bits 11:5 of the MSI address and bits 55:49 of the IO-APIC RTE for
>> +         * additional destination ID bits, expanding the addressable APIC ID
>> +         * range from 8 to 15 bits. This is required for x2APIC guests with
>> +         * APIC IDs > 255.
>> +         */
>> +        res->a |= XEN_HVM_CPUID_EXT_DEST_ID;
>> +
>>            /*
>>             * Per-vCPU event channel upcalls are implemented and work
>>             * correctly with PIRQs routed over event channels.
>> diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
>> index 5f64361113..2cc14d37d4 100644
>> --- a/xen/arch/x86/hvm/irq.c
>> +++ b/xen/arch/x86/hvm/irq.c
>> @@ -374,7 +374,16 @@ int hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq)
>>    int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data)
>>    {
>>        uint32_t tmp = (uint32_t) addr;
>> -    uint8_t  dest = (tmp & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
>> +    /*
>> +     * Standard MSI destination: address bits 19:12 (8 bits).
>> +     * Extended MSI destination: address bits 11:5 (7 more bits).
>> +     * When XEN_HVM_CPUID_EXT_DEST_ID is advertised, the guest may use
>> +     * bits 11:5 for high destination ID bits, expanding to 15 bits total.
>
> As we always advertise XEN_HVM_CPUID_EXT_DEST_ID, I would rather say
>
>   > As XEN_HVM_CPUID_EXT_DEST_ID is advertised, ...
>
>> +     * For legacy guests these bits are 0, so this is backwards-compatible.
>
> "Guests unaware of this feature set these bits to 0, ..."
>
>> +     */
>> +    uint32_t dest =
>> +        (((tmp & MSI_ADDR_EXT_DEST_ID_MASK) >> MSI_ADDR_EXT_DEST_ID_SHIFT) << 8) |
>> +        ((tmp & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT);
>
> I wonder if we should introduce a macro like you did for IO-APIC
> (VIOAPIC_RTE_DEST).
>
>>        uint8_t  dest_mode = !!(tmp & MSI_ADDR_DESTMODE_MASK);
>>        uint8_t  delivery_mode = (data & MSI_DATA_DELIVERY_MODE_MASK)
>>            >> MSI_DATA_DELIVERY_MODE_SHIFT;
>> diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
>> index 7c725f9e47..263b1bd5cb 100644
>> --- a/xen/arch/x86/hvm/vioapic.c
>> +++ b/xen/arch/x86/hvm/vioapic.c
>> @@ -411,7 +411,7 @@ static void ioapic_inj_irq(
>>
>>    static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
>>    {
>> -    uint16_t dest = vioapic->redirtbl[pin].fields.dest_id;
>> +    uint32_t dest = VIOAPIC_RTE_DEST(vioapic->redirtbl[pin].bits);
>
> I would rather introduce a new field in vioapic_redir_entry for the
> extended dest part; and compute dest from that and dest_id.

Here I have a question. This struct is a public ABI struct.
vioapic_redir_entry is defined in
xen/include/public/arch-x86/hvm/save.h. It's part of Xen's VM
save/restore operation. It is used by libxc and toolstacks to migrate
VMs between hosts. Changing the struct might be undesirable? Yes, having
a bit-field entry for the extended dest bits would make the code
cleaner. What's the general opinion on this? With the VIOAPIC_RTE_DEST
macro I avoided touching this struct...

>
>>        uint8_t dest_mode = vioapic->redirtbl[pin].fields.dest_mode;
>>        uint8_t delivery_mode = vioapic->redirtbl[pin].fields.delivery_mode;
>>        uint8_t vector = vioapic->redirtbl[pin].fields.vector;
>> diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
>> index 27b1f089e2..dca191b4f1 100644
>> --- a/xen/arch/x86/hvm/vmsi.c
>> +++ b/xen/arch/x86/hvm/vmsi.c
>> @@ -66,7 +66,7 @@ static void vmsi_inj_irq(
>>
>>    int vmsi_deliver(
>>        struct domain *d, int vector,
>> -    uint8_t dest, uint8_t dest_mode,
>> +    uint32_t dest, uint8_t dest_mode,
>>        uint8_t delivery_mode, uint8_t trig_mode)
>>    {
>>        struct vlapic *target;
>> @@ -125,7 +125,7 @@ void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci)
>>    }
>>
>>    /* Return value, -1 : multi-dests, non-negative value: dest_vcpu_id */
>> -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode)
>> +int hvm_girq_dest_2_vcpu_id(struct domain *d, uint32_t dest, uint8_t dest_mode)
>>    {
>>        int dest_vcpu_id = -1, w = 0;
>>        struct vcpu *v;
>> diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
>> index 7d9774df59..11256d5e67 100644
>> --- a/xen/arch/x86/include/asm/hvm/hvm.h
>> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
>> @@ -295,11 +295,11 @@ uint64_t hvm_get_guest_time_fixed(const struct vcpu *v, uint64_t at_tsc);
>>
>>    int vmsi_deliver(
>>        struct domain *d, int vector,
>> -    uint8_t dest, uint8_t dest_mode,
>> +    uint32_t dest, uint8_t dest_mode,
>>        uint8_t delivery_mode, uint8_t trig_mode);
>>    struct hvm_pirq_dpci;
>>    void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci);
>> -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode);
>> +int hvm_girq_dest_2_vcpu_id(struct domain *d, uint32_t dest, uint8_t dest_mode);
>>
>>    enum hvm_intblk
>>    hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack);
>> diff --git a/xen/arch/x86/include/asm/hvm/vioapic.h b/xen/arch/x86/include/asm/hvm/vioapic.h
>> index 68af6dce79..b49eb348d5 100644
>> --- a/xen/arch/x86/include/asm/hvm/vioapic.h
>> +++ b/xen/arch/x86/include/asm/hvm/vioapic.h
>> @@ -32,6 +32,19 @@
>>    #define VIOAPIC_EDGE_TRIG  0
>>    #define VIOAPIC_LEVEL_TRIG 1
>>
>> +/*
>> + * Extract the destination ID from a 64-bit IO-APIC RTE, including the
>> + * extended bits (55:49) used when XEN_HVM_CPUID_EXT_DEST_ID is advertised.
>> + */
>> +#define IO_APIC_REDIR_DEST_SHIFT        56
>> +#define IO_APIC_REDIR_DEST_MASK         0xffULL
>> +#define IO_APIC_REDIR_EXT_DEST_SHIFT    49
>> +#define IO_APIC_REDIR_EXT_DEST_MASK     0x7fULL
>> +
>> +#define VIOAPIC_RTE_DEST(rte) \
>> +    ((((rte) >> IO_APIC_REDIR_DEST_SHIFT) & IO_APIC_REDIR_DEST_MASK) | \
>> +     (((rte) >> IO_APIC_REDIR_EXT_DEST_SHIFT) & IO_APIC_REDIR_EXT_DEST_MASK) << 8)
>> +
>>    #define VIOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000U
>>    #define VIOAPIC_MEM_LENGTH            0x100
>>
>> diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h
>> index 00059d4a3a..b7a132e5b5 100644
>> --- a/xen/arch/x86/include/asm/msi.h
>> +++ b/xen/arch/x86/include/asm/msi.h
>> @@ -54,6 +54,9 @@
>>    #define	 MSI_ADDR_DEST_ID_MASK		0x00ff000
>>    #define  MSI_ADDR_DEST_ID(dest)		(((dest) << MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK)
>>
>> +#define MSI_ADDR_EXT_DEST_ID_SHIFT	5
>> +#define MSI_ADDR_EXT_DEST_ID_MASK	0x0000fe0
>> +
>>    /* MAX fixed pages reserved for mapping MSIX tables. */
>>    #define FIX_MSIX_MAX_PAGES              512
>>
>
>
>
> --
> Teddy Astie | Vates XCP-ng Developer
>
> XCP-ng & Xen Orchestra - Vates solutions
>
> web: https://vates.tech



--
Julian Vetter | Vates Hypervisor & Kernel Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC
  2026-02-19 13:08   ` Julian Vetter
@ 2026-02-19 13:28     ` Jan Beulich
  2026-02-19 13:52       ` Julian Vetter
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Beulich @ 2026-02-19 13:28 UTC (permalink / raw)
  To: Julian Vetter; +Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie, xen-devel

On 19.02.2026 14:08, Julian Vetter wrote:
> On 2/9/26 14:40, Teddy Astie wrote:
>> Le 09/02/2026 à 12:36, Julian Vetter a écrit :
>>> --- a/xen/arch/x86/hvm/vioapic.c
>>> +++ b/xen/arch/x86/hvm/vioapic.c
>>> @@ -411,7 +411,7 @@ static void ioapic_inj_irq(
>>>    
>>>    static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
>>>    {
>>> -    uint16_t dest = vioapic->redirtbl[pin].fields.dest_id;
>>> +    uint32_t dest = VIOAPIC_RTE_DEST(vioapic->redirtbl[pin].bits);
>>
>> I would rather introduce a new field in vioapic_redir_entry for the
>> extended dest part; and compute dest from that and dest_id.
> 
> Here I have a question. This struct is a public ABI struct. 
> vioapic_redir_entry is defined in 
> xen/include/public/arch-x86/hvm/save.h. It's part of XENs VM 
> save/restore operation. It is used by libxc and toolstacks to migrate 
> VMs between hosts. Changing the struct might be undesirable? Yes, it 
> would make the code cleaner. having a bit entry for the extended dest 
> bits. What's the general opinion on this? With the VIOAPIC_RTE_DEST 
> macro I avoided touching this struct...

Which in turn raises the question: How do you migrate those bits? It looks
like you're losing them.

Jan


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC
  2026-02-19 13:28     ` Jan Beulich
@ 2026-02-19 13:52       ` Julian Vetter
  2026-02-19 15:10         ` Jan Beulich
  0 siblings, 1 reply; 10+ messages in thread
From: Julian Vetter @ 2026-02-19 13:52 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie, xen-devel



On 2/19/26 14:28, Jan Beulich wrote:
> On 19.02.2026 14:08, Julian Vetter wrote:
>> On 2/9/26 14:40, Teddy Astie wrote:
>>> Le 09/02/2026 à 12:36, Julian Vetter a écrit :
>>>> --- a/xen/arch/x86/hvm/vioapic.c
>>>> +++ b/xen/arch/x86/hvm/vioapic.c
>>>> @@ -411,7 +411,7 @@ static void ioapic_inj_irq(
>>>>
>>>>     static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
>>>>     {
>>>> -    uint16_t dest = vioapic->redirtbl[pin].fields.dest_id;
>>>> +    uint32_t dest = VIOAPIC_RTE_DEST(vioapic->redirtbl[pin].bits);
>>>
>>> I would rather introduce a new field in vioapic_redir_entry for the
>>> extended dest part; and compute dest from that and dest_id.
>>
>> Here I have a question. This struct is a public ABI struct.
>> vioapic_redir_entry is defined in
>> xen/include/public/arch-x86/hvm/save.h. It's part of XENs VM
>> save/restore operation. It is used by libxc and toolstacks to migrate
>> VMs between hosts. Changing the struct might be undesirable? Yes, it
>> would make the code cleaner. having a bit entry for the extended dest
>> bits. What's the general opinion on this? With the VIOAPIC_RTE_DEST
>> macro I avoided touching this struct...
>
> Which in turn raises the question: How do you migrate those bits? It looks
> like you're losing them.

The bits are preserved correctly. The migration saves/restores the full
uint64_t "bits" union member, not the individual bit fields. But maybe
the issue is in the other direction? If we migrate a VM from a new Xen
(where the guest has programmed extended dest IDs into RTEs) to an old
Xen that doesn't understand them, the raw bits would be restored
correctly into "bits", but the old Xen's vioapic_deliver only reads
fields.dest_id (8 bits) and would silently ignore the extended bits. The
guest would end up with broken interrupt routing to vCPUs with APIC ID >
255. But there is not much we can do?!

>
> Jan



--
Julian Vetter | Vates Hypervisor & Kernel Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC
  2026-02-19 13:52       ` Julian Vetter
@ 2026-02-19 15:10         ` Jan Beulich
  0 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2026-02-19 15:10 UTC (permalink / raw)
  To: Julian Vetter; +Cc: Andrew Cooper, Roger Pau Monné, Teddy Astie, xen-devel

On 19.02.2026 14:52, Julian Vetter wrote:
> 
> 
> On 2/19/26 14:28, Jan Beulich wrote:
>> On 19.02.2026 14:08, Julian Vetter wrote:
>>> On 2/9/26 14:40, Teddy Astie wrote:
>>>> Le 09/02/2026 à 12:36, Julian Vetter a écrit :
>>>>> --- a/xen/arch/x86/hvm/vioapic.c
>>>>> +++ b/xen/arch/x86/hvm/vioapic.c
>>>>> @@ -411,7 +411,7 @@ static void ioapic_inj_irq(
>>>>>     
>>>>>     static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
>>>>>     {
>>>>> -    uint16_t dest = vioapic->redirtbl[pin].fields.dest_id;
>>>>> +    uint32_t dest = VIOAPIC_RTE_DEST(vioapic->redirtbl[pin].bits);
>>>>
>>>> I would rather introduce a new field in vioapic_redir_entry for the
>>>> extended dest part; and compute dest from that and dest_id.
>>>
>>> Here I have a question. This struct is a public ABI struct.
>>> vioapic_redir_entry is defined in
>>> xen/include/public/arch-x86/hvm/save.h. It's part of XENs VM
>>> save/restore operation. It is used by libxc and toolstacks to migrate
>>> VMs between hosts. Changing the struct might be undesirable? Yes, it
>>> would make the code cleaner. having a bit entry for the extended dest
>>> bits. What's the general opinion on this? With the VIOAPIC_RTE_DEST
>>> macro I avoided touching this struct...
>>
>> Which in turn raises the question: How do you migrate those bits? It looks
>> like you're losing them.
> 
> The bits are preserved correctly. The migration saves/restores the full 
> uint64_t bit union member, not the individual bit fields.

Which is, aiui, only because for the IO-APIC we haven't introduced "check"
hooks, yet. Which, yes, ...

> But, the issue 
> is maybe in the other direction? If we migrate a VM from a new XEN 
> (where the guest has programmed extended dest IDs into RTEs) to an old 
> XEN that doesn't understand them. The raw bits would be restored 
> correctly into bits, but the old XENs vioapic_deliver only reads 
> fields.dest_id (8 bits) and would silently ignore the extended bits. The 
> guest would end up with broken interrupt routing to vCPUs with APIC ID > 
> 255.

... goes along with what you're saying here.

> But there is not much we can do?!

The bits need explicitly migrating (which may well be as part of the RTE,
as you describe), with checking as mentioned above in place. That checking
would refuse the migration prior to your patch (or when the feature is
disabled for the guest), and allow it through afterwards (when the feature
is enabled for the guest).

Jan


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC
  2026-02-09 11:34 [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC Julian Vetter
  2026-02-09 13:16 ` Roger Pau Monné
  2026-02-09 13:40 ` Teddy Astie
@ 2026-02-09 14:10 ` Jan Beulich
  2 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2026-02-09 14:10 UTC (permalink / raw)
  To: Julian Vetter; +Cc: Andrew Cooper, Roger Pau Monné, xen-devel

On 09.02.2026 12:34, Julian Vetter wrote:
> --- a/xen/arch/x86/include/asm/hvm/vioapic.h
> +++ b/xen/arch/x86/include/asm/hvm/vioapic.h
> @@ -32,6 +32,19 @@
>  #define VIOAPIC_EDGE_TRIG  0
>  #define VIOAPIC_LEVEL_TRIG 1
>  
> +/*
> + * Extract the destination ID from a 64-bit IO-APIC RTE, including the
> + * extended bits (55:49) used when XEN_HVM_CPUID_EXT_DEST_ID is advertised.
> + */
> +#define IO_APIC_REDIR_DEST_SHIFT        56
> +#define IO_APIC_REDIR_DEST_MASK         0xffULL
> +#define IO_APIC_REDIR_EXT_DEST_SHIFT    49
> +#define IO_APIC_REDIR_EXT_DEST_MASK     0x7fULL

Can we please stop introducing pairs of mask and shift values? Already these
versus ...

> --- a/xen/arch/x86/include/asm/msi.h
> +++ b/xen/arch/x86/include/asm/msi.h
> @@ -54,6 +54,9 @@
>  #define	 MSI_ADDR_DEST_ID_MASK		0x00ff000
>  #define  MSI_ADDR_DEST_ID(dest)		(((dest) << MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK)
>  
> +#define MSI_ADDR_EXT_DEST_ID_SHIFT	5
> +#define MSI_ADDR_EXT_DEST_ID_MASK	0x0000fe0

... this shows one of the problems: It's never clear whether the mask is
intended to be applied to the shifted or un-shifted value. In the form you
have down here, the mask constant alone is enough: you would use it with
MASK_EXTR() and MASK_INSR().

Jan


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2026-03-06 13:40 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-09 11:34 [PATCH] x86/hvm: Advertise and support extended destination IDs for MSI/IO-APIC Julian Vetter
2026-02-09 13:16 ` Roger Pau Monné
2026-02-19 12:44   ` Julian Vetter
2026-03-06 13:40     ` Roger Pau Monné
2026-02-09 13:40 ` Teddy Astie
2026-02-19 13:08   ` Julian Vetter
2026-02-19 13:28     ` Jan Beulich
2026-02-19 13:52       ` Julian Vetter
2026-02-19 15:10         ` Jan Beulich
2026-02-09 14:10 ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.