* [RFC PATCH] KVM: arm/arm64: GICv4: Support shared VLPI
From: Kunkun Jiang @ 2023-11-02 14:35 UTC (permalink / raw)
To: Marc Zyngier, Oliver Upton, James Morse, Suzuki K Poulose,
Zenghui Yu, Catalin Marinas, Will Deacon, Gavin Shan,
Jean-Philippe Brucker, open list:IRQCHIP DRIVERS
Cc: linux-arm-kernel, kvmarm, wanghaibin.wang, Kunkun Jiang
In some scenarios, the guest virtio-pci driver will request only two MSI-X
vectors: one for config and one shared by all queues. However, the host driver
(vDPA or VFIO) will request a vector for each queue.
In the current implementation of GICv4/4.1 direct injection of vLPIs,
pINTID and vINTID have a one-to-one correspondence. Therefore, the
above scenario cannot be handled correctly: the host kernel will
execute its_map_vlpi() multiple times but only execute its_unmap_vlpi()
once. This may cause the guest to hang [1].
| WARN_ON(!(irq->hw && irq->host_irq == virq));
| if (irq->hw) {
| atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
| irq->hw = false;
| ret = its_unmap_vlpi(virq);
| }
Add a list to struct vgic_irq to record all host irqs mapped to the vlpi.
When performing an action on the vlpi, traverse the list and perform this
action on all host irqs.
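The following standalone userspace model (illustration only, not part of
this patch; all names are made up) shows the intended book-keeping: the
vPE accounting is only touched on the first map and the last unmap of a
given vLPI.

/* Minimal model of a vLPI referenced by several host irqs. */
#include <stdio.h>
#include <stdlib.h>

struct host_irq_ref {
        int host_irq;
        struct host_irq_ref *next;      /* stand-in for the list_head */
};

struct model_vlpi {
        int map_count;                  /* stand-in for the atomic_t */
        struct host_irq_ref *refs;      /* stand-in for host_irq_head */
};

static void model_map(struct model_vlpi *v, int host_irq)
{
        struct host_irq_ref *r = calloc(1, sizeof(*r));

        if (!r)
                return;
        r->host_irq = host_irq;
        r->next = v->refs;
        v->refs = r;
        if (v->map_count++ == 0)
                printf("first map: set irq->hw, bump vpe->vlpi_count\n");
}

static void model_unmap(struct model_vlpi *v, int host_irq)
{
        struct host_irq_ref **p = &v->refs;
        struct host_irq_ref *dead;

        while (*p && (*p)->host_irq != host_irq)
                p = &(*p)->next;
        if (!*p)
                return;                 /* this virq was never mapped */
        dead = *p;
        *p = dead->next;
        free(dead);
        if (--v->map_count == 0)
                printf("last unmap: clear irq->hw, drop vpe->vlpi_count\n");
}

int main(void)
{
        struct model_vlpi v = { 0, NULL };

        model_map(&v, 70);              /* queue 0, host irq 70 */
        model_map(&v, 71);              /* queue 1, same guest vLPI */
        model_unmap(&v, 71);            /* the vLPI must stay mapped */
        model_unmap(&v, 70);            /* only now is the vLPI torn down */
        return 0;
}

The patch below implements the same idea inside the vgic, using a
list_head and an atomic_t in struct vgic_irq.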
Link: https://lore.kernel.org/all/0d9fdf42-76b1-afc6-85a9-159c5490bbd4@huawei.com/#t
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
arch/arm64/kvm/vgic/vgic-its.c | 74 ++++++++++++++++++++++++----------
arch/arm64/kvm/vgic/vgic-v4.c | 37 ++++++++++++++---
include/kvm/arm_vgic.h | 7 ++++
3 files changed, 91 insertions(+), 27 deletions(-)
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 5fe2365a629f..c4b453155fcf 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -54,6 +54,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
INIT_LIST_HEAD(&irq->lpi_list);
INIT_LIST_HEAD(&irq->ap_list);
+ INIT_LIST_HEAD(&irq->host_irq_head);
raw_spin_lock_init(&irq->irq_lock);
irq->config = VGIC_CONFIG_EDGE;
@@ -61,6 +62,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
irq->intid = intid;
irq->target_vcpu = vcpu;
irq->group = 1;
+ atomic_set(&irq->map_count, 0);
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
@@ -284,6 +286,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
u8 prop;
int ret;
unsigned long flags;
+ struct vlpi_host_irq *vlpi_hirq;
ret = kvm_read_guest_lock(kvm, propbase + irq->intid - GIC_LPI_OFFSET,
&prop, 1);
@@ -305,8 +308,13 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
- if (irq->hw)
- return its_prop_update_vlpi(irq->host_irq, prop, needs_inv);
+ if (irq->hw) {
+ list_for_each_entry(vlpi_hirq, &irq->host_irq_head, host_irq_list) {
+ ret = its_prop_update_vlpi(vlpi_hirq->host_irq, prop, needs_inv);
+ if (ret)
+ return ret;
+ }
+ }
return 0;
}
@@ -354,25 +362,31 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
{
int ret = 0;
- unsigned long flags;
+ unsigned long flags, counts;
+ struct vlpi_host_irq *vlpi_hirq;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
irq->target_vcpu = vcpu;
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
if (irq->hw) {
- struct its_vlpi_map map;
+ counts = atomic_read(&irq->map_count);
+ list_for_each_entry(vlpi_hirq, &irq->host_irq_head, host_irq_list) {
+ struct its_vlpi_map map;
- ret = its_get_vlpi(irq->host_irq, &map);
- if (ret)
- return ret;
+ ret = its_get_vlpi(vlpi_hirq->host_irq, &map);
+ if (ret)
+ return ret;
- if (map.vpe)
- atomic_dec(&map.vpe->vlpi_count);
- map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
- atomic_inc(&map.vpe->vlpi_count);
+ counts--;
+ if (map.vpe && !counts)
+ atomic_dec(&map.vpe->vlpi_count);
+ map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+ if (!counts)
+ atomic_inc(&map.vpe->vlpi_count);
- ret = its_map_vlpi(irq->host_irq, &map);
+ ret = its_map_vlpi(vlpi_hirq->host_irq, &map);
+ }
}
return ret;
@@ -731,6 +745,7 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid)
{
struct vgic_irq *irq = NULL;
+ struct vlpi_host_irq *vlpi_hirq;
unsigned long flags;
int err;
@@ -738,9 +753,15 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
if (err)
return err;
- if (irq->hw)
- return irq_set_irqchip_state(irq->host_irq,
- IRQCHIP_STATE_PENDING, true);
+ if (irq->hw) {
+ list_for_each_entry(vlpi_hirq, &irq->host_irq_head, host_irq_list) {
+ err = irq_set_irqchip_state(vlpi_hirq->host_irq,
+ IRQCHIP_STATE_PENDING, true);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
raw_spin_lock_irqsave(&irq->irq_lock, flags);
irq->pending_latch = true;
@@ -806,12 +827,17 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
/* Requires the its_lock to be held. */
static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
{
+ struct vlpi_host_irq *vlpi_hirq;
+
list_del(&ite->ite_list);
/* This put matches the get in vgic_add_lpi. */
if (ite->irq) {
- if (ite->irq->hw)
- WARN_ON(its_unmap_vlpi(ite->irq->host_irq));
+ if (ite->irq->hw) {
+ list_for_each_entry(vlpi_hirq, &ite->irq->host_irq_head, host_irq_list) {
+ WARN_ON(its_unmap_vlpi(vlpi_hirq->host_irq));
+ }
+ }
vgic_put_irq(kvm, ite->irq);
}
@@ -1290,7 +1316,8 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
u32 device_id = its_cmd_get_deviceid(its_cmd);
u32 event_id = its_cmd_get_id(its_cmd);
struct its_ite *ite;
-
+ struct vlpi_host_irq *vlpi_hirq;
+ int ret;
ite = find_ite(its, device_id, event_id);
if (!ite)
@@ -1298,9 +1325,14 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
ite->irq->pending_latch = false;
- if (ite->irq->hw)
- return irq_set_irqchip_state(ite->irq->host_irq,
- IRQCHIP_STATE_PENDING, false);
+ if (ite->irq->hw) {
+ list_for_each_entry(vlpi_hirq, &ite->irq->host_irq_head, host_irq_list) {
+ ret = irq_set_irqchip_state(vlpi_hirq->host_irq,
+ IRQCHIP_STATE_PENDING, false);
+ if (ret)
+ return ret;
+ }
+ }
return 0;
}
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 339a55194b2c..d634ba3dd225 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -413,6 +413,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
{
struct vgic_its *its;
struct vgic_irq *irq;
+ struct vlpi_host_irq *vlpi_hirq;
struct its_vlpi_map map;
unsigned long flags;
int ret;
@@ -456,9 +457,20 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
if (ret)
goto out;
- irq->hw = true;
- irq->host_irq = virq;
- atomic_inc(&map.vpe->vlpi_count);
+ vlpi_hirq = kzalloc(sizeof(struct vlpi_host_irq), GFP_KERNEL_ACCOUNT);
+ if (!vlpi_hirq) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&vlpi_hirq->host_irq_list);
+ vlpi_hirq->host_irq = virq;
+ list_add_tail(&vlpi_hirq->host_irq_list, &irq->host_irq_head);
+
+ if (!atomic_read(&irq->map_count)) {
+ irq->hw = true;
+ atomic_inc(&map.vpe->vlpi_count);
+ }
+ atomic_inc(&irq->map_count);
/* Transfer pending state */
raw_spin_lock_irqsave(&irq->irq_lock, flags);
@@ -488,6 +499,8 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
{
struct vgic_its *its;
struct vgic_irq *irq;
+ struct vlpi_host_irq *vlpi_hirq;
+ struct its_vpe *vpe;
int ret;
if (!vgic_supports_direct_msis(kvm))
@@ -508,10 +521,22 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
if (ret)
goto out;
- WARN_ON(!(irq->hw && irq->host_irq == virq));
+ WARN_ON(!(irq->hw));
if (irq->hw) {
- atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
- irq->hw = false;
+ list_for_each_entry(vlpi_hirq, &irq->host_irq_head, host_irq_list) {
+ if (vlpi_hirq->host_irq == virq) {
+ list_del(&vlpi_hirq->host_irq_list);
+ kfree(vlpi_hirq);
+ break;
+ }
+ }
+
+ atomic_dec(&irq->map_count);
+ if (!atomic_read(&irq->map_count)) {
+ vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+ atomic_dec(&vpe->vlpi_count);
+ irq->hw = false;
+ }
ret = its_unmap_vlpi(virq);
}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 5b27f94d4fad..2b8f25d1eff2 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -114,6 +114,11 @@ struct irq_ops {
bool (*get_input_level)(int vintid);
};
+struct vlpi_host_irq {
+ struct list_head host_irq_list;
+ unsigned int host_irq;
+};
+
struct vgic_irq {
raw_spinlock_t irq_lock; /* Protects the content of the struct */
struct list_head lpi_list; /* Used to link all LPIs together */
@@ -138,6 +143,8 @@ struct vgic_irq {
bool active; /* not used for LPIs */
bool enabled;
bool hw; /* Tied to HW IRQ */
+ atomic_t map_count; /* record vLPI map times */
+ struct list_head host_irq_head; /* record host irqs list of a vLPI */
struct kref refcount; /* Used for LPIs */
u32 hwintid; /* HW INTID number */
unsigned int host_irq; /* linux irq corresponding to hwintid */
--
2.33.0
* Re: [RFC PATCH] KVM: arm/arm64: GICv4: Support shared VLPI
From: Marc Zyngier @ 2023-11-04 10:29 UTC (permalink / raw)
To: Kunkun Jiang
Cc: Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
Catalin Marinas, Will Deacon, Gavin Shan, Jean-Philippe Brucker,
open list:IRQCHIP DRIVERS, linux-arm-kernel, kvmarm,
wanghaibin.wang
On Thu, 02 Nov 2023 14:35:07 +0000,
Kunkun Jiang <jiangkunkun@huawei.com> wrote:
>
> In some scenarios, the guest virtio-pci driver will request only two MSI-X
> vectors: one for config and one shared by all queues. However, the host driver
> (vDPA or VFIO) will request a vector for each queue.
Well, VFIO will request *all* available MSI-X. It doesn't know what a
queue is.
>
> In the current implementation of GICv4/4.1 direct injection of vLPIs,
> pINTID and vINTID have a one-to-one correspondence. Therefore, the
This matching is a hard requirement that matches the architecture. You
cannot change it.
> above scenario cannot be handled correctly: the host kernel will
> execute its_map_vlpi() multiple times but only execute its_unmap_vlpi()
> once. This may cause the guest to hang [1].
Why does it hang? As far as it is concerned, it has unmapped the
interrupts it cares about. Where are the calls to its_map_vlpi()
coming from? It should only occur if the guest actively programs the
MSI-X registers. What is your VMM? How can I reproduce this issue?
>
> | WARN_ON(!(irq->hw && irq->host_irq == virq));
> | if (irq->hw) {
> | atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
> | irq->hw = false;
> | ret = its_unmap_vlpi(virq);
> | }
>
> Add a list to struct vgic_irq to record all host irqs mapped to the vlpi.
> When performing an action on the vlpi, traverse the list and perform this
> action on all host irqs.
This makes no sense. You are blindly associating multiple host
interrupts with a single guest interrupt. This is a blatant violation
of the architecture. When unmapping a VLPI from a guest, only this one
should be turned back into an LPI. Not two, not all, just this one.
Maybe you have found an actual issue, but this patch is absolutely
unacceptable. Please fully describe the problem, provide traces, and
if possible a reproducer.
>
> Link: https://lore.kernel.org/all/0d9fdf42-76b1-afc6-85a9-159c5490bbd4@huawei.com/#t
I tried to parse this, but it hardly makes sense either. You seem to
imply that the host driver pre-configures the device, which is
completely wrong. The host driver (VFIO) should simply request all
possible physical LPIs, and that's all. It is expected that this
requesting has no other effect on the HW. Also, since your guest
driver only configures a single vLPI, there should be only a single
its_map_vlpi() call.
So it seems to me that your HW and SW are doing things that are not
expected at all.
M.
--
Without deviation from the norm, progress is not possible.
* Re: [RFC PATCH] KVM: arm/arm64: GICv4: Support shared VLPI
From: Marc Zyngier @ 2023-11-06 15:33 UTC (permalink / raw)
To: Kunkun Jiang
Cc: Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
Catalin Marinas, Will Deacon, Gavin Shan, Jean-Philippe Brucker,
open list:IRQCHIP DRIVERS, linux-arm-kernel, kvmarm,
wanghaibin.wang
On Mon, 06 Nov 2023 14:59:01 +0000,
Kunkun Jiang <jiangkunkun@huawei.com> wrote:
>
> The virtio-pci driver writes the message.data of entries 1-6
> in the MSI-X table, which traps to QEMU for processing. The
> message.data values are as follows:
> > entry-0 0
> > entry-1 1
> > entry-2 1
> > entry-3 1
> > entry-4 1
> > entry-5 1
> > entry-6 1
Urgh... is vp_modern_queue_vector() used in your configuration? This
is ... terrible.
I wonder if PCIe actually allows this sort of thing.
In any case, this sort of behaviour breaks so many things in KVM's
implementation that I'd recommend you disable GICv4 until we have a
good solution for that.
M.
--
Without deviation from the norm, progress is not possible.
* Re: [RFC PATCH] KVM: arm/arm64: GICv4: Support shared VLPI
From: Kunkun Jiang @ 2023-11-08 9:45 UTC (permalink / raw)
To: Marc Zyngier, dongli.zhang, cohuck, jasowang, stefanha, mst
Cc: Oliver Upton, James Morse, Suzuki K Poulose, Zenghui Yu,
Catalin Marinas, Will Deacon, Gavin Shan, Jean-Philippe Brucker,
open list:IRQCHIP DRIVERS, linux-arm-kernel, kvmarm,
wanghaibin.wang
Hi Marc,
On 2023/11/6 23:33, Marc Zyngier wrote:
> On Mon, 06 Nov 2023 14:59:01 +0000,
> Kunkun Jiang <jiangkunkun@huawei.com> wrote:
>> The virtio-pci driver writes the message.data of entries 1-6
>> in the MSI-X table, which traps to QEMU for processing. The
>> message.data values are as follows:
>>> entry-0 0
>>> entry-1 1
>>> entry-2 1
>>> entry-3 1
>>> entry-4 1
>>> entry-5 1
>>> entry-6 1
> Urgh... is vp_modern_queue_vector() used in your configuration? This
> is ... terrible.
I encountered this problem using the 4.19 version kernel, but not the
5.10 version. This vp_modern_queue_vector() function does not exist
in 4.19, but it uses 'vp_iowrite16(msix_vec, &cfg->queue_msix_vector)',
the same as vp_modern_queue_vector().
In the past two days, I learned about the virtio driver and made some
new discoveries. When 'num_queues' is greater than maxcpus, it will
fall back into MSI-X with one shared for queues. The two patches[1],
submitted by Dongli, limit the number of hw queues used by
virtio-blk/virtio-scsi by 'nr_cpu_ids'. The two patches were merged
in 5.1-rc2. And the patch related virtio-blk was merged into the 4.19
stable branch. The patch related virtio-scsi was not merged.
[1]
https://lore.kernel.org/all/1553682995-5682-1-git-send-email-dongli.zhang@oracle.com/
This is the earliest discussion.
https://lore.kernel.org/all/e4afe4c5-0262-4500-aeec-60f30734b4fc@default/
I don't know if there are other circumstances that would cause it to
fall back into MSI-X with one shared for queues. At least the hack
method is possible.
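For what it's worth, below is a simplified standalone model of the
fallback order described above (this is not the real virtio_pci code;
the function name and the vector counts are made up for illustration):

/* Strategy 1: one vector per VQ plus one for config.
 * Strategy 2: two vectors in total - config plus one shared by every VQ.
 * Strategy 3: no MSI-X at all. */
#include <stdio.h>

static void assign_vectors(int num_queues, int vectors_granted)
{
        int q;

        if (vectors_granted >= num_queues + 1) {
                printf("config -> vector 0\n");
                for (q = 0; q < num_queues; q++)
                        printf("vq %d -> vector %d\n", q, q + 1);
        } else if (vectors_granted >= 2) {
                printf("config -> vector 0\n");
                for (q = 0; q < num_queues; q++)
                        printf("vq %d -> vector 1 (shared)\n", q);
        } else {
                printf("fall back to INTx\n");
        }
}

int main(void)
{
        assign_vectors(6, 7);   /* enough vectors: one per queue */
        assign_vectors(6, 4);   /* too few: all queues share vector 1 */
        return 0;
}

When the second branch is taken, every queue ends up behind the same
vector, which is exactly the entry-1..6 pattern quoted above.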
> I wonder if PCIe actually allows this sort of thing.
Do you think the virtio driver should be modified?
> In any case, this sort of behaviour breaks so many things in KVM's
> implementation that I'd recommend you disable GICv4 until we have a
> good solution for that.
There seems to be no restriction in the GIC specification that multiple
host irqs cannot be mapped to the same vlpi. Or maybe I didn't notice.
Do you think there are any risks?
GICv3 does not have this issue, but is this configuration legal?
Thanks,
Kunkun Jiang
* Re: [RFC PATCH] KVM: arm/arm64: GICv4: Support shared VLPI
From: Marc Zyngier @ 2023-12-02 12:20 UTC (permalink / raw)
To: Kunkun Jiang
Cc: dongli.zhang, cohuck, jasowang, stefanha, mst, Oliver Upton,
James Morse, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
Will Deacon, Gavin Shan, Jean-Philippe Brucker,
open list:IRQCHIP DRIVERS, linux-arm-kernel, kvmarm,
wanghaibin.wang
Hi Kunkun,
On Wed, 08 Nov 2023 09:45:51 +0000,
Kunkun Jiang <jiangkunkun@huawei.com> wrote:
>
> Hi Marc,
>
> On 2023/11/6 23:33, Marc Zyngier wrote:
> > On Mon, 06 Nov 2023 14:59:01 +0000,
> > Kunkun Jiang <jiangkunkun@huawei.com> wrote:
> >> The virtio-pci driver writes the message.data of entries 1-6
> >> in the MSI-X table, which traps to QEMU for processing. The
> >> message.data values are as follows:
> >>> entry-0 0
> >>> entry-1 1
> >>> entry-2 1
> >>> entry-3 1
> >>> entry-4 1
> >>> entry-5 1
> >>> entry-6 1
> > Urgh... is vp_modern_queue_vector() used in your configuration? This
> > is ... terrible.
> I encountered this problem using the 4.19 version kernel, but not the
> 5.10 version. This vp_modern_queue_vector() function does not exist
> in 4.19, but it uses 'vp_iowrite16(msix_vec, &cfg->queue_msix_vector)',
> the same as vp_modern_queue_vector().
>
> In the past two days, I learned about the virtio driver and made some
> new discoveries. When 'num_queues' is greater than maxcpus, it will
> fall back into MSI-X with one shared for queues. The two patches[1],
> > submitted by Dongli, limit the number of hw queues used by
> virtio-blk/virtio-scsi by 'nr_cpu_ids'. The two patches were merged
> in 5.1-rc2. And the patch related virtio-blk was merged into the 4.19
> > stable branch. The patch related virtio-scsi was not merged.
> [1]
> https://lore.kernel.org/all/1553682995-5682-1-git-send-email-dongli.zhang@oracle.com/
>
> This is the earliest discussion.
> https://lore.kernel.org/all/e4afe4c5-0262-4500-aeec-60f30734b4fc@default/
>
> I don't know if there are other circumstances that would cause it to
> fall back into MSI-X with one shared for queues. At least the hack
> method is possible.
> > I wonder if PCIe actually allows this sort of thing.
> Do you think the virtio driver should be modified?
I think the virtio driver should stop messing with the MSI-X
configuration behind the kernel's back. For example, what happens if
the kernel needs to do a disable_irq() on the "shared" interrupt? It
will mask the interrupt in *one* of the vectors, and the interrupt
will still be screaming.
This is terribly broken, even on x86.
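To make the hazard concrete, here is a toy userspace model (purely
illustrative, unrelated to the real genirq code):

/* Several host vectors all funnel into the same guest notification.
 * Masking a single source - which is all that disable_irq() on one
 * Linux irq amounts to - leaves the other sources free to keep firing. */
#include <stdbool.h>
#include <stdio.h>

#define NR_SOURCES 3

static bool masked[NR_SOURCES];

static void device_fires(int src)
{
        if (masked[src])
                return;                 /* only this one source is quiesced */
        printf("source %d -> shared guest vector still raised\n", src);
}

int main(void)
{
        int src;

        masked[1] = true;               /* "disable_irq()" on one vector only */
        for (src = 0; src < NR_SOURCES; src++)
                device_fires(src);      /* sources 0 and 2 still scream */
        return 0;
}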
> > In any case, this sort of behaviour breaks so many things in KVM's
> > implementation that I'd recommend you disable GICv4 until we have a
> > good solution for that.
> There seems to be no restriction in the GIC specification that multiple
> host irqs cannot be mapped to the same vlpi. Or maybe I didn't notice.
> Do you think there are any risks?
Please see 5.2.10 ("Restrictions for INTID mapping rules"), which
clearly forbids the case we have here: "Maps multiple EventID-DeviceID
combinations to the same virtual LPI INTID-vPEID.".
> GICv3 does not have this issue, but is this configuration legal?
With GICv3, the ITS doesn't see multiple mappings to the same
LPI. Each DeviceID/EventID pair has its own LPI, and KVM will just see
the injection callback from VFIO.
Honestly, the virtio driver is broken (irrespective of the
architecture), and incompatible with the GIC architecture.
M.
--
Without deviation from the norm, progress is not possible.
* Re: [RFC PATCH] KVM: arm/arm64: GICv4: Support shared VLPI
From: Jason Wang @ 2023-12-04 8:39 UTC (permalink / raw)
To: Marc Zyngier
Cc: Kunkun Jiang, dongli.zhang, cohuck, stefanha, mst, Oliver Upton,
James Morse, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
Will Deacon, Gavin Shan, Jean-Philippe Brucker,
open list:IRQCHIP DRIVERS, linux-arm-kernel, kvmarm,
wanghaibin.wang
On Sat, Dec 2, 2023 at 8:20 PM Marc Zyngier <maz@kernel.org> wrote:
>
> Hi Kunkun,
>
> On Wed, 08 Nov 2023 09:45:51 +0000,
> Kunkun Jiang <jiangkunkun@huawei.com> wrote:
> >
> > Hi Marc,
> >
> > On 2023/11/6 23:33, Marc Zyngier wrote:
> > > On Mon, 06 Nov 2023 14:59:01 +0000,
> > > Kunkun Jiang <jiangkunkun@huawei.com> wrote:
> > >> The virtio-pci driver writes the message.data of entries 1-6
> > >> in the MSI-X table, which traps to QEMU for processing. The
> > >> message.data values are as follows:
> > >>> entry-0 0
> > >>> entry-1 1
> > >>> entry-2 1
> > >>> entry-3 1
> > >>> entry-4 1
> > >>> entry-5 1
> > >>> entry-6 1
It looks like a bug from the driver. It should only use vector 0 and 1
in this case.
Could you please check the queue_msix_vector for each queue in this case?
> > > Urgh... is vp_modern_queue_vector() used in your configuration? This
> > > is ... terrible.
> > I encountered this problem using the 4.19 version kernel, but not the
> > 5.10 version. This vp_modern_queue_vector() function does not exist
> > in 4.19, but it uses 'vp_iowrite16(msix_vec, &cfg->queue_msix_vector)',
> > the same as vp_modern_queue_vector().
> >
> > In the past two days, I learned about the virtio driver and made some
> > new discoveries. When 'num_queues' is greater than maxcpus, it will
> > fall back into MSI-X with one shared for queues. The two patches[1],
> > submitted by Dongli, limit the number of hw queues used by
> > virtio-blk/virtio-scsi by 'nr_cpu_ids'. The two patches were merged
> > in 5.1-rc2. And the patch related virtio-blk was merged into the 4.19
> > stable branch. The patch related virtio-scsi was not merged.
> > [1]
> > https://lore.kernel.org/all/1553682995-5682-1-git-send-email-dongli.zhang@oracle.com/
> >
> > This is the earliest discussion.
> > https://lore.kernel.org/all/e4afe4c5-0262-4500-aeec-60f30734b4fc@default/
> >
> > I don't know if there are other circumstances that would cause it to
> > fall back into MSI-X with one shared for queues. At least the hack
> > method is possible.
> > > I wonder if PCIe actually allows this sort of thing.
> > Do you think the virtio driver should be modified?
>
> I think the virtio driver should stop messing with the MSI-X
> configuration behind the kernel's back. For example, what happens if
> the kernel needs to do a disable_irq() on the "shared" interrupt? It
> will mask the interrupt in *one* of the vectors, and the interrupt
> will still be screaming.
>
> This is terribly broken, even on x86.
Let's try to see the queue_msix_vector. A correctly implemented device
should not try to use a vector higher than 1.
>
> > > In any case, this sort of behaviour breaks so many things in KVM's
> > > implementation that I'd recommend you disable GICv4 until we have a
> > > good solution for that.
> > There seems to be no restriction in the GIC specification that multiple
> > host irqs cannot be mapped to the same vlpi. Or maybe I didn't notice.
> > Do you think there are any risks?
>
> Please see 5.2.10 ("Restrictions for INTID mapping rules"), which
> clearly forbids the case we have here: "Maps multiple EventID-DeviceID
> combinations to the same virtual LPI INTID-vPEID.".
>
> > GICv3 does not have this issue, but is this configuration legal?
>
> With GICv3, the ITS doesn't see multiple mappings to the same
> LPI. Each DeviceID/EventID pair has its own LPI, and KVM will just see
> the injection callback from VFIO.
>
> Honestly, the virtio driver is broken (irrespective of the
> architecture), and incompatible with the GIC architecture.
No matter how the driver is written, the host/KVM should be able to
survive that.
Thanks
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
>
* Re: [RFC PATCH] KVM: arm/arm64: GICv4: Support shared VLPI
From: Jason Wang @ 2023-12-04 8:47 UTC (permalink / raw)
To: Marc Zyngier
Cc: Kunkun Jiang, dongli.zhang, cohuck, stefanha, mst, Oliver Upton,
James Morse, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
Will Deacon, Gavin Shan, Jean-Philippe Brucker,
open list:IRQCHIP DRIVERS, linux-arm-kernel, kvmarm,
wanghaibin.wang
On Mon, Dec 4, 2023 at 4:39 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Sat, Dec 2, 2023 at 8:20 PM Marc Zyngier <maz@kernel.org> wrote:
> >
> > Hi Kunkun,
> >
> > On Wed, 08 Nov 2023 09:45:51 +0000,
> > Kunkun Jiang <jiangkunkun@huawei.com> wrote:
> > >
> > > Hi Marc,
> > >
> > > On 2023/11/6 23:33, Marc Zyngier wrote:
> > > > On Mon, 06 Nov 2023 14:59:01 +0000,
> > > > Kunkun Jiang <jiangkunkun@huawei.com> wrote:
> > > >> The virtio-pci driver writes the message.data of entries 1-6
> > > >> in the MSI-X table, which traps to QEMU for processing. The
> > > >> message.data values are as follows:
> > > >>> entry-0 0
> > > >>> entry-1 1
> > > >>> entry-2 1
> > > >>> entry-3 1
> > > >>> entry-4 1
> > > >>> entry-5 1
> > > >>> entry-6 1
>
> It looks like a bug from the driver. It should only use vector 0 and 1
> in this case.
>
> Could you please check the queue_msix_vector for each queue in this case?
Or did you actually mean queue_msix_vector here? I'm not familiar with
ARM but 0 doesn't seem to be a good message.data anyhow.
Thanks
* Re: [RFC PATCH] KVM: arm/arm64: GICv4: Support shared VLPI
From: Marc Zyngier @ 2023-12-04 8:52 UTC (permalink / raw)
To: Jason Wang
Cc: Kunkun Jiang, dongli.zhang, cohuck, stefanha, mst, Oliver Upton,
James Morse, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
Will Deacon, Gavin Shan, Jean-Philippe Brucker,
open list:IRQCHIP DRIVERS, linux-arm-kernel, kvmarm,
wanghaibin.wang
On Mon, 04 Dec 2023 08:39:00 +0000,
Jason Wang <jasowang@redhat.com> wrote:
>
> > Honestly, the virtio driver is broken (irrespective of the
> > architecture), and incompatible with the GIC architecture.
>
> No matter how the driver is written, the host/KVM should be able to
> survive that.
The host is perfectly fine, thanks for asking. The guest, however, is
in a bad shape.
M.
--
Without deviation from the norm, progress is not possible.
* Re: [RFC PATCH] KVM: arm/arm64: GICv4: Support shared VLPI
From: Marc Zyngier @ 2023-12-04 9:00 UTC (permalink / raw)
To: Jason Wang
Cc: Kunkun Jiang, dongli.zhang, cohuck, stefanha, mst, Oliver Upton,
James Morse, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
Will Deacon, Gavin Shan, Jean-Philippe Brucker,
open list:IRQCHIP DRIVERS, linux-arm-kernel, kvmarm,
wanghaibin.wang
On Mon, 04 Dec 2023 08:47:49 +0000,
Jason Wang <jasowang@redhat.com> wrote:
>
> On Mon, Dec 4, 2023 at 4:39 PM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Sat, Dec 2, 2023 at 8:20 PM Marc Zyngier <maz@kernel.org> wrote:
> > >
> > > Hi Kunkun,
> > >
> > > On Wed, 08 Nov 2023 09:45:51 +0000,
> > > Kunkun Jiang <jiangkunkun@huawei.com> wrote:
> > > >
> > > > Hi Marc,
> > > >
> > > > On 2023/11/6 23:33, Marc Zyngier wrote:
> > > > > On Mon, 06 Nov 2023 14:59:01 +0000,
> > > > > Kunkun Jiang <jiangkunkun@huawei.com> wrote:
> > > > >> The virtio-pci driver writes the message.data of entries 1-6
> > > > >> in the MSI-X table, which traps to QEMU for processing. The
> > > > >> message.data values are as follows:
> > > > >>> entry-0 0
> > > > >>> entry-1 1
> > > > >>> entry-2 1
> > > > >>> entry-3 1
> > > > >>> entry-4 1
> > > > >>> entry-5 1
> > > > >>> entry-6 1
> >
> > It looks like a bug from the driver. It should only use vector 0 and 1
> > in this case.
> >
> > Could you please check the queue_msix_vector for each queue in this case?
>
> Or did you actually mean queue_msix_vector here? I'm not familiar with
> ARM but 0 doesn't seem to be a good message.data anyhow.
Why? What's special about 0? 0 is a perfectly fine value.
M.
--
Without deviation from the norm, progress is not possible.
* Re: [RFC PATCH] KVM: arm/arm64: GICv4: Support shared VLPI
From: Jason Wang @ 2023-12-04 9:06 UTC (permalink / raw)
To: Marc Zyngier
Cc: Kunkun Jiang, dongli.zhang, cohuck, stefanha, mst, Oliver Upton,
James Morse, Suzuki K Poulose, Zenghui Yu, Catalin Marinas,
Will Deacon, Gavin Shan, Jean-Philippe Brucker,
open list:IRQCHIP DRIVERS, linux-arm-kernel, kvmarm,
wanghaibin.wang
On Mon, Dec 4, 2023 at 5:00 PM Marc Zyngier <maz@kernel.org> wrote:
>
> On Mon, 04 Dec 2023 08:47:49 +0000,
> Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Mon, Dec 4, 2023 at 4:39 PM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Sat, Dec 2, 2023 at 8:20 PM Marc Zyngier <maz@kernel.org> wrote:
> > > >
> > > > Hi Kunkun,
> > > >
> > > > On Wed, 08 Nov 2023 09:45:51 +0000,
> > > > Kunkun Jiang <jiangkunkun@huawei.com> wrote:
> > > > >
> > > > > Hi Marc,
> > > > >
> > > > > On 2023/11/6 23:33, Marc Zyngier wrote:
> > > > > > On Mon, 06 Nov 2023 14:59:01 +0000,
> > > > > > Kunkun Jiang <jiangkunkun@huawei.com> wrote:
> > > > > >> The virtio-pci driver writes the message.data of entries 1-6
> > > > > >> in the MSI-X table, which traps to QEMU for processing. The
> > > > > >> message.data values are as follows:
> > > > > >>> entry-0 0
> > > > > >>> entry-1 1
> > > > > >>> entry-2 1
> > > > > >>> entry-3 1
> > > > > >>> entry-4 1
> > > > > >>> entry-5 1
> > > > > >>> entry-6 1
> > >
> > > It looks like a bug from the driver. It should only use vector 0 and 1
> > > in this case.
> > >
> > > Could you please check the queue_msix_vector for each queue in this case?
> >
> > Or did you actually mean queue_msix_vector here? I'm not familiar with
> > ARM but 0 doesn't seem to be a good message.data anyhow.
>
> Why? What's special about 0? 0 is a perfectly fine value.
Ok, looks like I mis-read it as a message.addr. So it's fine.
Thanks
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
>