* [PATCH 01/13] iommu/vt-d: VT-d Posted-Interrupts feature detection
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 6:26 ` Feng Wu
-1 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb-DgEjT+Ai2ygdnm+yROfE0A, pbonzini-H+wXaHxf7aLQT0dZR+AlfA,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ, joro-zLv9SwRftAIdnm+yROfE0A,
tglx-hfZtesqFncYOwBW4kG4KsQ, mingo-H+wXaHxf7aLQT0dZR+AlfA,
hpa-YMNOUZJC4hwAvxtiuMwx3w, x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.
This patch adds feature detection logic for VT-d posted-interrupt.
Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
drivers/iommu/intel_irq_remapping.c | 13 +++++++++++++
drivers/iommu/irq_remapping.c | 4 ++++
drivers/iommu/irq_remapping.h | 5 +++++
include/linux/intel-iommu.h | 1 +
4 files changed, 23 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 7c80661..f99f0f1 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -580,6 +580,19 @@ static int __init intel_irq_remapping_supported(void)
if (!ecap_ir_support(iommu->ecap))
return 0;
+ /* VT-d posted-interrupt feature detection */
+ if (disable_irq_post == 0)
+ for_each_drhd_unit(drhd) {
+ struct intel_iommu *iommu = drhd->iommu;
+
+ if (!cap_pi_support(iommu->cap)) {
+ irq_post_enabled = 0;
+ disable_irq_post = 1;
+ break;
+ }
+ irq_post_enabled = 1;
+ }
+
return 1;
}
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 74a1767..2f8ee00 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -23,6 +23,10 @@ int irq_remap_broken;
int disable_sourceid_checking;
int no_x2apic_optout;
+int disable_irq_post = 1;
+int irq_post_enabled = 0;
+EXPORT_SYMBOL_GPL(irq_post_enabled);
+
static struct irq_remap_ops *remap_ops;
static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index fde250f..7bb5913 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -37,6 +37,9 @@ extern int disable_sourceid_checking;
extern int no_x2apic_optout;
extern int irq_remapping_enabled;
+extern int disable_irq_post;
+extern int irq_post_enabled;
+
struct irq_remap_ops {
/* Check whether Interrupt Remapping is supported */
int (*supported)(void);
@@ -91,6 +94,8 @@ extern struct irq_remap_ops amd_iommu_irq_ops;
#define irq_remapping_enabled 0
#define disable_irq_remap 1
#define irq_remap_broken 0
+#define disable_irq_post 1
+#define irq_post_enabled 0
#endif /* CONFIG_IRQ_REMAP */
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index a65208a..5b1a124 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -87,6 +87,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
/*
* Decoding Capability Register
*/
+#define cap_pi_support(c) (((c) >> 59) & 1)
#define cap_read_drain(c) (((c) >> 55) & 1)
#define cap_write_drain(c) (((c) >> 54) & 1)
#define cap_max_amask_val(c) (((c) >> 48) & 0x3f)
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread* [PATCH 01/13] iommu/vt-d: VT-d Posted-Interrupts feature detection
@ 2014-11-10 6:26 ` Feng Wu
0 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel, Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.
This patch adds feature detection logic for VT-d posted-interrupt.
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
drivers/iommu/intel_irq_remapping.c | 13 +++++++++++++
drivers/iommu/irq_remapping.c | 4 ++++
drivers/iommu/irq_remapping.h | 5 +++++
include/linux/intel-iommu.h | 1 +
4 files changed, 23 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 7c80661..f99f0f1 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -580,6 +580,19 @@ static int __init intel_irq_remapping_supported(void)
if (!ecap_ir_support(iommu->ecap))
return 0;
+ /* VT-d posted-interrupt feature detection */
+ if (disable_irq_post == 0)
+ for_each_drhd_unit(drhd) {
+ struct intel_iommu *iommu = drhd->iommu;
+
+ if (!cap_pi_support(iommu->cap)) {
+ irq_post_enabled = 0;
+ disable_irq_post = 1;
+ break;
+ }
+ irq_post_enabled = 1;
+ }
+
return 1;
}
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 74a1767..2f8ee00 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -23,6 +23,10 @@ int irq_remap_broken;
int disable_sourceid_checking;
int no_x2apic_optout;
+int disable_irq_post = 1;
+int irq_post_enabled = 0;
+EXPORT_SYMBOL_GPL(irq_post_enabled);
+
static struct irq_remap_ops *remap_ops;
static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index fde250f..7bb5913 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -37,6 +37,9 @@ extern int disable_sourceid_checking;
extern int no_x2apic_optout;
extern int irq_remapping_enabled;
+extern int disable_irq_post;
+extern int irq_post_enabled;
+
struct irq_remap_ops {
/* Check whether Interrupt Remapping is supported */
int (*supported)(void);
@@ -91,6 +94,8 @@ extern struct irq_remap_ops amd_iommu_irq_ops;
#define irq_remapping_enabled 0
#define disable_irq_remap 1
#define irq_remap_broken 0
+#define disable_irq_post 1
+#define irq_post_enabled 0
#endif /* CONFIG_IRQ_REMAP */
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index a65208a..5b1a124 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -87,6 +87,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
/*
* Decoding Capability Register
*/
+#define cap_pi_support(c) (((c) >> 59) & 1)
#define cap_read_drain(c) (((c) >> 55) & 1)
#define cap_write_drain(c) (((c) >> 54) & 1)
#define cap_max_amask_val(c) (((c) >> 48) & 0x3f)
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread[parent not found: <1415600812-27773-2-git-send-email-feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>]
* Re: [PATCH 01/13] iommu/vt-d: VT-d Posted-Interrupts feature detection
2014-11-10 6:26 ` Feng Wu
@ 2014-11-11 13:38 ` Jiang Liu
-1 siblings, 0 replies; 53+ messages in thread
From: Jiang Liu @ 2014-11-11 13:38 UTC (permalink / raw)
To: Feng Wu, gleb-DgEjT+Ai2ygdnm+yROfE0A,
pbonzini-H+wXaHxf7aLQT0dZR+AlfA, dwmw2-wEGCiKHe2LqWVfeAwA7xHQ,
joro-zLv9SwRftAIdnm+yROfE0A, tglx-hfZtesqFncYOwBW4kG4KsQ,
mingo-H+wXaHxf7aLQT0dZR+AlfA, hpa-YMNOUZJC4hwAvxtiuMwx3w,
x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
On 2014/11/10 14:26, Feng Wu wrote:
> VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
> With VT-d Posted-Interrupts enabled, external interrupts from
> direct-assigned devices can be delivered to guests without VMM
> intervention when guest is running in non-root mode.
>
> This patch adds feature detection logic for VT-d posted-interrupt.
>
> Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> ---
> drivers/iommu/intel_irq_remapping.c | 13 +++++++++++++
> drivers/iommu/irq_remapping.c | 4 ++++
> drivers/iommu/irq_remapping.h | 5 +++++
> include/linux/intel-iommu.h | 1 +
> 4 files changed, 23 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
> index 7c80661..f99f0f1 100644
> --- a/drivers/iommu/intel_irq_remapping.c
> +++ b/drivers/iommu/intel_irq_remapping.c
> @@ -580,6 +580,19 @@ static int __init intel_irq_remapping_supported(void)
> if (!ecap_ir_support(iommu->ecap))
> return 0;
>
> + /* VT-d posted-interrupt feature detection*/
> + if (disable_irq_post == 0)
> + for_each_drhd_unit(drhd) {
> + struct intel_iommu *iommu = drhd->iommu;
Hi Feng,
You may use for_each_active_iommu() here.
Regards!
Gerry
> +
> + if (!cap_pi_support(iommu->cap)) {
> + irq_post_enabled = 0;
> + disable_irq_post = 1;
> + break;
> + }
> + irq_post_enabled = 1;
> + }
> +
> return 1;
> }
>
> diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
> index 74a1767..2f8ee00 100644
> --- a/drivers/iommu/irq_remapping.c
> +++ b/drivers/iommu/irq_remapping.c
> @@ -23,6 +23,10 @@ int irq_remap_broken;
> int disable_sourceid_checking;
> int no_x2apic_optout;
>
> +int disable_irq_post = 1;
> +int irq_post_enabled = 0;
> +EXPORT_SYMBOL_GPL(irq_post_enabled);
> +
> static struct irq_remap_ops *remap_ops;
>
> static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
> diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
> index fde250f..7bb5913 100644
> --- a/drivers/iommu/irq_remapping.h
> +++ b/drivers/iommu/irq_remapping.h
> @@ -37,6 +37,9 @@ extern int disable_sourceid_checking;
> extern int no_x2apic_optout;
> extern int irq_remapping_enabled;
>
> +extern int disable_irq_post;
> +extern int irq_post_enabled;
> +
> struct irq_remap_ops {
> /* Check whether Interrupt Remapping is supported */
> int (*supported)(void);
> @@ -91,6 +94,8 @@ extern struct irq_remap_ops amd_iommu_irq_ops;
> #define irq_remapping_enabled 0
> #define disable_irq_remap 1
> #define irq_remap_broken 0
> +#define disable_irq_post 1
> +#define irq_post_enabled 0
>
> #endif /* CONFIG_IRQ_REMAP */
>
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index a65208a..5b1a124 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -87,6 +87,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
> /*
> * Decoding Capability Register
> */
> +#define cap_pi_support(c) (((c) >> 59) & 1)
> #define cap_read_drain(c) (((c) >> 55) & 1)
> #define cap_write_drain(c) (((c) >> 54) & 1)
> #define cap_max_amask_val(c) (((c) >> 48) & 0x3f)
>
^ permalink raw reply [flat|nested] 53+ messages in thread* Re: [PATCH 01/13] iommu/vt-d: VT-d Posted-Interrupts feature detection
@ 2014-11-11 13:38 ` Jiang Liu
0 siblings, 0 replies; 53+ messages in thread
From: Jiang Liu @ 2014-11-11 13:38 UTC (permalink / raw)
To: Feng Wu, gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel
On 2014/11/10 14:26, Feng Wu wrote:
> VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
> With VT-d Posted-Interrupts enabled, external interrupts from
> direct-assigned devices can be delivered to guests without VMM
> intervention when guest is running in non-root mode.
>
> This patch adds feature detection logic for VT-d posted-interrupt.
>
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> ---
> drivers/iommu/intel_irq_remapping.c | 13 +++++++++++++
> drivers/iommu/irq_remapping.c | 4 ++++
> drivers/iommu/irq_remapping.h | 5 +++++
> include/linux/intel-iommu.h | 1 +
> 4 files changed, 23 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
> index 7c80661..f99f0f1 100644
> --- a/drivers/iommu/intel_irq_remapping.c
> +++ b/drivers/iommu/intel_irq_remapping.c
> @@ -580,6 +580,19 @@ static int __init intel_irq_remapping_supported(void)
> if (!ecap_ir_support(iommu->ecap))
> return 0;
>
> + /* VT-d posted-interrupt feature detection*/
> + if (disable_irq_post == 0)
> + for_each_drhd_unit(drhd) {
> + struct intel_iommu *iommu = drhd->iommu;
Hi Feng,
You may use for_each_active_iommu() here.
Regards!
Gerry
> +
> + if (!cap_pi_support(iommu->cap)) {
> + irq_post_enabled = 0;
> + disable_irq_post = 1;
> + break;
> + }
> + irq_post_enabled = 1;
> + }
> +
> return 1;
> }
>
> diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
> index 74a1767..2f8ee00 100644
> --- a/drivers/iommu/irq_remapping.c
> +++ b/drivers/iommu/irq_remapping.c
> @@ -23,6 +23,10 @@ int irq_remap_broken;
> int disable_sourceid_checking;
> int no_x2apic_optout;
>
> +int disable_irq_post = 1;
> +int irq_post_enabled = 0;
> +EXPORT_SYMBOL_GPL(irq_post_enabled);
> +
> static struct irq_remap_ops *remap_ops;
>
> static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
> diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
> index fde250f..7bb5913 100644
> --- a/drivers/iommu/irq_remapping.h
> +++ b/drivers/iommu/irq_remapping.h
> @@ -37,6 +37,9 @@ extern int disable_sourceid_checking;
> extern int no_x2apic_optout;
> extern int irq_remapping_enabled;
>
> +extern int disable_irq_post;
> +extern int irq_post_enabled;
> +
> struct irq_remap_ops {
> /* Check whether Interrupt Remapping is supported */
> int (*supported)(void);
> @@ -91,6 +94,8 @@ extern struct irq_remap_ops amd_iommu_irq_ops;
> #define irq_remapping_enabled 0
> #define disable_irq_remap 1
> #define irq_remap_broken 0
> +#define disable_irq_post 1
> +#define irq_post_enabled 0
>
> #endif /* CONFIG_IRQ_REMAP */
>
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index a65208a..5b1a124 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -87,6 +87,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
> /*
> * Decoding Capability Register
> */
> +#define cap_pi_support(c) (((c) >> 59) & 1)
> #define cap_read_drain(c) (((c) >> 55) & 1)
> #define cap_write_drain(c) (((c) >> 54) & 1)
> #define cap_max_amask_val(c) (((c) >> 48) & 0x3f)
>
^ permalink raw reply [flat|nested] 53+ messages in thread
* [PATCH 02/13] KVM: Initialize VT-d Posted-Interrtups Descriptor
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 6:26 ` Feng Wu
-1 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb-DgEjT+Ai2ygdnm+yROfE0A, pbonzini-H+wXaHxf7aLQT0dZR+AlfA,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ, joro-zLv9SwRftAIdnm+yROfE0A,
tglx-hfZtesqFncYOwBW4kG4KsQ, mingo-H+wXaHxf7aLQT0dZR+AlfA,
hpa-YMNOUZJC4hwAvxtiuMwx3w, x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
This patch initializes the VT-d Posted-Interrupt Descriptor.
Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
arch/x86/include/asm/irq_remapping.h | 1 +
arch/x86/kernel/apic/apic.c | 1 +
arch/x86/kvm/vmx.c | 56 ++++++++++++++++++++++++++++++++-
3 files changed, 56 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index b7747c4..a3cc437 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -57,6 +57,7 @@ extern bool setup_remapped_irq(int irq,
struct irq_chip *chip);
void irq_remap_modify_chip_defaults(struct irq_chip *chip);
+extern int irq_post_enabled;
#else /* CONFIG_IRQ_REMAP */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ba6cc04..987408d 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -162,6 +162,7 @@ __setup("apicpmtimer", setup_apicpmtimer);
#endif
int x2apic_mode;
+EXPORT_SYMBOL_GPL(x2apic_mode);
#ifdef CONFIG_X86_X2APIC
/* x2apic enabled before OS handover */
int x2apic_preenabled;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3e556c6..a4670d3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -45,6 +45,7 @@
#include <asm/perf_event.h>
#include <asm/debugreg.h>
#include <asm/kexec.h>
+#include <asm/irq_remapping.h>
#include "trace.h"
@@ -408,13 +409,32 @@ struct nested_vmx {
};
#define POSTED_INTR_ON 0
+#define POSTED_INTR_SN 1
+
/* Posted-Interrupt Descriptor */
struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
- u32 control; /* bit 0 of control is outstanding notification bit */
- u32 rsvd[7];
+ union {
+ struct {
+ u64 on : 1,
+ sn : 1,
+ rsvd_1 : 13,
+ ndm : 1,
+ nv : 8,
+ rsvd_2 : 8,
+ ndst : 32;
+ };
+ u64 control;
+ };
+ u32 rsvd[6];
} __aligned(64);
+static void pi_clear_sn(struct pi_desc *pi_desc)
+{
+ return clear_bit(POSTED_INTR_SN,
+ (unsigned long *)&pi_desc->control);
+}
+
static bool pi_test_and_set_on(struct pi_desc *pi_desc)
{
return test_and_set_bit(POSTED_INTR_ON,
@@ -4396,6 +4416,33 @@ static void ept_set_mmio_spte_mask(void)
kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
}
+static bool pi_desc_init(struct vcpu_vmx *vmx)
+{
+ unsigned int dest;
+
+ if (irq_post_enabled == 0)
+ return true;
+
+ /*
+ * Initialize Posted-Interrupt Descriptor
+ */
+
+ pi_clear_sn(&vmx->pi_desc);
+ vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+
+ /* Physical mode for Notification Event */
+ vmx->pi_desc.ndm = 0;
+ dest = cpu_physical_id(vmx->vcpu.cpu);
+
+ if (x2apic_mode)
+ vmx->pi_desc.ndst = dest;
+ else
+ vmx->pi_desc.ndst = (dest << 8) & 0xFF00;
+
+ return true;
+}
+
+
/*
* Sets up the vmcs for emulated real mode.
*/
@@ -4439,6 +4486,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
+
+ if (!pi_desc_init(vmx)) {
+ printk(KERN_ERR "Initialize PI descriptor error!\n");
+ return 1;
+ }
}
if (ple_gap) {
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread* [PATCH 02/13] KVM: Initialize VT-d Posted-Interrtups Descriptor
@ 2014-11-10 6:26 ` Feng Wu
0 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel, Feng Wu
This patch initializes the VT-d Posted-Interrupt Descriptor.
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
arch/x86/include/asm/irq_remapping.h | 1 +
arch/x86/kernel/apic/apic.c | 1 +
arch/x86/kvm/vmx.c | 56 ++++++++++++++++++++++++++++++++-
3 files changed, 56 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index b7747c4..a3cc437 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -57,6 +57,7 @@ extern bool setup_remapped_irq(int irq,
struct irq_chip *chip);
void irq_remap_modify_chip_defaults(struct irq_chip *chip);
+extern int irq_post_enabled;
#else /* CONFIG_IRQ_REMAP */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ba6cc04..987408d 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -162,6 +162,7 @@ __setup("apicpmtimer", setup_apicpmtimer);
#endif
int x2apic_mode;
+EXPORT_SYMBOL_GPL(x2apic_mode);
#ifdef CONFIG_X86_X2APIC
/* x2apic enabled before OS handover */
int x2apic_preenabled;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3e556c6..a4670d3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -45,6 +45,7 @@
#include <asm/perf_event.h>
#include <asm/debugreg.h>
#include <asm/kexec.h>
+#include <asm/irq_remapping.h>
#include "trace.h"
@@ -408,13 +409,32 @@ struct nested_vmx {
};
#define POSTED_INTR_ON 0
+#define POSTED_INTR_SN 1
+
/* Posted-Interrupt Descriptor */
struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
- u32 control; /* bit 0 of control is outstanding notification bit */
- u32 rsvd[7];
+ union {
+ struct {
+ u64 on : 1,
+ sn : 1,
+ rsvd_1 : 13,
+ ndm : 1,
+ nv : 8,
+ rsvd_2 : 8,
+ ndst : 32;
+ };
+ u64 control;
+ };
+ u32 rsvd[6];
} __aligned(64);
+static void pi_clear_sn(struct pi_desc *pi_desc)
+{
+ return clear_bit(POSTED_INTR_SN,
+ (unsigned long *)&pi_desc->control);
+}
+
static bool pi_test_and_set_on(struct pi_desc *pi_desc)
{
return test_and_set_bit(POSTED_INTR_ON,
@@ -4396,6 +4416,33 @@ static void ept_set_mmio_spte_mask(void)
kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
}
+static bool pi_desc_init(struct vcpu_vmx *vmx)
+{
+ unsigned int dest;
+
+ if (irq_post_enabled == 0)
+ return true;
+
+ /*
+ * Initialize Posted-Interrupt Descriptor
+ */
+
+ pi_clear_sn(&vmx->pi_desc);
+ vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+
+ /* Physical mode for Notification Event */
+ vmx->pi_desc.ndm = 0;
+ dest = cpu_physical_id(vmx->vcpu.cpu);
+
+ if (x2apic_mode)
+ vmx->pi_desc.ndst = dest;
+ else
+ vmx->pi_desc.ndst = (dest << 8) & 0xFF00;
+
+ return true;
+}
+
+
/*
* Sets up the vmcs for emulated real mode.
*/
@@ -4439,6 +4486,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
+
+ if (!pi_desc_init(vmx)) {
+ printk(KERN_ERR "Initialize PI descriptor error!\n");
+ return 1;
+ }
}
if (ple_gap) {
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread[parent not found: <1415600812-27773-3-git-send-email-feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>]
* Re: [PATCH 02/13] KVM: Initialize VT-d Posted-Interrtups Descriptor
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 21:57 ` Alex Williamson
-1 siblings, 0 replies; 53+ messages in thread
From: Alex Williamson @ 2014-11-10 21:57 UTC (permalink / raw)
To: Feng Wu
Cc: kvm-u79uwXL29TY76Z2rM5mHXA, gleb-DgEjT+Ai2ygdnm+yROfE0A,
x86-DgEjT+Ai2ygdnm+yROfE0A, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
mingo-H+wXaHxf7aLQT0dZR+AlfA, hpa-YMNOUZJC4hwAvxtiuMwx3w,
pbonzini-H+wXaHxf7aLQT0dZR+AlfA, tglx-hfZtesqFncYOwBW4kG4KsQ,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ
On Mon, 2014-11-10 at 14:26 +0800, Feng Wu wrote:
> This patch initialize the VT-d Posted-interrupt Descritpor.
>
> Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> ---
> arch/x86/include/asm/irq_remapping.h | 1 +
> arch/x86/kernel/apic/apic.c | 1 +
> arch/x86/kvm/vmx.c | 56 ++++++++++++++++++++++++++++++++-
> 3 files changed, 56 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
> index b7747c4..a3cc437 100644
> --- a/arch/x86/include/asm/irq_remapping.h
> +++ b/arch/x86/include/asm/irq_remapping.h
> @@ -57,6 +57,7 @@ extern bool setup_remapped_irq(int irq,
> struct irq_chip *chip);
>
> void irq_remap_modify_chip_defaults(struct irq_chip *chip);
> +extern int irq_post_enabled;
>
> #else /* CONFIG_IRQ_REMAP */
>
> diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
> index ba6cc04..987408d 100644
> --- a/arch/x86/kernel/apic/apic.c
> +++ b/arch/x86/kernel/apic/apic.c
> @@ -162,6 +162,7 @@ __setup("apicpmtimer", setup_apicpmtimer);
> #endif
>
> int x2apic_mode;
> +EXPORT_SYMBOL_GPL(x2apic_mode);
> #ifdef CONFIG_X86_X2APIC
> /* x2apic enabled before OS handover */
> int x2apic_preenabled;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 3e556c6..a4670d3 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -45,6 +45,7 @@
> #include <asm/perf_event.h>
> #include <asm/debugreg.h>
> #include <asm/kexec.h>
> +#include <asm/irq_remapping.h>
>
> #include "trace.h"
>
> @@ -408,13 +409,32 @@ struct nested_vmx {
> };
>
> #define POSTED_INTR_ON 0
> +#define POSTED_INTR_SN 1
> +
> /* Posted-Interrupt Descriptor */
> struct pi_desc {
> u32 pir[8]; /* Posted interrupt requested */
> - u32 control; /* bit 0 of control is outstanding notification bit */
> - u32 rsvd[7];
> + union {
> + struct {
> + u64 on : 1,
> + sn : 1,
> + rsvd_1 : 13,
> + ndm : 1,
> + nv : 8,
> + rsvd_2 : 8,
> + ndst : 32;
> + };
> + u64 control;
> + };
> + u32 rsvd[6];
> } __aligned(64);
>
> +static void pi_clear_sn(struct pi_desc *pi_desc)
> +{
> + return clear_bit(POSTED_INTR_SN,
> + (unsigned long *)&pi_desc->control);
> +}
> +
> static bool pi_test_and_set_on(struct pi_desc *pi_desc)
> {
> return test_and_set_bit(POSTED_INTR_ON,
> @@ -4396,6 +4416,33 @@ static void ept_set_mmio_spte_mask(void)
> kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> }
>
> +static bool pi_desc_init(struct vcpu_vmx *vmx)
> +{
> + unsigned int dest;
> +
> + if (irq_post_enabled == 0)
> + return true;
> +
> + /*
> + * Initialize Posted-Interrupt Descriptor
> + */
> +
> + pi_clear_sn(&vmx->pi_desc);
> + vmx->pi_desc.nv = POSTED_INTR_VECTOR;
> +
> + /* Physical mode for Notificaiton Event */
> + vmx->pi_desc.ndm = 0;
> + dest = cpu_physical_id(vmx->vcpu.cpu);
> +
> + if (x2apic_mode)
> + vmx->pi_desc.ndst = dest;
> + else
> + vmx->pi_desc.ndst = (dest << 8) & 0xFF00;
> +
> + return true;
Why does this bother to return anything since it can only return true?
> +}
> +
> +
> /*
> * Sets up the vmcs for emulated real mode.
> */
> @@ -4439,6 +4486,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>
> vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
> vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
> +
> + if (!pi_desc_init(vmx)) {
And therefore this cannot happen.
> + printk(KERN_ERR "Initialize PI descriptor error!\n");
> + return 1;
This is the wrong error anyway: vmx_create_vcpu() would return ERR_PTR(1),
which the caller's IS_ERR() check does not recognize as an error.
Thanks,
Alex
> + }
> }
>
> if (ple_gap) {
^ permalink raw reply [flat|nested] 53+ messages in thread* Re: [PATCH 02/13] KVM: Initialize VT-d Posted-Interrtups Descriptor
@ 2014-11-10 21:57 ` Alex Williamson
0 siblings, 0 replies; 53+ messages in thread
From: Alex Williamson @ 2014-11-10 21:57 UTC (permalink / raw)
To: Feng Wu
Cc: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86, kvm, iommu,
linux-kernel
On Mon, 2014-11-10 at 14:26 +0800, Feng Wu wrote:
> This patch initialize the VT-d Posted-interrupt Descritpor.
>
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> ---
> arch/x86/include/asm/irq_remapping.h | 1 +
> arch/x86/kernel/apic/apic.c | 1 +
> arch/x86/kvm/vmx.c | 56 ++++++++++++++++++++++++++++++++-
> 3 files changed, 56 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
> index b7747c4..a3cc437 100644
> --- a/arch/x86/include/asm/irq_remapping.h
> +++ b/arch/x86/include/asm/irq_remapping.h
> @@ -57,6 +57,7 @@ extern bool setup_remapped_irq(int irq,
> struct irq_chip *chip);
>
> void irq_remap_modify_chip_defaults(struct irq_chip *chip);
> +extern int irq_post_enabled;
>
> #else /* CONFIG_IRQ_REMAP */
>
> diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
> index ba6cc04..987408d 100644
> --- a/arch/x86/kernel/apic/apic.c
> +++ b/arch/x86/kernel/apic/apic.c
> @@ -162,6 +162,7 @@ __setup("apicpmtimer", setup_apicpmtimer);
> #endif
>
> int x2apic_mode;
> +EXPORT_SYMBOL_GPL(x2apic_mode);
> #ifdef CONFIG_X86_X2APIC
> /* x2apic enabled before OS handover */
> int x2apic_preenabled;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 3e556c6..a4670d3 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -45,6 +45,7 @@
> #include <asm/perf_event.h>
> #include <asm/debugreg.h>
> #include <asm/kexec.h>
> +#include <asm/irq_remapping.h>
>
> #include "trace.h"
>
> @@ -408,13 +409,32 @@ struct nested_vmx {
> };
>
> #define POSTED_INTR_ON 0
> +#define POSTED_INTR_SN 1
> +
> /* Posted-Interrupt Descriptor */
> struct pi_desc {
> u32 pir[8]; /* Posted interrupt requested */
> - u32 control; /* bit 0 of control is outstanding notification bit */
> - u32 rsvd[7];
> + union {
> + struct {
> + u64 on : 1,
> + sn : 1,
> + rsvd_1 : 13,
> + ndm : 1,
> + nv : 8,
> + rsvd_2 : 8,
> + ndst : 32;
> + };
> + u64 control;
> + };
> + u32 rsvd[6];
> } __aligned(64);
>
> +static void pi_clear_sn(struct pi_desc *pi_desc)
> +{
> + return clear_bit(POSTED_INTR_SN,
> + (unsigned long *)&pi_desc->control);
> +}
> +
> static bool pi_test_and_set_on(struct pi_desc *pi_desc)
> {
> return test_and_set_bit(POSTED_INTR_ON,
> @@ -4396,6 +4416,33 @@ static void ept_set_mmio_spte_mask(void)
> kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> }
>
> +static bool pi_desc_init(struct vcpu_vmx *vmx)
> +{
> + unsigned int dest;
> +
> + if (irq_post_enabled == 0)
> + return true;
> +
> + /*
> + * Initialize Posted-Interrupt Descriptor
> + */
> +
> + pi_clear_sn(&vmx->pi_desc);
> + vmx->pi_desc.nv = POSTED_INTR_VECTOR;
> +
> + /* Physical mode for Notificaiton Event */
> + vmx->pi_desc.ndm = 0;
> + dest = cpu_physical_id(vmx->vcpu.cpu);
> +
> + if (x2apic_mode)
> + vmx->pi_desc.ndst = dest;
> + else
> + vmx->pi_desc.ndst = (dest << 8) & 0xFF00;
> +
> + return true;
Why does this bother to return anything since it can only return true?
> +}
> +
> +
> /*
> * Sets up the vmcs for emulated real mode.
> */
> @@ -4439,6 +4486,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>
> vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
> vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
> +
> + if (!pi_desc_init(vmx)) {
And therefore this cannot happen.
> + printk(KERN_ERR "Initialize PI descriptor error!\n");
> + return 1;
This is the wrong error anyway: vmx_create_vcpu() would return ERR_PTR(1),
which the caller's IS_ERR() check does not recognize as an error.
Thanks,
Alex
> + }
> }
>
> if (ple_gap) {
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/13] KVM: Initialize VT-d Posted-Interrtups Descriptor
2014-11-10 6:26 ` Feng Wu
@ 2014-11-11 13:35 ` Jiang Liu
-1 siblings, 0 replies; 53+ messages in thread
From: Jiang Liu @ 2014-11-11 13:35 UTC (permalink / raw)
To: Feng Wu, gleb-DgEjT+Ai2ygdnm+yROfE0A,
pbonzini-H+wXaHxf7aLQT0dZR+AlfA, dwmw2-wEGCiKHe2LqWVfeAwA7xHQ,
joro-zLv9SwRftAIdnm+yROfE0A, tglx-hfZtesqFncYOwBW4kG4KsQ,
mingo-H+wXaHxf7aLQT0dZR+AlfA, hpa-YMNOUZJC4hwAvxtiuMwx3w,
x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
On 2014/11/10 14:26, Feng Wu wrote:
> This patch initialize the VT-d Posted-interrupt Descritpor.
>
> Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> ---
> arch/x86/include/asm/irq_remapping.h | 1 +
> arch/x86/kernel/apic/apic.c | 1 +
> arch/x86/kvm/vmx.c | 56 ++++++++++++++++++++++++++++++++-
> 3 files changed, 56 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
> index b7747c4..a3cc437 100644
> --- a/arch/x86/include/asm/irq_remapping.h
> +++ b/arch/x86/include/asm/irq_remapping.h
> @@ -57,6 +57,7 @@ extern bool setup_remapped_irq(int irq,
> struct irq_chip *chip);
>
> void irq_remap_modify_chip_defaults(struct irq_chip *chip);
> +extern int irq_post_enabled;
>
> #else /* CONFIG_IRQ_REMAP */
>
> diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
> index ba6cc04..987408d 100644
> --- a/arch/x86/kernel/apic/apic.c
> +++ b/arch/x86/kernel/apic/apic.c
> @@ -162,6 +162,7 @@ __setup("apicpmtimer", setup_apicpmtimer);
> #endif
>
> int x2apic_mode;
> +EXPORT_SYMBOL_GPL(x2apic_mode);
> #ifdef CONFIG_X86_X2APIC
> /* x2apic enabled before OS handover */
> int x2apic_preenabled;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 3e556c6..a4670d3 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -45,6 +45,7 @@
> #include <asm/perf_event.h>
> #include <asm/debugreg.h>
> #include <asm/kexec.h>
> +#include <asm/irq_remapping.h>
>
> #include "trace.h"
>
> @@ -408,13 +409,32 @@ struct nested_vmx {
> };
>
> #define POSTED_INTR_ON 0
> +#define POSTED_INTR_SN 1
> +
> /* Posted-Interrupt Descriptor */
> struct pi_desc {
> u32 pir[8]; /* Posted interrupt requested */
> - u32 control; /* bit 0 of control is outstanding notification bit */
> - u32 rsvd[7];
> + union {
> + struct {
> + u64 on : 1,
> + sn : 1,
> + rsvd_1 : 13,
> + ndm : 1,
> + nv : 8,
> + rsvd_2 : 8,
> + ndst : 32;
> + };
> + u64 control;
> + };
> + u32 rsvd[6];
> } __aligned(64);
>
> +static void pi_clear_sn(struct pi_desc *pi_desc)
> +{
> + return clear_bit(POSTED_INTR_SN,
> + (unsigned long *)&pi_desc->control);
> +}
> +
> static bool pi_test_and_set_on(struct pi_desc *pi_desc)
> {
> return test_and_set_bit(POSTED_INTR_ON,
> @@ -4396,6 +4416,33 @@ static void ept_set_mmio_spte_mask(void)
> kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> }
>
> +static bool pi_desc_init(struct vcpu_vmx *vmx)
> +{
> + unsigned int dest;
> +
> + if (irq_post_enabled == 0)
> + return true;
> +
> + /*
> + * Initialize Posted-Interrupt Descriptor
> + */
> +
> + pi_clear_sn(&vmx->pi_desc);
> + vmx->pi_desc.nv = POSTED_INTR_VECTOR;
> +
> + /* Physical mode for Notificaiton Event */
> + vmx->pi_desc.ndm = 0;
> + dest = cpu_physical_id(vmx->vcpu.cpu);
> +
> + if (x2apic_mode)
Hi Feng,
Could you try to use x2apic_enabled() here so you don't
need to export x2apic_mode?
Regards!
Gerry
> + vmx->pi_desc.ndst = dest;
> + else
> + vmx->pi_desc.ndst = (dest << 8) & 0xFF00;
> +
> + return true;
> +}
> +
> +
> /*
> * Sets up the vmcs for emulated real mode.
> */
> @@ -4439,6 +4486,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>
> vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
> vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
> +
> + if (!pi_desc_init(vmx)) {
> + printk(KERN_ERR "Initialize PI descriptor error!\n");
> + return 1;
> + }
> }
>
> if (ple_gap) {
>
^ permalink raw reply [flat|nested] 53+ messages in thread* Re: [PATCH 02/13] KVM: Initialize VT-d Posted-Interrtups Descriptor
@ 2014-11-11 13:35 ` Jiang Liu
0 siblings, 0 replies; 53+ messages in thread
From: Jiang Liu @ 2014-11-11 13:35 UTC (permalink / raw)
To: Feng Wu, gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel
On 2014/11/10 14:26, Feng Wu wrote:
> This patch initializes the VT-d Posted-Interrupt Descriptor.
>
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> ---
> arch/x86/include/asm/irq_remapping.h | 1 +
> arch/x86/kernel/apic/apic.c | 1 +
> arch/x86/kvm/vmx.c | 56 ++++++++++++++++++++++++++++++++-
> 3 files changed, 56 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
> index b7747c4..a3cc437 100644
> --- a/arch/x86/include/asm/irq_remapping.h
> +++ b/arch/x86/include/asm/irq_remapping.h
> @@ -57,6 +57,7 @@ extern bool setup_remapped_irq(int irq,
> struct irq_chip *chip);
>
> void irq_remap_modify_chip_defaults(struct irq_chip *chip);
> +extern int irq_post_enabled;
>
> #else /* CONFIG_IRQ_REMAP */
>
> diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
> index ba6cc04..987408d 100644
> --- a/arch/x86/kernel/apic/apic.c
> +++ b/arch/x86/kernel/apic/apic.c
> @@ -162,6 +162,7 @@ __setup("apicpmtimer", setup_apicpmtimer);
> #endif
>
> int x2apic_mode;
> +EXPORT_SYMBOL_GPL(x2apic_mode);
> #ifdef CONFIG_X86_X2APIC
> /* x2apic enabled before OS handover */
> int x2apic_preenabled;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 3e556c6..a4670d3 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -45,6 +45,7 @@
> #include <asm/perf_event.h>
> #include <asm/debugreg.h>
> #include <asm/kexec.h>
> +#include <asm/irq_remapping.h>
>
> #include "trace.h"
>
> @@ -408,13 +409,32 @@ struct nested_vmx {
> };
>
> #define POSTED_INTR_ON 0
> +#define POSTED_INTR_SN 1
> +
> /* Posted-Interrupt Descriptor */
> struct pi_desc {
> u32 pir[8]; /* Posted interrupt requested */
> - u32 control; /* bit 0 of control is outstanding notification bit */
> - u32 rsvd[7];
> + union {
> + struct {
> + u64 on : 1,
> + sn : 1,
> + rsvd_1 : 13,
> + ndm : 1,
> + nv : 8,
> + rsvd_2 : 8,
> + ndst : 32;
> + };
> + u64 control;
> + };
> + u32 rsvd[6];
> } __aligned(64);
>
> +static void pi_clear_sn(struct pi_desc *pi_desc)
> +{
> + return clear_bit(POSTED_INTR_SN,
> + (unsigned long *)&pi_desc->control);
> +}
> +
> static bool pi_test_and_set_on(struct pi_desc *pi_desc)
> {
> return test_and_set_bit(POSTED_INTR_ON,
> @@ -4396,6 +4416,33 @@ static void ept_set_mmio_spte_mask(void)
> kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> }
>
> +static bool pi_desc_init(struct vcpu_vmx *vmx)
> +{
> + unsigned int dest;
> +
> + if (irq_post_enabled == 0)
> + return true;
> +
> + /*
> + * Initialize Posted-Interrupt Descriptor
> + */
> +
> + pi_clear_sn(&vmx->pi_desc);
> + vmx->pi_desc.nv = POSTED_INTR_VECTOR;
> +
> + /* Physical mode for Notificaiton Event */
> + vmx->pi_desc.ndm = 0;
> + dest = cpu_physical_id(vmx->vcpu.cpu);
> +
> + if (x2apic_mode)
Hi Feng,
Could you try to use x2apic_enabled() here so you don't
need to export x2apic_mode?
Regards!
Gerry
> + vmx->pi_desc.ndst = dest;
> + else
> + vmx->pi_desc.ndst = (dest << 8) & 0xFF00;
> +
> + return true;
> +}
> +
> +
> /*
> * Sets up the vmcs for emulated real mode.
> */
> @@ -4439,6 +4486,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>
> vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
> vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
> +
> + if (!pi_desc_init(vmx)) {
> + printk(KERN_ERR "Initialize PI descriptor error!\n");
> + return 1;
> + }
> }
>
> if (ple_gap) {
>
^ permalink raw reply [flat|nested] 53+ messages in thread[parent not found: <546210B1.7060007-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>]
* RE: [PATCH 02/13] KVM: Initialize VT-d Posted-Interrtups Descriptor
2014-11-11 13:35 ` Jiang Liu
@ 2014-11-20 4:53 ` Wu, Feng
-1 siblings, 0 replies; 53+ messages in thread
From: Wu, Feng @ 2014-11-20 4:53 UTC (permalink / raw)
To: Jiang Liu, gleb-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
pbonzini-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org,
joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org,
tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org,
mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
hpa-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org,
x86-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org,
linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
kvm-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> -----Original Message-----
> From: Jiang Liu [mailto:jiang.liu-VuQAYsv1563Yd54FQh9/CA@public.gmane.org]
> Sent: Tuesday, November 11, 2014 9:36 PM
> To: Wu, Feng; gleb-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org; pbonzini-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org;
> dwmw2-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org; joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org; tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org;
> mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org; hpa-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org; x86-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
> Cc: kvm-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org;
> linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Subject: Re: [PATCH 02/13] KVM: Initialize VT-d Posted-Interrtups Descriptor
>
> On 2014/11/10 14:26, Feng Wu wrote:
> > This patch initializes the VT-d Posted-Interrupt Descriptor.
> >
> > Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> > ---
> > arch/x86/include/asm/irq_remapping.h | 1 +
> > arch/x86/kernel/apic/apic.c | 1 +
> > arch/x86/kvm/vmx.c | 56
> ++++++++++++++++++++++++++++++++-
> > 3 files changed, 56 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/irq_remapping.h
> b/arch/x86/include/asm/irq_remapping.h
> > index b7747c4..a3cc437 100644
> > --- a/arch/x86/include/asm/irq_remapping.h
> > +++ b/arch/x86/include/asm/irq_remapping.h
> > @@ -57,6 +57,7 @@ extern bool setup_remapped_irq(int irq,
> > struct irq_chip *chip);
> >
> > void irq_remap_modify_chip_defaults(struct irq_chip *chip);
> > +extern int irq_post_enabled;
> >
> > #else /* CONFIG_IRQ_REMAP */
> >
> > diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
> > index ba6cc04..987408d 100644
> > --- a/arch/x86/kernel/apic/apic.c
> > +++ b/arch/x86/kernel/apic/apic.c
> > @@ -162,6 +162,7 @@ __setup("apicpmtimer", setup_apicpmtimer);
> > #endif
> >
> > int x2apic_mode;
> > +EXPORT_SYMBOL_GPL(x2apic_mode);
> > #ifdef CONFIG_X86_X2APIC
> > /* x2apic enabled before OS handover */
> > int x2apic_preenabled;
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 3e556c6..a4670d3 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -45,6 +45,7 @@
> > #include <asm/perf_event.h>
> > #include <asm/debugreg.h>
> > #include <asm/kexec.h>
> > +#include <asm/irq_remapping.h>
> >
> > #include "trace.h"
> >
> > @@ -408,13 +409,32 @@ struct nested_vmx {
> > };
> >
> > #define POSTED_INTR_ON 0
> > +#define POSTED_INTR_SN 1
> > +
> > /* Posted-Interrupt Descriptor */
> > struct pi_desc {
> > u32 pir[8]; /* Posted interrupt requested */
> > - u32 control; /* bit 0 of control is outstanding notification bit */
> > - u32 rsvd[7];
> > + union {
> > + struct {
> > + u64 on : 1,
> > + sn : 1,
> > + rsvd_1 : 13,
> > + ndm : 1,
> > + nv : 8,
> > + rsvd_2 : 8,
> > + ndst : 32;
> > + };
> > + u64 control;
> > + };
> > + u32 rsvd[6];
> > } __aligned(64);
> >
> > +static void pi_clear_sn(struct pi_desc *pi_desc)
> > +{
> > + return clear_bit(POSTED_INTR_SN,
> > + (unsigned long *)&pi_desc->control);
> > +}
> > +
> > static bool pi_test_and_set_on(struct pi_desc *pi_desc)
> > {
> > return test_and_set_bit(POSTED_INTR_ON,
> > @@ -4396,6 +4416,33 @@ static void ept_set_mmio_spte_mask(void)
> > kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> > }
> >
> > +static bool pi_desc_init(struct vcpu_vmx *vmx)
> > +{
> > + unsigned int dest;
> > +
> > + if (irq_post_enabled == 0)
> > + return true;
> > +
> > + /*
> > + * Initialize Posted-Interrupt Descriptor
> > + */
> > +
> > + pi_clear_sn(&vmx->pi_desc);
> > + vmx->pi_desc.nv = POSTED_INTR_VECTOR;
> > +
> > + /* Physical mode for Notificaiton Event */
> > + vmx->pi_desc.ndm = 0;
> > + dest = cpu_physical_id(vmx->vcpu.cpu);
> > +
> > + if (x2apic_mode)
> Hi Feng,
> Could you try to use x2apic_enabled() here so you don't
> need to export x2apic_mode?
> Regards!
> Gerry
In that case, we should also export x2apic_enabled(), right?
Thanks,
Feng
> > + vmx->pi_desc.ndst = dest;
> > + else
> > + vmx->pi_desc.ndst = (dest << 8) & 0xFF00;
> > +
> > + return true;
> > +}
> > +
> > +
> > /*
> > * Sets up the vmcs for emulated real mode.
> > */
> > @@ -4439,6 +4486,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx
> *vmx)
> >
> > vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
> > vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
> > +
> > + if (!pi_desc_init(vmx)) {
> > + printk(KERN_ERR "Initialize PI descriptor error!\n");
> > + return 1;
> > + }
> > }
> >
> > if (ple_gap) {
> >
^ permalink raw reply [flat|nested] 53+ messages in thread* RE: [PATCH 02/13] KVM: Initialize VT-d Posted-Interrtups Descriptor
@ 2014-11-20 4:53 ` Wu, Feng
0 siblings, 0 replies; 53+ messages in thread
From: Wu, Feng @ 2014-11-20 4:53 UTC (permalink / raw)
To: Jiang Liu, gleb@kernel.org, pbonzini@redhat.com,
dwmw2@infradead.org, joro@8bytes.org, tglx@linutronix.de,
mingo@redhat.com, hpa@zytor.com, x86@kernel.org
Cc: kvm@vger.kernel.org, iommu@lists.linux-foundation.org,
linux-kernel@vger.kernel.org, Wu, Feng
> -----Original Message-----
> From: Jiang Liu [mailto:jiang.liu@linux.intel.com]
> Sent: Tuesday, November 11, 2014 9:36 PM
> To: Wu, Feng; gleb@kernel.org; pbonzini@redhat.com;
> dwmw2@infradead.org; joro@8bytes.org; tglx@linutronix.de;
> mingo@redhat.com; hpa@zytor.com; x86@kernel.org
> Cc: kvm@vger.kernel.org; iommu@lists.linux-foundation.org;
> linux-kernel@vger.kernel.org
> Subject: Re: [PATCH 02/13] KVM: Initialize VT-d Posted-Interrtups Descriptor
>
> On 2014/11/10 14:26, Feng Wu wrote:
> > This patch initializes the VT-d Posted-Interrupt Descriptor.
> >
> > Signed-off-by: Feng Wu <feng.wu@intel.com>
> > ---
> > arch/x86/include/asm/irq_remapping.h | 1 +
> > arch/x86/kernel/apic/apic.c | 1 +
> > arch/x86/kvm/vmx.c | 56
> ++++++++++++++++++++++++++++++++-
> > 3 files changed, 56 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/irq_remapping.h
> b/arch/x86/include/asm/irq_remapping.h
> > index b7747c4..a3cc437 100644
> > --- a/arch/x86/include/asm/irq_remapping.h
> > +++ b/arch/x86/include/asm/irq_remapping.h
> > @@ -57,6 +57,7 @@ extern bool setup_remapped_irq(int irq,
> > struct irq_chip *chip);
> >
> > void irq_remap_modify_chip_defaults(struct irq_chip *chip);
> > +extern int irq_post_enabled;
> >
> > #else /* CONFIG_IRQ_REMAP */
> >
> > diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
> > index ba6cc04..987408d 100644
> > --- a/arch/x86/kernel/apic/apic.c
> > +++ b/arch/x86/kernel/apic/apic.c
> > @@ -162,6 +162,7 @@ __setup("apicpmtimer", setup_apicpmtimer);
> > #endif
> >
> > int x2apic_mode;
> > +EXPORT_SYMBOL_GPL(x2apic_mode);
> > #ifdef CONFIG_X86_X2APIC
> > /* x2apic enabled before OS handover */
> > int x2apic_preenabled;
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 3e556c6..a4670d3 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -45,6 +45,7 @@
> > #include <asm/perf_event.h>
> > #include <asm/debugreg.h>
> > #include <asm/kexec.h>
> > +#include <asm/irq_remapping.h>
> >
> > #include "trace.h"
> >
> > @@ -408,13 +409,32 @@ struct nested_vmx {
> > };
> >
> > #define POSTED_INTR_ON 0
> > +#define POSTED_INTR_SN 1
> > +
> > /* Posted-Interrupt Descriptor */
> > struct pi_desc {
> > u32 pir[8]; /* Posted interrupt requested */
> > - u32 control; /* bit 0 of control is outstanding notification bit */
> > - u32 rsvd[7];
> > + union {
> > + struct {
> > + u64 on : 1,
> > + sn : 1,
> > + rsvd_1 : 13,
> > + ndm : 1,
> > + nv : 8,
> > + rsvd_2 : 8,
> > + ndst : 32;
> > + };
> > + u64 control;
> > + };
> > + u32 rsvd[6];
> > } __aligned(64);
> >
> > +static void pi_clear_sn(struct pi_desc *pi_desc)
> > +{
> > + return clear_bit(POSTED_INTR_SN,
> > + (unsigned long *)&pi_desc->control);
> > +}
> > +
> > static bool pi_test_and_set_on(struct pi_desc *pi_desc)
> > {
> > return test_and_set_bit(POSTED_INTR_ON,
> > @@ -4396,6 +4416,33 @@ static void ept_set_mmio_spte_mask(void)
> > kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> > }
> >
> > +static bool pi_desc_init(struct vcpu_vmx *vmx)
> > +{
> > + unsigned int dest;
> > +
> > + if (irq_post_enabled == 0)
> > + return true;
> > +
> > + /*
> > + * Initialize Posted-Interrupt Descriptor
> > + */
> > +
> > + pi_clear_sn(&vmx->pi_desc);
> > + vmx->pi_desc.nv = POSTED_INTR_VECTOR;
> > +
> > + /* Physical mode for Notificaiton Event */
> > + vmx->pi_desc.ndm = 0;
> > + dest = cpu_physical_id(vmx->vcpu.cpu);
> > +
> > + if (x2apic_mode)
> Hi Feng,
> Could you try to use x2apic_enabled() here so you don't
> need to export x2apic_mode?
> Regards!
> Gerry
In that case, we should also export x2apic_enabled(), right?
Thanks,
Feng
> > + vmx->pi_desc.ndst = dest;
> > + else
> > + vmx->pi_desc.ndst = (dest << 8) & 0xFF00;
> > +
> > + return true;
> > +}
> > +
> > +
> > /*
> > * Sets up the vmcs for emulated real mode.
> > */
> > @@ -4439,6 +4486,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx
> *vmx)
> >
> > vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
> > vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
> > +
> > + if (!pi_desc_init(vmx)) {
> > + printk(KERN_ERR "Initialize PI descriptor error!\n");
> > + return 1;
> > + }
> > }
> >
> > if (ple_gap) {
> >
^ permalink raw reply [flat|nested] 53+ messages in thread[parent not found: <E959C4978C3B6342920538CF579893F0022A04CD-0J0gbvR4kTg/UvCtAeCM4rfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: [PATCH 02/13] KVM: Initialize VT-d Posted-Interrtups Descriptor
2014-11-20 4:53 ` Wu, Feng
@ 2014-11-20 5:00 ` Jiang Liu
-1 siblings, 0 replies; 53+ messages in thread
From: Jiang Liu @ 2014-11-20 5:00 UTC (permalink / raw)
To: Wu, Feng, gleb-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
pbonzini-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org,
joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org,
tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org,
mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
hpa-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org,
x86-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org,
linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
kvm-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
On 2014/11/20 12:53, Wu, Feng wrote:
>
>
>> -----Original Message-----
<snip>
>>> + /*
>>> + * Initialize Posted-Interrupt Descriptor
>>> + */
>>> +
>>> + pi_clear_sn(&vmx->pi_desc);
>>> + vmx->pi_desc.nv = POSTED_INTR_VECTOR;
>>> +
>>> + /* Physical mode for Notificaiton Event */
>>> + vmx->pi_desc.ndm = 0;
>>> + dest = cpu_physical_id(vmx->vcpu.cpu);
>>> +
>>> + if (x2apic_mode)
>> Hi Feng,
>> Could you try to use x2apic_enabled() here so you don't
>> need to export x2apic_mode?
>> Regards!
>> Gerry
>
> In that case, we should also export x2apic_enabled(), right?
Hi Feng,
x2apic_enabled() is a static inline function:)
Regards!
Gerry
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 02/13] KVM: Initialize VT-d Posted-Interrtups Descriptor
@ 2014-11-20 5:00 ` Jiang Liu
0 siblings, 0 replies; 53+ messages in thread
From: Jiang Liu @ 2014-11-20 5:00 UTC (permalink / raw)
To: Wu, Feng, gleb@kernel.org, pbonzini@redhat.com,
dwmw2@infradead.org, joro@8bytes.org, tglx@linutronix.de,
mingo@redhat.com, hpa@zytor.com, x86@kernel.org
Cc: kvm@vger.kernel.org, iommu@lists.linux-foundation.org,
linux-kernel@vger.kernel.org
On 2014/11/20 12:53, Wu, Feng wrote:
>
>
>> -----Original Message-----
<snip>
>>> + /*
>>> + * Initialize Posted-Interrupt Descriptor
>>> + */
>>> +
>>> + pi_clear_sn(&vmx->pi_desc);
>>> + vmx->pi_desc.nv = POSTED_INTR_VECTOR;
>>> +
>>> + /* Physical mode for Notificaiton Event */
>>> + vmx->pi_desc.ndm = 0;
>>> + dest = cpu_physical_id(vmx->vcpu.cpu);
>>> +
>>> + if (x2apic_mode)
>> Hi Feng,
>> Could you try to use x2apic_enabled() here so you don't
>> need to export x2apic_mode?
>> Regards!
>> Gerry
>
> In that case, we should also export x2apic_enabled(), right?
Hi Feng,
x2apic_enabled() is a static inline function:)
Regards!
Gerry
^ permalink raw reply [flat|nested] 53+ messages in thread
* [PATCH 03/13] KVM: Add KVM_CAP_PI to detect VT-d Posted-Interrtups
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 6:26 ` Feng Wu
-1 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb-DgEjT+Ai2ygdnm+yROfE0A, pbonzini-H+wXaHxf7aLQT0dZR+AlfA,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ, joro-zLv9SwRftAIdnm+yROfE0A,
tglx-hfZtesqFncYOwBW4kG4KsQ, mingo-H+wXaHxf7aLQT0dZR+AlfA,
hpa-YMNOUZJC4hwAvxtiuMwx3w, x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
This patch adds KVM_CAP_PI to detect the VT-d Posted-Interrupts
feature for QEMU.
Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
arch/x86/kvm/x86.c | 4 ++++
include/uapi/linux/kvm.h | 1 +
2 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0033df3..b447a98 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -63,6 +63,7 @@
#include <asm/xcr.h>
#include <asm/pvclock.h>
#include <asm/div64.h>
+#include <asm/irq_remapping.h>
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
@@ -2775,6 +2776,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_TSC_DEADLINE_TIMER:
r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
break;
+ case KVM_CAP_PI:
+ r = irq_post_enabled;
+ break;
default:
r = 0;
break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6076882..7593c52 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -761,6 +761,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_PPC_FIXUP_HCALL 103
#define KVM_CAP_PPC_ENABLE_HCALL 104
#define KVM_CAP_CHECK_EXTENSION_VM 105
+#define KVM_CAP_PI 106
#ifdef KVM_CAP_IRQ_ROUTING
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread* [PATCH 03/13] KVM: Add KVM_CAP_PI to detect VT-d Posted-Interrtups
@ 2014-11-10 6:26 ` Feng Wu
0 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel, Feng Wu
This patch adds KVM_CAP_PI to detect the VT-d Posted-Interrupts
feature for QEMU.
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
arch/x86/kvm/x86.c | 4 ++++
include/uapi/linux/kvm.h | 1 +
2 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0033df3..b447a98 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -63,6 +63,7 @@
#include <asm/xcr.h>
#include <asm/pvclock.h>
#include <asm/div64.h>
+#include <asm/irq_remapping.h>
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
@@ -2775,6 +2776,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_TSC_DEADLINE_TIMER:
r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
break;
+ case KVM_CAP_PI:
+ r = irq_post_enabled;
+ break;
default:
r = 0;
break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6076882..7593c52 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -761,6 +761,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_PPC_FIXUP_HCALL 103
#define KVM_CAP_PPC_ENABLE_HCALL 104
#define KVM_CAP_CHECK_EXTENSION_VM 105
+#define KVM_CAP_PI 106
#ifdef KVM_CAP_IRQ_ROUTING
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [PATCH 04/13] iommu/vt-d: Adjust 'struct irte' to better suit for VT-d Posted-Interrupts
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 6:26 ` Feng Wu
-1 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb-DgEjT+Ai2ygdnm+yROfE0A, pbonzini-H+wXaHxf7aLQT0dZR+AlfA,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ, joro-zLv9SwRftAIdnm+yROfE0A,
tglx-hfZtesqFncYOwBW4kG4KsQ, mingo-H+wXaHxf7aLQT0dZR+AlfA,
hpa-YMNOUZJC4hwAvxtiuMwx3w, x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
This patch adjusts the definition of 'struct irte', so that we can
add the VT-d Posted-Interrupts format in this structure later.
Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
drivers/iommu/intel_irq_remapping.c | 35 +++++++++++++++++++----------------
include/linux/dmar.h | 4 ++--
2 files changed, 21 insertions(+), 18 deletions(-)
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index f99f0f1..776da10 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -310,9 +310,9 @@ static void set_irte_sid(struct irte *irte, unsigned int svt,
{
if (disable_sourceid_checking)
svt = SVT_NO_VERIFY;
- irte->svt = svt;
- irte->sq = sq;
- irte->sid = sid;
+ irte->irq_remap_high.svt = svt;
+ irte->irq_remap_high.sq = sq;
+ irte->irq_remap_high.sid = sid;
}
static int set_ioapic_sid(struct irte *irte, int apic)
@@ -917,8 +917,8 @@ static void prepare_irte(struct irte *irte, int vector,
{
memset(irte, 0, sizeof(*irte));
- irte->present = 1;
- irte->dst_mode = apic->irq_dest_mode;
+ irte->irq_remap_low.present = 1;
+ irte->irq_remap_low.dst_mode = apic->irq_dest_mode;
/*
* Trigger mode in the IRTE will always be edge, and for IO-APIC, the
* actual level or edge trigger will be setup in the IO-APIC
@@ -926,11 +926,11 @@ static void prepare_irte(struct irte *irte, int vector,
* For more details, see the comments (in io_apic.c) explainig IO-APIC
* irq migration in the presence of interrupt-remapping.
*/
- irte->trigger_mode = 0;
- irte->dlvry_mode = apic->irq_delivery_mode;
- irte->vector = vector;
- irte->dest_id = IRTE_DEST(dest);
- irte->redir_hint = 1;
+ irte->irq_remap_low.trigger_mode = 0;
+ irte->irq_remap_low.dlvry_mode = apic->irq_delivery_mode;
+ irte->irq_remap_low.vector = vector;
+ irte->irq_remap_low.dest_id = IRTE_DEST(dest);
+ irte->irq_remap_low.redir_hint = 1;
}
static int intel_setup_ioapic_entry(int irq,
@@ -973,10 +973,13 @@ static int intel_setup_ioapic_entry(int irq,
"Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
"Avail:%X Vector:%02X Dest:%08X "
"SID:%04X SQ:%X SVT:%X)\n",
- attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
- irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
- irte.avail, irte.vector, irte.dest_id,
- irte.sid, irte.sq, irte.svt);
+ attr->ioapic, irte.irq_remap_low.present,
+ irte.irq_remap_low.fpd, irte.irq_remap_low.dst_mode,
+ irte.irq_remap_low.redir_hint, irte.irq_remap_low.trigger_mode,
+ irte.irq_remap_low.dlvry_mode, irte.irq_remap_low.avail,
+ irte.irq_remap_low.vector, irte.irq_remap_low.dest_id,
+ irte.irq_remap_high.sid, irte.irq_remap_high.sq,
+ irte.irq_remap_high.svt);
entry = (struct IR_IO_APIC_route_entry *)route_entry;
memset(entry, 0, sizeof(*entry));
@@ -1046,8 +1049,8 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
return err;
}
- irte.vector = cfg->vector;
- irte.dest_id = IRTE_DEST(dest);
+ irte.irq_remap_low.vector = cfg->vector;
+ irte.irq_remap_low.dest_id = IRTE_DEST(dest);
/*
* Atomically updates the IRTE with the new destination, vector
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 593fff9..8be5d42 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -159,7 +159,7 @@ struct irte {
vector : 8,
__reserved_2 : 8,
dest_id : 32;
- };
+ } irq_remap_low;
__u64 low;
};
@@ -169,7 +169,7 @@ struct irte {
sq : 2,
svt : 2,
__reserved_3 : 44;
- };
+ } irq_remap_high;
__u64 high;
};
};
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread* [PATCH 04/13] iommu/vt-d: Adjust 'struct irte' to better suit for VT-d Posted-Interrupts
@ 2014-11-10 6:26 ` Feng Wu
0 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel, Feng Wu
This patch adjusts the definition of 'struct irte', so that we can
add the VT-d Posted-Interrupts format in this structure later.
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
drivers/iommu/intel_irq_remapping.c | 35 +++++++++++++++++++----------------
include/linux/dmar.h | 4 ++--
2 files changed, 21 insertions(+), 18 deletions(-)
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index f99f0f1..776da10 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -310,9 +310,9 @@ static void set_irte_sid(struct irte *irte, unsigned int svt,
{
if (disable_sourceid_checking)
svt = SVT_NO_VERIFY;
- irte->svt = svt;
- irte->sq = sq;
- irte->sid = sid;
+ irte->irq_remap_high.svt = svt;
+ irte->irq_remap_high.sq = sq;
+ irte->irq_remap_high.sid = sid;
}
static int set_ioapic_sid(struct irte *irte, int apic)
@@ -917,8 +917,8 @@ static void prepare_irte(struct irte *irte, int vector,
{
memset(irte, 0, sizeof(*irte));
- irte->present = 1;
- irte->dst_mode = apic->irq_dest_mode;
+ irte->irq_remap_low.present = 1;
+ irte->irq_remap_low.dst_mode = apic->irq_dest_mode;
/*
* Trigger mode in the IRTE will always be edge, and for IO-APIC, the
* actual level or edge trigger will be setup in the IO-APIC
@@ -926,11 +926,11 @@ static void prepare_irte(struct irte *irte, int vector,
* For more details, see the comments (in io_apic.c) explainig IO-APIC
* irq migration in the presence of interrupt-remapping.
*/
- irte->trigger_mode = 0;
- irte->dlvry_mode = apic->irq_delivery_mode;
- irte->vector = vector;
- irte->dest_id = IRTE_DEST(dest);
- irte->redir_hint = 1;
+ irte->irq_remap_low.trigger_mode = 0;
+ irte->irq_remap_low.dlvry_mode = apic->irq_delivery_mode;
+ irte->irq_remap_low.vector = vector;
+ irte->irq_remap_low.dest_id = IRTE_DEST(dest);
+ irte->irq_remap_low.redir_hint = 1;
}
static int intel_setup_ioapic_entry(int irq,
@@ -973,10 +973,13 @@ static int intel_setup_ioapic_entry(int irq,
"Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
"Avail:%X Vector:%02X Dest:%08X "
"SID:%04X SQ:%X SVT:%X)\n",
- attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
- irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
- irte.avail, irte.vector, irte.dest_id,
- irte.sid, irte.sq, irte.svt);
+ attr->ioapic, irte.irq_remap_low.present,
+ irte.irq_remap_low.fpd, irte.irq_remap_low.dst_mode,
+ irte.irq_remap_low.redir_hint, irte.irq_remap_low.trigger_mode,
+ irte.irq_remap_low.dlvry_mode, irte.irq_remap_low.avail,
+ irte.irq_remap_low.vector, irte.irq_remap_low.dest_id,
+ irte.irq_remap_high.sid, irte.irq_remap_high.sq,
+ irte.irq_remap_high.svt);
entry = (struct IR_IO_APIC_route_entry *)route_entry;
memset(entry, 0, sizeof(*entry));
@@ -1046,8 +1049,8 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
return err;
}
- irte.vector = cfg->vector;
- irte.dest_id = IRTE_DEST(dest);
+ irte.irq_remap_low.vector = cfg->vector;
+ irte.irq_remap_low.dest_id = IRTE_DEST(dest);
/*
* Atomically updates the IRTE with the new destination, vector
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 593fff9..8be5d42 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -159,7 +159,7 @@ struct irte {
vector : 8,
__reserved_2 : 8,
dest_id : 32;
- };
+ } irq_remap_low;
__u64 low;
};
@@ -169,7 +169,7 @@ struct irte {
sq : 2,
svt : 2,
__reserved_3 : 44;
- };
+ } irq_remap_high;
__u64 high;
};
};
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread[parent not found: <1415600812-27773-5-git-send-email-feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>]
* Re: [PATCH 04/13] iommu/vt-d: Adjust 'struct irte' to better suit for VT-d Posted-Interrupts
2014-11-10 6:26 ` Feng Wu
@ 2014-11-11 13:43 ` Jiang Liu
-1 siblings, 0 replies; 53+ messages in thread
From: Jiang Liu @ 2014-11-11 13:43 UTC (permalink / raw)
To: Feng Wu, gleb-DgEjT+Ai2ygdnm+yROfE0A,
pbonzini-H+wXaHxf7aLQT0dZR+AlfA, dwmw2-wEGCiKHe2LqWVfeAwA7xHQ,
joro-zLv9SwRftAIdnm+yROfE0A, tglx-hfZtesqFncYOwBW4kG4KsQ,
mingo-H+wXaHxf7aLQT0dZR+AlfA, hpa-YMNOUZJC4hwAvxtiuMwx3w,
x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
Hi Feng,
Other than this solution, how about introducing a new
struct irte_pi for posted interrupts?
On 2014/11/10 14:26, Feng Wu wrote:
> This patch adjusts the definition of 'struct irte', so that we can
> add the VT-d Posted-Interrupts format in this structure later.
>
> Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> ---
> drivers/iommu/intel_irq_remapping.c | 35 +++++++++++++++++++----------------
> include/linux/dmar.h | 4 ++--
> 2 files changed, 21 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
> index f99f0f1..776da10 100644
> --- a/drivers/iommu/intel_irq_remapping.c
> +++ b/drivers/iommu/intel_irq_remapping.c
> @@ -310,9 +310,9 @@ static void set_irte_sid(struct irte *irte, unsigned int svt,
> {
> if (disable_sourceid_checking)
> svt = SVT_NO_VERIFY;
> - irte->svt = svt;
> - irte->sq = sq;
> - irte->sid = sid;
> + irte->irq_remap_high.svt = svt;
> + irte->irq_remap_high.sq = sq;
> + irte->irq_remap_high.sid = sid;
> }
>
> static int set_ioapic_sid(struct irte *irte, int apic)
> @@ -917,8 +917,8 @@ static void prepare_irte(struct irte *irte, int vector,
> {
> memset(irte, 0, sizeof(*irte));
>
> - irte->present = 1;
> - irte->dst_mode = apic->irq_dest_mode;
> + irte->irq_remap_low.present = 1;
> + irte->irq_remap_low.dst_mode = apic->irq_dest_mode;
> /*
> * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
> * actual level or edge trigger will be setup in the IO-APIC
> @@ -926,11 +926,11 @@ static void prepare_irte(struct irte *irte, int vector,
> * For more details, see the comments (in io_apic.c) explainig IO-APIC
> * irq migration in the presence of interrupt-remapping.
> */
> - irte->trigger_mode = 0;
> - irte->dlvry_mode = apic->irq_delivery_mode;
> - irte->vector = vector;
> - irte->dest_id = IRTE_DEST(dest);
> - irte->redir_hint = 1;
> + irte->irq_remap_low.trigger_mode = 0;
> + irte->irq_remap_low.dlvry_mode = apic->irq_delivery_mode;
> + irte->irq_remap_low.vector = vector;
> + irte->irq_remap_low.dest_id = IRTE_DEST(dest);
> + irte->irq_remap_low.redir_hint = 1;
> }
>
> static int intel_setup_ioapic_entry(int irq,
> @@ -973,10 +973,13 @@ static int intel_setup_ioapic_entry(int irq,
> "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
> "Avail:%X Vector:%02X Dest:%08X "
> "SID:%04X SQ:%X SVT:%X)\n",
> - attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
> - irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
> - irte.avail, irte.vector, irte.dest_id,
> - irte.sid, irte.sq, irte.svt);
> + attr->ioapic, irte.irq_remap_low.present,
> + irte.irq_remap_low.fpd, irte.irq_remap_low.dst_mode,
> + irte.irq_remap_low.redir_hint, irte.irq_remap_low.trigger_mode,
> + irte.irq_remap_low.dlvry_mode, irte.irq_remap_low.avail,
> + irte.irq_remap_low.vector, irte.irq_remap_low.dest_id,
> + irte.irq_remap_high.sid, irte.irq_remap_high.sq,
> + irte.irq_remap_high.svt);
>
> entry = (struct IR_IO_APIC_route_entry *)route_entry;
> memset(entry, 0, sizeof(*entry));
> @@ -1046,8 +1049,8 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
> return err;
> }
>
> - irte.vector = cfg->vector;
> - irte.dest_id = IRTE_DEST(dest);
> + irte.irq_remap_low.vector = cfg->vector;
> + irte.irq_remap_low.dest_id = IRTE_DEST(dest);
>
> /*
> * Atomically updates the IRTE with the new destination, vector
> diff --git a/include/linux/dmar.h b/include/linux/dmar.h
> index 593fff9..8be5d42 100644
> --- a/include/linux/dmar.h
> +++ b/include/linux/dmar.h
> @@ -159,7 +159,7 @@ struct irte {
> vector : 8,
> __reserved_2 : 8,
> dest_id : 32;
> - };
> + } irq_remap_low;
> __u64 low;
> };
>
> @@ -169,7 +169,7 @@ struct irte {
> sq : 2,
> svt : 2,
> __reserved_3 : 44;
> - };
> + } irq_remap_high;
> __u64 high;
> };
> };
>
^ permalink raw reply [flat|nested] 53+ messages in thread* Re: [PATCH 04/13] iommu/vt-d: Adjust 'struct irte' to better suit for VT-d Posted-Interrupts
@ 2014-11-11 13:43 ` Jiang Liu
0 siblings, 0 replies; 53+ messages in thread
From: Jiang Liu @ 2014-11-11 13:43 UTC (permalink / raw)
To: Feng Wu, gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel
Hi Feng,
As an alternative to this solution, how about introducing a new
struct irte_pi for posted interrupts?
On 2014/11/10 14:26, Feng Wu wrote:
> This patch adjusts the definition of 'struct irte', so that we can
> add the VT-d Posted-Interrupts format in this structure later.
>
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> ---
> drivers/iommu/intel_irq_remapping.c | 35 +++++++++++++++++++----------------
> include/linux/dmar.h | 4 ++--
> 2 files changed, 21 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
> index f99f0f1..776da10 100644
> --- a/drivers/iommu/intel_irq_remapping.c
> +++ b/drivers/iommu/intel_irq_remapping.c
> @@ -310,9 +310,9 @@ static void set_irte_sid(struct irte *irte, unsigned int svt,
> {
> if (disable_sourceid_checking)
> svt = SVT_NO_VERIFY;
> - irte->svt = svt;
> - irte->sq = sq;
> - irte->sid = sid;
> + irte->irq_remap_high.svt = svt;
> + irte->irq_remap_high.sq = sq;
> + irte->irq_remap_high.sid = sid;
> }
>
> static int set_ioapic_sid(struct irte *irte, int apic)
> @@ -917,8 +917,8 @@ static void prepare_irte(struct irte *irte, int vector,
> {
> memset(irte, 0, sizeof(*irte));
>
> - irte->present = 1;
> - irte->dst_mode = apic->irq_dest_mode;
> + irte->irq_remap_low.present = 1;
> + irte->irq_remap_low.dst_mode = apic->irq_dest_mode;
> /*
> * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
> * actual level or edge trigger will be setup in the IO-APIC
> @@ -926,11 +926,11 @@ static void prepare_irte(struct irte *irte, int vector,
> * For more details, see the comments (in io_apic.c) explainig IO-APIC
> * irq migration in the presence of interrupt-remapping.
> */
> - irte->trigger_mode = 0;
> - irte->dlvry_mode = apic->irq_delivery_mode;
> - irte->vector = vector;
> - irte->dest_id = IRTE_DEST(dest);
> - irte->redir_hint = 1;
> + irte->irq_remap_low.trigger_mode = 0;
> + irte->irq_remap_low.dlvry_mode = apic->irq_delivery_mode;
> + irte->irq_remap_low.vector = vector;
> + irte->irq_remap_low.dest_id = IRTE_DEST(dest);
> + irte->irq_remap_low.redir_hint = 1;
> }
>
> static int intel_setup_ioapic_entry(int irq,
> @@ -973,10 +973,13 @@ static int intel_setup_ioapic_entry(int irq,
> "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
> "Avail:%X Vector:%02X Dest:%08X "
> "SID:%04X SQ:%X SVT:%X)\n",
> - attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
> - irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
> - irte.avail, irte.vector, irte.dest_id,
> - irte.sid, irte.sq, irte.svt);
> + attr->ioapic, irte.irq_remap_low.present,
> + irte.irq_remap_low.fpd, irte.irq_remap_low.dst_mode,
> + irte.irq_remap_low.redir_hint, irte.irq_remap_low.trigger_mode,
> + irte.irq_remap_low.dlvry_mode, irte.irq_remap_low.avail,
> + irte.irq_remap_low.vector, irte.irq_remap_low.dest_id,
> + irte.irq_remap_high.sid, irte.irq_remap_high.sq,
> + irte.irq_remap_high.svt);
>
> entry = (struct IR_IO_APIC_route_entry *)route_entry;
> memset(entry, 0, sizeof(*entry));
> @@ -1046,8 +1049,8 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
> return err;
> }
>
> - irte.vector = cfg->vector;
> - irte.dest_id = IRTE_DEST(dest);
> + irte.irq_remap_low.vector = cfg->vector;
> + irte.irq_remap_low.dest_id = IRTE_DEST(dest);
>
> /*
> * Atomically updates the IRTE with the new destination, vector
> diff --git a/include/linux/dmar.h b/include/linux/dmar.h
> index 593fff9..8be5d42 100644
> --- a/include/linux/dmar.h
> +++ b/include/linux/dmar.h
> @@ -159,7 +159,7 @@ struct irte {
> vector : 8,
> __reserved_2 : 8,
> dest_id : 32;
> - };
> + } irq_remap_low;
> __u64 low;
> };
>
> @@ -169,7 +169,7 @@ struct irte {
> sq : 2,
> svt : 2,
> __reserved_3 : 44;
> - };
> + } irq_remap_high;
> __u64 high;
> };
> };
>
^ permalink raw reply [flat|nested] 53+ messages in thread
* [PATCH 05/13] KVM: Update IRTE according to guest interrupt configuration changes
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 6:26 ` Feng Wu
-1 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb-DgEjT+Ai2ygdnm+yROfE0A, pbonzini-H+wXaHxf7aLQT0dZR+AlfA,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ, joro-zLv9SwRftAIdnm+yROfE0A,
tglx-hfZtesqFncYOwBW4kG4KsQ, mingo-H+wXaHxf7aLQT0dZR+AlfA,
hpa-YMNOUZJC4hwAvxtiuMwx3w, x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
When a guest changes its interrupt configuration (such as the vector)
for direct-assigned devices, we need to update the associated IRTE
with the new guest vector, so external interrupts from the assigned
devices can be injected into guests without a VM-Exit.
The current method of handling guest lowest-priority interrupts
is to use a counter 'apic_arb_prio' for each VCPU: we choose the
VCPU with the smallest 'apic_arb_prio' and then increase it by 1.
However, for VT-d PI, we cannot re-use this, since we no longer
have control over 'apic_arb_prio' when posted interrupts are
delivered directly by hardware.
Here, we introduce an approach similar to 'apic_arb_prio' to handle
guest lowest-priority interrupts when VT-d PI is used. The idea is
as follows:
- Each VCPU has a counter 'round_robin_counter'.
- When a guest sets an interrupt to lowest priority, we choose
the VCPU with the smallest 'round_robin_counter' as the destination,
then increase it.
Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
arch/x86/include/asm/irq_remapping.h | 6 ++
arch/x86/include/asm/kvm_host.h | 2 +
arch/x86/kvm/vmx.c | 12 +++
arch/x86/kvm/x86.c | 11 +++
drivers/iommu/amd_iommu.c | 6 ++
drivers/iommu/intel_irq_remapping.c | 28 +++++++
drivers/iommu/irq_remapping.c | 9 ++
drivers/iommu/irq_remapping.h | 3 +
include/linux/dmar.h | 26 ++++++
include/linux/kvm_host.h | 22 +++++
include/uapi/linux/kvm.h | 1 +
virt/kvm/assigned-dev.c | 141 ++++++++++++++++++++++++++++++++++
virt/kvm/irq_comm.c | 4 +-
virt/kvm/irqchip.c | 11 ---
14 files changed, 269 insertions(+), 13 deletions(-)
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index a3cc437..32d6cc4 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -51,6 +51,7 @@ extern void compose_remapped_msi_msg(struct pci_dev *pdev,
unsigned int irq, unsigned int dest,
struct msi_msg *msg, u8 hpet_id);
extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
+extern int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 vector);
extern void panic_if_irq_remap(const char *msg);
extern bool setup_remapped_irq(int irq,
struct irq_cfg *cfg,
@@ -88,6 +89,11 @@ static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
return -ENODEV;
}
+static inline int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 vector)
+{
+ return -ENODEV;
+}
+
static inline void panic_if_irq_remap(const char *msg)
{
}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ed0c30..0630161 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -358,6 +358,7 @@ struct kvm_vcpu_arch {
struct kvm_lapic *apic; /* kernel irqchip context */
unsigned long apic_attention;
int32_t apic_arb_prio;
+ int32_t round_robin_counter;
int mp_state;
u64 ia32_misc_enable_msr;
bool tpr_access_reporting;
@@ -771,6 +772,7 @@ struct kvm_x86_ops {
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
+ u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
};
struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a4670d3..ae91b72 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -544,6 +544,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_vmx, vcpu);
}
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+ return &(to_vmx(vcpu)->pi_desc);
+}
+
#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
#define FIELD(number, name) [number] = VMCS12_OFFSET(name)
#define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
@@ -4280,6 +4285,11 @@ static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
return;
}
+static u64 vmx_get_pi_desc_addr(struct kvm_vcpu *vcpu)
+{
+ return __pa((u64)vcpu_to_pi_desc(vcpu));
+}
+
/*
* Set up the vmcs's constant host-state fields, i.e., host-state fields that
* will not change in the lifetime of the guest.
@@ -9232,6 +9242,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.check_nested_events = vmx_check_nested_events,
.sched_in = vmx_sched_in,
+
+ .get_pi_desc_addr = vmx_get_pi_desc_addr,
};
static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b447a98..0c19d15 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7735,6 +7735,17 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
+int kvm_update_pi_irte_common(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ u32 guest_vector, int host_irq)
+{
+ u64 pi_desc_addr = kvm_x86_ops->get_pi_desc_addr(vcpu);
+
+ if (update_pi_irte(host_irq, pi_desc_addr, guest_vector))
+ return -1;
+
+ return 0;
+}
+
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 505a9ad..a36fdc7 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4280,6 +4280,11 @@ static int alloc_hpet_msi(unsigned int irq, unsigned int id)
return 0;
}
+static int dummy_update_pi_irte(int irq, u64 pi_desc_addr, u32 vector)
+{
+ return -EINVAL;
+}
+
struct irq_remap_ops amd_iommu_irq_ops = {
.supported = amd_iommu_supported,
.prepare = amd_iommu_prepare,
@@ -4294,5 +4299,6 @@ struct irq_remap_ops amd_iommu_irq_ops = {
.msi_alloc_irq = msi_alloc_irq,
.msi_setup_irq = msi_setup_irq,
.alloc_hpet_msi = alloc_hpet_msi,
+ .update_pi_irte = dummy_update_pi_irte,
};
#endif
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 776da10..87c02fe 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1172,6 +1172,33 @@ static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id)
return ret;
}
+static int intel_update_pi_irte(int irq, u64 pi_desc_addr, u32 vector)
+{
+ struct irte irte;
+
+ if (get_irte(irq, &irte))
+ return -1;
+
+ irte.irq_post_low.urg = 0;
+ irte.irq_post_low.vector = vector;
+ irte.irq_post_low.pda_l = (pi_desc_addr >> (32 - PDA_LOW_BIT)) &
+ ~(-1UL << PDA_LOW_BIT);
+ irte.irq_post_high.pda_h = (pi_desc_addr >> 32) &
+ ~(-1UL << PDA_HIGH_BIT);
+
+ irte.irq_post_low.__reserved_1 = 0;
+ irte.irq_post_low.__reserved_2 = 0;
+ irte.irq_post_low.__reserved_3 = 0;
+ irte.irq_post_high.__reserved_4 = 0;
+
+ irte.irq_post_low.pst = 1;
+
+ if (modify_irte(irq, &irte))
+ return -1;
+
+ return 0;
+}
+
struct irq_remap_ops intel_irq_remap_ops = {
.supported = intel_irq_remapping_supported,
.prepare = dmar_table_init,
@@ -1186,4 +1213,5 @@ struct irq_remap_ops intel_irq_remap_ops = {
.msi_alloc_irq = intel_msi_alloc_irq,
.msi_setup_irq = intel_msi_setup_irq,
.alloc_hpet_msi = intel_alloc_hpet_msi,
+ .update_pi_irte = intel_update_pi_irte,
};
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 2f8ee00..0e36860 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -362,6 +362,15 @@ int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
return default_setup_hpet_msi(irq, id);
}
+int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 vector)
+{
+ if (!remap_ops || !remap_ops->update_pi_irte)
+ return -ENODEV;
+
+ return remap_ops->update_pi_irte(irq, pi_desc_addr, vector);
+}
+EXPORT_SYMBOL_GPL(update_pi_irte);
+
void panic_if_irq_remap(const char *msg)
{
if (irq_remapping_enabled)
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index 7bb5913..2d8f740 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -84,6 +84,9 @@ struct irq_remap_ops {
/* Setup interrupt remapping for an HPET MSI */
int (*alloc_hpet_msi)(unsigned int, unsigned int);
+
+ /* Update IRTE for posted-interrupt */
+ int (*update_pi_irte)(int irq, u64 pi_desc_addr, u32 vector);
};
extern struct irq_remap_ops intel_irq_remap_ops;
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 8be5d42..e1ff4f7 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -160,6 +160,20 @@ struct irte {
__reserved_2 : 8,
dest_id : 32;
} irq_remap_low;
+
+ struct {
+ __u64 present : 1,
+ fpd : 1,
+ __reserved_1 : 6,
+ avail : 4,
+ __reserved_2 : 2,
+ urg : 1,
+ pst : 1,
+ vector : 8,
+ __reserved_3 : 14,
+ pda_l : 26;
+ } irq_post_low;
+
__u64 low;
};
@@ -170,10 +184,22 @@ struct irte {
svt : 2,
__reserved_3 : 44;
} irq_remap_high;
+
+ struct {
+ __u64 sid: 16,
+ sq: 2,
+ svt: 2,
+ __reserved_4: 12,
+ pda_h: 32;
+ } irq_post_high;
+
__u64 high;
};
};
+#define PDA_LOW_BIT 26
+#define PDA_HIGH_BIT 32
+
enum {
IRQ_REMAP_XAPIC_MODE,
IRQ_REMAP_X2APIC_MODE,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ea53b04..6bb8287 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -335,6 +335,25 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
};
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+
+struct kvm_irq_routing_table {
+ int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+ struct kvm_kernel_irq_routing_entry *rt_entries;
+ u32 nr_rt_entries;
+ /*
+ * Array indexed by gsi. Each entry contains list of irq chips
+ * the gsi is connected to.
+ */
+ struct hlist_head map[0];
+};
+
+#else
+
+struct kvm_irq_routing_table {};
+
+#endif
+
#ifndef KVM_PRIVATE_MEM_SLOTS
#define KVM_PRIVATE_MEM_SLOTS 0
#endif
@@ -766,6 +785,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm_lapic_irq *irq);
+bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq);
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7593c52..509223a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1027,6 +1027,7 @@ struct kvm_s390_ucas_mapping {
#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data)
#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data)
+#define KVM_ASSIGN_DEV_PI_UPDATE _IOR(KVMIO, 0x7d, __u32)
/* Available with KVM_CAP_PIT_STATE2 */
#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index e05000e..e154009 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -326,6 +326,135 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
}
}
+int __weak kvm_update_pi_irte_common(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ u32 guest_vector, int host_irq)
+{
+ return 0;
+}
+
+int kvm_compare_rr_counter(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
+{
+ return vcpu1->arch.round_robin_counter -
+ vcpu2->arch.round_robin_counter;
+}
+
+bool kvm_pi_find_dest_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+ struct kvm_vcpu **dest_vcpu)
+{
+ int i, r = 0;
+ struct kvm_vcpu *vcpu, *dest = NULL;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!kvm_apic_present(vcpu))
+ continue;
+
+ if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+ irq->dest_id, irq->dest_mode))
+ continue;
+
+ if (!kvm_is_dm_lowest_prio(irq)) {
+ r++;
+ *dest_vcpu = vcpu;
+ } else if (kvm_lapic_enabled(vcpu)) {
+ if (!dest)
+ dest = vcpu;
+ else if (kvm_compare_rr_counter(vcpu, dest) < 0)
+ dest = vcpu;
+ }
+ }
+
+ if (dest) {
+ dest->arch.round_robin_counter++;
+ *dest_vcpu = dest;
+ return true;
+ } else if (r == 1)
+ return true;
+
+ return false;
+}
+
+static int __kvm_update_pi_irte(struct kvm *kvm, int host_irq, int guest_irq)
+{
+ struct kvm_kernel_irq_routing_entry *e;
+ struct kvm_irq_routing_table *irq_rt;
+ struct kvm_lapic_irq irq;
+ struct kvm_vcpu *vcpu;
+ int idx, ret = -EINVAL;
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+ ASSERT(guest_irq < irq_rt->nr_rt_entries);
+
+ hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+ if (e->type != KVM_IRQ_ROUTING_MSI)
+ continue;
+ /*
+ * VT-d posted-interrupt has the following
+ * limitations:
+ * - No support for posting multicast/broadcast
+ * interrupts to a VCPU
+ * Still use interrupt remapping for these
+ * kind of interrupts
+ */
+
+ kvm_set_msi_irq(e, &irq);
+ if (!kvm_pi_find_dest_vcpu(kvm, &irq, &vcpu)) {
+ printk(KERN_INFO "%s: can not find the target VCPU\n",
+ __func__);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (kvm_update_pi_irte_common(kvm, vcpu, irq.vector,
+ host_irq)) {
+ printk(KERN_INFO "%s: failed to update PI IRTE\n",
+ __func__);
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+ return ret;
+}
+
+int kvm_update_pi_irte(struct kvm *kvm, u32 dev_id)
+{
+ int i, rc = -1;
+ struct kvm_assigned_dev_kernel *dev;
+
+ mutex_lock(&kvm->lock);
+ dev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, dev_id);
+ if (!dev) {
+ printk(KERN_INFO "%s: cannot find the assigned dev.\n",
+ __func__);
+ rc = -1;
+ goto out;
+ }
+
+ BUG_ON(dev->irq_requested_type == 0);
+
+ if ((dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) &&
+ (dev->dev->msi_enabled == 1)) {
+ __kvm_update_pi_irte(kvm,
+ dev->host_irq, dev->guest_irq);
+ } else if ((dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) &&
+ (dev->dev->msix_enabled == 1)) {
+ for (i = 0; i < dev->entries_nr; i++) {
+ __kvm_update_pi_irte(kvm,
+ dev->host_msix_entries[i].vector,
+ dev->guest_msix_entries[i].vector);
+ }
+ }
+
+out:
+ rc = 0;
+ mutex_unlock(&kvm->lock);
+ return rc;
+}
+
static int assigned_device_enable_host_intx(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev)
{
@@ -1017,6 +1146,18 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
break;
}
+ case KVM_ASSIGN_DEV_PI_UPDATE: {
+ u32 dev_id;
+
+ r = -EFAULT;
+ if (copy_from_user(&dev_id, argp, sizeof(dev_id)))
+ goto out;
+ r = kvm_update_pi_irte(kvm, dev_id);
+ if (r)
+ goto out;
+ break;
+
+ }
default:
r = -ENOTTY;
break;
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 963b899..f51aed3 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -55,7 +55,7 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
line_status);
}
-inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
+bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
{
#ifdef CONFIG_IA64
return irq->delivery_mode ==
@@ -106,7 +106,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
return r;
}
-static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq)
{
trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 7f256f3..cdf29a6 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,17 +31,6 @@
#include <trace/events/kvm.h>
#include "irq.h"
-struct kvm_irq_routing_table {
- int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
- struct kvm_kernel_irq_routing_entry *rt_entries;
- u32 nr_rt_entries;
- /*
- * Array indexed by gsi. Each entry contains list of irq chips
- * the gsi is connected to.
- */
- struct hlist_head map[0];
-};
-
int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
{
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread* [PATCH 05/13] KVM: Update IRTE according to guest interrupt configuration changes
@ 2014-11-10 6:26 ` Feng Wu
0 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel, Feng Wu
When a guest changes its interrupt configuration (such as the vector)
for direct-assigned devices, we need to update the associated IRTE
with the new guest vector, so external interrupts from the assigned
devices can be injected into guests without a VM-Exit.
The current method of handling guest lowest-priority interrupts
is to use a counter 'apic_arb_prio' for each VCPU: we choose the
VCPU with the smallest 'apic_arb_prio' and then increase it by 1.
However, for VT-d PI, we cannot re-use this, since we no longer
have control over 'apic_arb_prio' when posted interrupts are
delivered directly by hardware.
Here, we introduce an approach similar to 'apic_arb_prio' to handle
guest lowest-priority interrupts when VT-d PI is used. The idea is
as follows:
- Each VCPU has a counter 'round_robin_counter'.
- When a guest sets an interrupt to lowest priority, we choose
the VCPU with the smallest 'round_robin_counter' as the destination,
then increase it.
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
arch/x86/include/asm/irq_remapping.h | 6 ++
arch/x86/include/asm/kvm_host.h | 2 +
arch/x86/kvm/vmx.c | 12 +++
arch/x86/kvm/x86.c | 11 +++
drivers/iommu/amd_iommu.c | 6 ++
drivers/iommu/intel_irq_remapping.c | 28 +++++++
drivers/iommu/irq_remapping.c | 9 ++
drivers/iommu/irq_remapping.h | 3 +
include/linux/dmar.h | 26 ++++++
include/linux/kvm_host.h | 22 +++++
include/uapi/linux/kvm.h | 1 +
virt/kvm/assigned-dev.c | 141 ++++++++++++++++++++++++++++++++++
virt/kvm/irq_comm.c | 4 +-
virt/kvm/irqchip.c | 11 ---
14 files changed, 269 insertions(+), 13 deletions(-)
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index a3cc437..32d6cc4 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -51,6 +51,7 @@ extern void compose_remapped_msi_msg(struct pci_dev *pdev,
unsigned int irq, unsigned int dest,
struct msi_msg *msg, u8 hpet_id);
extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
+extern int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 vector);
extern void panic_if_irq_remap(const char *msg);
extern bool setup_remapped_irq(int irq,
struct irq_cfg *cfg,
@@ -88,6 +89,11 @@ static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
return -ENODEV;
}
+static inline int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 vector)
+{
+ return -ENODEV;
+}
+
static inline void panic_if_irq_remap(const char *msg)
{
}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ed0c30..0630161 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -358,6 +358,7 @@ struct kvm_vcpu_arch {
struct kvm_lapic *apic; /* kernel irqchip context */
unsigned long apic_attention;
int32_t apic_arb_prio;
+ int32_t round_robin_counter;
int mp_state;
u64 ia32_misc_enable_msr;
bool tpr_access_reporting;
@@ -771,6 +772,7 @@ struct kvm_x86_ops {
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
+ u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
};
struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a4670d3..ae91b72 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -544,6 +544,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_vmx, vcpu);
}
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+ return &(to_vmx(vcpu)->pi_desc);
+}
+
#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
#define FIELD(number, name) [number] = VMCS12_OFFSET(name)
#define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
@@ -4280,6 +4285,11 @@ static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
return;
}
+static u64 vmx_get_pi_desc_addr(struct kvm_vcpu *vcpu)
+{
+ return __pa((u64)vcpu_to_pi_desc(vcpu));
+}
+
/*
* Set up the vmcs's constant host-state fields, i.e., host-state fields that
* will not change in the lifetime of the guest.
@@ -9232,6 +9242,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.check_nested_events = vmx_check_nested_events,
.sched_in = vmx_sched_in,
+
+ .get_pi_desc_addr = vmx_get_pi_desc_addr,
};
static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b447a98..0c19d15 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7735,6 +7735,17 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
+int kvm_update_pi_irte_common(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ u32 guest_vector, int host_irq)
+{
+ u64 pi_desc_addr = kvm_x86_ops->get_pi_desc_addr(vcpu);
+
+ if (update_pi_irte(host_irq, pi_desc_addr, guest_vector))
+ return -1;
+
+ return 0;
+}
+
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 505a9ad..a36fdc7 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4280,6 +4280,11 @@ static int alloc_hpet_msi(unsigned int irq, unsigned int id)
return 0;
}
+static int dummy_update_pi_irte(int irq, u64 pi_desc_addr, u32 vector)
+{
+ return -EINVAL;
+}
+
struct irq_remap_ops amd_iommu_irq_ops = {
.supported = amd_iommu_supported,
.prepare = amd_iommu_prepare,
@@ -4294,5 +4299,6 @@ struct irq_remap_ops amd_iommu_irq_ops = {
.msi_alloc_irq = msi_alloc_irq,
.msi_setup_irq = msi_setup_irq,
.alloc_hpet_msi = alloc_hpet_msi,
+ .update_pi_irte = dummy_update_pi_irte,
};
#endif
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 776da10..87c02fe 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1172,6 +1172,33 @@ static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id)
return ret;
}
+static int intel_update_pi_irte(int irq, u64 pi_desc_addr, u32 vector)
+{
+ struct irte irte;
+
+ if (get_irte(irq, &irte))
+ return -1;
+
+ irte.irq_post_low.urg = 0;
+ irte.irq_post_low.vector = vector;
+ irte.irq_post_low.pda_l = (pi_desc_addr >> (32 - PDA_LOW_BIT)) &
+ ~(-1UL << PDA_LOW_BIT);
+ irte.irq_post_high.pda_h = (pi_desc_addr >> 32) &
+ ~(-1UL << PDA_HIGH_BIT);
+
+ irte.irq_post_low.__reserved_1 = 0;
+ irte.irq_post_low.__reserved_2 = 0;
+ irte.irq_post_low.__reserved_3 = 0;
+ irte.irq_post_high.__reserved_4 = 0;
+
+ irte.irq_post_low.pst = 1;
+
+ if (modify_irte(irq, &irte))
+ return -1;
+
+ return 0;
+}
+
struct irq_remap_ops intel_irq_remap_ops = {
.supported = intel_irq_remapping_supported,
.prepare = dmar_table_init,
@@ -1186,4 +1213,5 @@ struct irq_remap_ops intel_irq_remap_ops = {
.msi_alloc_irq = intel_msi_alloc_irq,
.msi_setup_irq = intel_msi_setup_irq,
.alloc_hpet_msi = intel_alloc_hpet_msi,
+ .update_pi_irte = intel_update_pi_irte,
};
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 2f8ee00..0e36860 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -362,6 +362,15 @@ int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
return default_setup_hpet_msi(irq, id);
}
+int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 vector)
+{
+ if (!remap_ops || !remap_ops->update_pi_irte)
+ return -ENODEV;
+
+ return remap_ops->update_pi_irte(irq, pi_desc_addr, vector);
+}
+EXPORT_SYMBOL_GPL(update_pi_irte);
+
void panic_if_irq_remap(const char *msg)
{
if (irq_remapping_enabled)
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index 7bb5913..2d8f740 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -84,6 +84,9 @@ struct irq_remap_ops {
/* Setup interrupt remapping for an HPET MSI */
int (*alloc_hpet_msi)(unsigned int, unsigned int);
+
+ /* Update IRTE for posted-interrupt */
+ int (*update_pi_irte)(int irq, u64 pi_desc_addr, u32 vector);
};
extern struct irq_remap_ops intel_irq_remap_ops;
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 8be5d42..e1ff4f7 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -160,6 +160,20 @@ struct irte {
__reserved_2 : 8,
dest_id : 32;
} irq_remap_low;
+
+ struct {
+ __u64 present : 1,
+ fpd : 1,
+ __reserved_1 : 6,
+ avail : 4,
+ __reserved_2 : 2,
+ urg : 1,
+ pst : 1,
+ vector : 8,
+ __reserved_3 : 14,
+ pda_l : 26;
+ } irq_post_low;
+
__u64 low;
};
@@ -170,10 +184,22 @@ struct irte {
svt : 2,
__reserved_3 : 44;
} irq_remap_high;
+
+ struct {
+ __u64 sid: 16,
+ sq: 2,
+ svt: 2,
+ __reserved_4: 12,
+ pda_h: 32;
+ } irq_post_high;
+
__u64 high;
};
};
+#define PDA_LOW_BIT 26
+#define PDA_HIGH_BIT 32
+
enum {
IRQ_REMAP_XAPIC_MODE,
IRQ_REMAP_X2APIC_MODE,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ea53b04..6bb8287 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -335,6 +335,25 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
};
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+
+struct kvm_irq_routing_table {
+ int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+ struct kvm_kernel_irq_routing_entry *rt_entries;
+ u32 nr_rt_entries;
+ /*
+ * Array indexed by gsi. Each entry contains list of irq chips
+ * the gsi is connected to.
+ */
+ struct hlist_head map[0];
+};
+
+#else
+
+struct kvm_irq_routing_table {};
+
+#endif
+
#ifndef KVM_PRIVATE_MEM_SLOTS
#define KVM_PRIVATE_MEM_SLOTS 0
#endif
@@ -766,6 +785,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm_lapic_irq *irq);
+bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq);
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7593c52..509223a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1027,6 +1027,7 @@ struct kvm_s390_ucas_mapping {
#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data)
#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data)
+#define KVM_ASSIGN_DEV_PI_UPDATE _IOR(KVMIO, 0x7d, __u32)
/* Available with KVM_CAP_PIT_STATE2 */
#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index e05000e..e154009 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -326,6 +326,135 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
}
}
+int __weak kvm_update_pi_irte_common(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ u32 guest_vector, int host_irq)
+{
+ return 0;
+}
+
+int kvm_compare_rr_counter(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
+{
+ return vcpu1->arch.round_robin_counter -
+ vcpu2->arch.round_robin_counter;
+}
+
+bool kvm_pi_find_dest_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+ struct kvm_vcpu **dest_vcpu)
+{
+ int i, r = 0;
+ struct kvm_vcpu *vcpu, *dest = NULL;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!kvm_apic_present(vcpu))
+ continue;
+
+ if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+ irq->dest_id, irq->dest_mode))
+ continue;
+
+ if (!kvm_is_dm_lowest_prio(irq)) {
+ r++;
+ *dest_vcpu = vcpu;
+ } else if (kvm_lapic_enabled(vcpu)) {
+ if (!dest)
+ dest = vcpu;
+ else if (kvm_compare_rr_counter(vcpu, dest) < 0)
+ dest = vcpu;
+ }
+ }
+
+ if (dest) {
+ dest->arch.round_robin_counter++;
+ *dest_vcpu = dest;
+ return true;
+ } else if (r == 1)
+ return true;
+
+ return false;
+}
+
+static int __kvm_update_pi_irte(struct kvm *kvm, int host_irq, int guest_irq)
+{
+ struct kvm_kernel_irq_routing_entry *e;
+ struct kvm_irq_routing_table *irq_rt;
+ struct kvm_lapic_irq irq;
+ struct kvm_vcpu *vcpu;
+ int idx, ret = -EINVAL;
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+ ASSERT(guest_irq < irq_rt->nr_rt_entries);
+
+ hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+ if (e->type != KVM_IRQ_ROUTING_MSI)
+ continue;
+ /*
+ * VT-d posted-interrupt has the following
+ * limitations:
+ * - No support for posting multicast/broadcast
+ * interrupts to a VCPU
+ * Still use interrupt remapping for these
+ * kind of interrupts
+ */
+
+ kvm_set_msi_irq(e, &irq);
+ if (!kvm_pi_find_dest_vcpu(kvm, &irq, &vcpu)) {
+ printk(KERN_INFO "%s: can not find the target VCPU\n",
+ __func__);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (kvm_update_pi_irte_common(kvm, vcpu, irq.vector,
+ host_irq)) {
+ printk(KERN_INFO "%s: failed to update PI IRTE\n",
+ __func__);
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+ return ret;
+}
+
+int kvm_update_pi_irte(struct kvm *kvm, u32 dev_id)
+{
+ int i, rc = -1;
+ struct kvm_assigned_dev_kernel *dev;
+
+ mutex_lock(&kvm->lock);
+ dev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, dev_id);
+ if (!dev) {
+ printk(KERN_INFO "%s: cannot find the assigned dev.\n",
+ __func__);
+ rc = -1;
+ goto out;
+ }
+
+ BUG_ON(dev->irq_requested_type == 0);
+
+ if ((dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) &&
+ (dev->dev->msi_enabled == 1)) {
+ __kvm_update_pi_irte(kvm,
+ dev->host_irq, dev->guest_irq);
+ } else if ((dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) &&
+ (dev->dev->msix_enabled == 1)) {
+ for (i = 0; i < dev->entries_nr; i++) {
+ __kvm_update_pi_irte(kvm,
+ dev->host_msix_entries[i].vector,
+ dev->guest_msix_entries[i].vector);
+ }
+ }
+
+out:
+ rc = 0;
+ mutex_unlock(&kvm->lock);
+ return rc;
+}
+
static int assigned_device_enable_host_intx(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev)
{
@@ -1017,6 +1146,18 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
break;
}
+ case KVM_ASSIGN_DEV_PI_UPDATE: {
+ u32 dev_id;
+
+ r = -EFAULT;
+ if (copy_from_user(&dev_id, argp, sizeof(dev_id)))
+ goto out;
+ r = kvm_update_pi_irte(kvm, dev_id);
+ if (r)
+ goto out;
+ break;
+
+ }
default:
r = -ENOTTY;
break;
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 963b899..f51aed3 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -55,7 +55,7 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
line_status);
}
-inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
+bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
{
#ifdef CONFIG_IA64
return irq->delivery_mode ==
@@ -106,7 +106,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
return r;
}
-static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq)
{
trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 7f256f3..cdf29a6 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,17 +31,6 @@
#include <trace/events/kvm.h>
#include "irq.h"
-struct kvm_irq_routing_table {
- int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
- struct kvm_kernel_irq_routing_entry *rt_entries;
- u32 nr_rt_entries;
- /*
- * Array indexed by gsi. Each entry contains list of irq chips
- * the gsi is connected to.
- */
- struct hlist_head map[0];
-};
-
int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
{
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread[parent not found: <1415600812-27773-6-git-send-email-feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>]
* Re: [PATCH 05/13] KVM: Update IRTE according to guest interrupt configuration changes
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 21:57 ` Alex Williamson
-1 siblings, 0 replies; 53+ messages in thread
From: Alex Williamson @ 2014-11-10 21:57 UTC (permalink / raw)
To: Feng Wu
Cc: kvm-u79uwXL29TY76Z2rM5mHXA, gleb-DgEjT+Ai2ygdnm+yROfE0A,
x86-DgEjT+Ai2ygdnm+yROfE0A, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
mingo-H+wXaHxf7aLQT0dZR+AlfA, hpa-YMNOUZJC4hwAvxtiuMwx3w,
pbonzini-H+wXaHxf7aLQT0dZR+AlfA, tglx-hfZtesqFncYOwBW4kG4KsQ,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ
On Mon, 2014-11-10 at 14:26 +0800, Feng Wu wrote:
> When guest changes its interrupt configuration (such as, vector, etc.)
> for direct-assigned devices, we need to update the associated IRTE
> with the new guest vector, so external interrupts from the assigned
> devices can be injected to guests without VM-Exit.
>
> The current method of handling guest lowest priority interrupts
> is to use a counter 'apic_arb_prio' for each VCPU, we choose the
> VCPU with smallest 'apic_arb_prio' and then increase it by 1.
> However, for VT-d PI, we cannot re-use this, since we no longer
> have control to 'apic_arb_prio' with posted interrupt direct
> delivery by Hardware.
>
> Here, we introduce a similar way with 'apic_arb_prio' to handle
> guest lowest priority interrupts when VT-d PI is used. Here are the
> ideas:
> - Each VCPU has a counter 'round_robin_counter'.
> - When a guest sets an interrupt to lowest priority, we choose
> the VCPU with smallest 'round_robin_counter' as the destination,
> then increase it.
>
> Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> ---
> arch/x86/include/asm/irq_remapping.h | 6 ++
> arch/x86/include/asm/kvm_host.h | 2 +
> arch/x86/kvm/vmx.c | 12 +++
> arch/x86/kvm/x86.c | 11 +++
> drivers/iommu/amd_iommu.c | 6 ++
> drivers/iommu/intel_irq_remapping.c | 28 +++++++
> drivers/iommu/irq_remapping.c | 9 ++
> drivers/iommu/irq_remapping.h | 3 +
> include/linux/dmar.h | 26 ++++++
> include/linux/kvm_host.h | 22 +++++
> include/uapi/linux/kvm.h | 1 +
> virt/kvm/assigned-dev.c | 141 ++++++++++++++++++++++++++++++++++
> virt/kvm/irq_comm.c | 4 +-
> virt/kvm/irqchip.c | 11 ---
> 14 files changed, 269 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
> index a3cc437..32d6cc4 100644
> --- a/arch/x86/include/asm/irq_remapping.h
> +++ b/arch/x86/include/asm/irq_remapping.h
> @@ -51,6 +51,7 @@ extern void compose_remapped_msi_msg(struct pci_dev *pdev,
> unsigned int irq, unsigned int dest,
> struct msi_msg *msg, u8 hpet_id);
> extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
> +extern int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 vector);
> extern void panic_if_irq_remap(const char *msg);
> extern bool setup_remapped_irq(int irq,
> struct irq_cfg *cfg,
> @@ -88,6 +89,11 @@ static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
> return -ENODEV;
> }
>
> +static inline int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 vector)
> +{
> + return -ENODEV;
> +}
> +
> static inline void panic_if_irq_remap(const char *msg)
> {
> }
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6ed0c30..0630161 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -358,6 +358,7 @@ struct kvm_vcpu_arch {
> struct kvm_lapic *apic; /* kernel irqchip context */
> unsigned long apic_attention;
> int32_t apic_arb_prio;
> + int32_t round_robin_counter;
> int mp_state;
> u64 ia32_misc_enable_msr;
> bool tpr_access_reporting;
> @@ -771,6 +772,7 @@ struct kvm_x86_ops {
> int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
>
> void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
> + u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
> };
>
> struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a4670d3..ae91b72 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -544,6 +544,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
> return container_of(vcpu, struct vcpu_vmx, vcpu);
> }
>
> +struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
> +{
> + return &(to_vmx(vcpu)->pi_desc);
> +}
> +
> #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
> #define FIELD(number, name) [number] = VMCS12_OFFSET(name)
> #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
> @@ -4280,6 +4285,11 @@ static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
> return;
> }
>
> +static u64 vmx_get_pi_desc_addr(struct kvm_vcpu *vcpu)
> +{
> + return __pa((u64)vcpu_to_pi_desc(vcpu));
> +}
> +
> /*
> * Set up the vmcs's constant host-state fields, i.e., host-state fields that
> * will not change in the lifetime of the guest.
> @@ -9232,6 +9242,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
> .check_nested_events = vmx_check_nested_events,
>
> .sched_in = vmx_sched_in,
> +
> + .get_pi_desc_addr = vmx_get_pi_desc_addr,
> };
>
> static int __init vmx_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index b447a98..0c19d15 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7735,6 +7735,17 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
> }
> EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
>
> +int kvm_update_pi_irte_common(struct kvm *kvm, struct kvm_vcpu *vcpu,
> + u32 guest_vector, int host_irq)
> +{
> + u64 pi_desc_addr = kvm_x86_ops->get_pi_desc_addr(vcpu);
> +
> + if (update_pi_irte(host_irq, pi_desc_addr, guest_vector))
> + return -1;
> +
> + return 0;
> +}
> +
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index 505a9ad..a36fdc7 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -4280,6 +4280,11 @@ static int alloc_hpet_msi(unsigned int irq, unsigned int id)
> return 0;
> }
>
> +static int dummy_update_pi_irte(int irq, u64 pi_desc_addr, u32 vector)
> +{
> + return -EINVAL;
> +}
> +
> struct irq_remap_ops amd_iommu_irq_ops = {
> .supported = amd_iommu_supported,
> .prepare = amd_iommu_prepare,
> @@ -4294,5 +4299,6 @@ struct irq_remap_ops amd_iommu_irq_ops = {
> .msi_alloc_irq = msi_alloc_irq,
> .msi_setup_irq = msi_setup_irq,
> .alloc_hpet_msi = alloc_hpet_msi,
> + .update_pi_irte = dummy_update_pi_irte,
> };
> #endif
> diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
> index 776da10..87c02fe 100644
> --- a/drivers/iommu/intel_irq_remapping.c
> +++ b/drivers/iommu/intel_irq_remapping.c
> @@ -1172,6 +1172,33 @@ static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id)
> return ret;
> }
>
> +static int intel_update_pi_irte(int irq, u64 pi_desc_addr, u32 vector)
> +{
> + struct irte irte;
> +
> + if (get_irte(irq, &irte))
> + return -1;
> +
> + irte.irq_post_low.urg = 0;
> + irte.irq_post_low.vector = vector;
> + irte.irq_post_low.pda_l = (pi_desc_addr >> (32 - PDA_LOW_BIT)) &
> + ~(-1UL << PDA_LOW_BIT);
> + irte.irq_post_high.pda_h = (pi_desc_addr >> 32) &
> + ~(-1UL << PDA_HIGH_BIT);
> +
> + irte.irq_post_low.__reserved_1 = 0;
> + irte.irq_post_low.__reserved_2 = 0;
> + irte.irq_post_low.__reserved_3 = 0;
> + irte.irq_post_high.__reserved_4 = 0;
> +
> + irte.irq_post_low.pst = 1;
> +
> + if (modify_irte(irq, &irte))
> + return -1;
> +
> + return 0;
> +}
> +
> struct irq_remap_ops intel_irq_remap_ops = {
> .supported = intel_irq_remapping_supported,
> .prepare = dmar_table_init,
> @@ -1186,4 +1213,5 @@ struct irq_remap_ops intel_irq_remap_ops = {
> .msi_alloc_irq = intel_msi_alloc_irq,
> .msi_setup_irq = intel_msi_setup_irq,
> .alloc_hpet_msi = intel_alloc_hpet_msi,
> + .update_pi_irte = intel_update_pi_irte,
Extending irq_remap_ops should really be a separate patch from its use
by KVM.
> };
> diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
> index 2f8ee00..0e36860 100644
> --- a/drivers/iommu/irq_remapping.c
> +++ b/drivers/iommu/irq_remapping.c
> @@ -362,6 +362,15 @@ int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
> return default_setup_hpet_msi(irq, id);
> }
>
> +int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 vector)
> +{
> + if (!remap_ops || !remap_ops->update_pi_irte)
> + return -ENODEV;
> +
> + return remap_ops->update_pi_irte(irq, pi_desc_addr, vector);
> +}
> +EXPORT_SYMBOL_GPL(update_pi_irte);
> +
> void panic_if_irq_remap(const char *msg)
> {
> if (irq_remapping_enabled)
> diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
> index 7bb5913..2d8f740 100644
> --- a/drivers/iommu/irq_remapping.h
> +++ b/drivers/iommu/irq_remapping.h
> @@ -84,6 +84,9 @@ struct irq_remap_ops {
>
> /* Setup interrupt remapping for an HPET MSI */
> int (*alloc_hpet_msi)(unsigned int, unsigned int);
> +
> + /* Update IRTE for posted-interrupt */
> + int (*update_pi_irte)(int irq, u64 pi_desc_addr, u32 vector);
> };
>
> extern struct irq_remap_ops intel_irq_remap_ops;
> diff --git a/include/linux/dmar.h b/include/linux/dmar.h
> index 8be5d42..e1ff4f7 100644
> --- a/include/linux/dmar.h
> +++ b/include/linux/dmar.h
> @@ -160,6 +160,20 @@ struct irte {
> __reserved_2 : 8,
> dest_id : 32;
> } irq_remap_low;
> +
> + struct {
> + __u64 present : 1,
> + fpd : 1,
> + __reserved_1 : 6,
> + avail : 4,
> + __reserved_2 : 2,
> + urg : 1,
> + pst : 1,
> + vector : 8,
> + __reserved_3 : 14,
> + pda_l : 26;
> + } irq_post_low;
> +
> __u64 low;
> };
>
> @@ -170,10 +184,22 @@ struct irte {
> svt : 2,
> __reserved_3 : 44;
> } irq_remap_high;
> +
> + struct {
> + __u64 sid: 16,
> + sq: 2,
> + svt: 2,
> + __reserved_4: 12,
> + pda_h: 32;
> + } irq_post_high;
> +
> __u64 high;
> };
> };
>
> +#define PDA_LOW_BIT 26
> +#define PDA_HIGH_BIT 32
> +
> enum {
> IRQ_REMAP_XAPIC_MODE,
> IRQ_REMAP_X2APIC_MODE,
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index ea53b04..6bb8287 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -335,6 +335,25 @@ struct kvm_kernel_irq_routing_entry {
> struct hlist_node link;
> };
>
> +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
> +
> +struct kvm_irq_routing_table {
> + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
> + struct kvm_kernel_irq_routing_entry *rt_entries;
> + u32 nr_rt_entries;
> + /*
> + * Array indexed by gsi. Each entry contains list of irq chips
> + * the gsi is connected to.
> + */
> + struct hlist_head map[0];
> +};
> +
> +#else
> +
> +struct kvm_irq_routing_table {};
> +
> +#endif
> +
> #ifndef KVM_PRIVATE_MEM_SLOTS
> #define KVM_PRIVATE_MEM_SLOTS 0
> #endif
> @@ -766,6 +785,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> struct kvm_irq_ack_notifier *kian);
> int kvm_request_irq_source_id(struct kvm *kvm);
> void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
> +void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
> + struct kvm_lapic_irq *irq);
> +bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq);
>
> #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
> int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 7593c52..509223a 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1027,6 +1027,7 @@ struct kvm_s390_ucas_mapping {
> #define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
> #define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data)
> #define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data)
> +#define KVM_ASSIGN_DEV_PI_UPDATE _IOR(KVMIO, 0x7d, __u32)
> /* Available with KVM_CAP_PIT_STATE2 */
> #define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
> #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
Needs an accompanying Documentation/virtual/kvm/api.txt update.
> diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
> index e05000e..e154009 100644
> --- a/virt/kvm/assigned-dev.c
> +++ b/virt/kvm/assigned-dev.c
Since legacy KVM device assignment is effectively deprecated, have you
considered how we might do this with VFIO? Thanks,
Alex
> @@ -326,6 +326,135 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
> }
> }
>
> +int __weak kvm_update_pi_irte_common(struct kvm *kvm, struct kvm_vcpu *vcpu,
> + u32 guest_vector, int host_irq)
> +{
> + return 0;
> +}
> +
> +int kvm_compare_rr_counter(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
> +{
> + return vcpu1->arch.round_robin_counter -
> + vcpu2->arch.round_robin_counter;
> +}
> +
> +bool kvm_pi_find_dest_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
> + struct kvm_vcpu **dest_vcpu)
> +{
> + int i, r = 0;
> + struct kvm_vcpu *vcpu, *dest = NULL;
> +
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + if (!kvm_apic_present(vcpu))
> + continue;
> +
> + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
> + irq->dest_id, irq->dest_mode))
> + continue;
> +
> + if (!kvm_is_dm_lowest_prio(irq)) {
> + r++;
> + *dest_vcpu = vcpu;
> + } else if (kvm_lapic_enabled(vcpu)) {
> + if (!dest)
> + dest = vcpu;
> + else if (kvm_compare_rr_counter(vcpu, dest) < 0)
> + dest = vcpu;
> + }
> + }
> +
> + if (dest) {
> + dest->arch.round_robin_counter++;
> + *dest_vcpu = dest;
> + return true;
> + } else if (r == 1)
> + return true;
> +
> + return false;
> +}
> +
> +static int __kvm_update_pi_irte(struct kvm *kvm, int host_irq, int guest_irq)
> +{
> + struct kvm_kernel_irq_routing_entry *e;
> + struct kvm_irq_routing_table *irq_rt;
> + struct kvm_lapic_irq irq;
> + struct kvm_vcpu *vcpu;
> + int idx, ret = -EINVAL;
> +
> + idx = srcu_read_lock(&kvm->irq_srcu);
> + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
> + ASSERT(guest_irq < irq_rt->nr_rt_entries);
> +
> + hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
> + if (e->type != KVM_IRQ_ROUTING_MSI)
> + continue;
> + /*
> + * VT-d posted-interrupt has the following
> + * limitations:
> + * - No support for posting multicast/broadcast
> + * interrupts to a VCPU
> + * Still use interrupt remapping for these
> + * kind of interrupts
> + */
> +
> + kvm_set_msi_irq(e, &irq);
> + if (!kvm_pi_find_dest_vcpu(kvm, &irq, &vcpu)) {
> + printk(KERN_INFO "%s: can not find the target VCPU\n",
> + __func__);
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + if (kvm_update_pi_irte_common(kvm, vcpu, irq.vector,
> + host_irq)) {
> + printk(KERN_INFO "%s: failed to update PI IRTE\n",
> + __func__);
> + ret = -EINVAL;
> + goto out;
> + }
> + }
> +
> + ret = 0;
> +out:
> + srcu_read_unlock(&kvm->irq_srcu, idx);
> + return ret;
> +}
> +
> +int kvm_update_pi_irte(struct kvm *kvm, u32 dev_id)
> +{
> + int i, rc = -1;
> + struct kvm_assigned_dev_kernel *dev;
> +
> + mutex_lock(&kvm->lock);
> + dev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, dev_id);
> + if (!dev) {
> + printk(KERN_INFO "%s: cannot find the assigned dev.\n",
> + __func__);
> + rc = -1;
> + goto out;
> + }
> +
> + BUG_ON(dev->irq_requested_type == 0);
> +
> + if ((dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) &&
> + (dev->dev->msi_enabled == 1)) {
> + __kvm_update_pi_irte(kvm,
> + dev->host_irq, dev->guest_irq);
> + } else if ((dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) &&
> + (dev->dev->msix_enabled == 1)) {
> + for (i = 0; i < dev->entries_nr; i++) {
> + __kvm_update_pi_irte(kvm,
> + dev->host_msix_entries[i].vector,
> + dev->guest_msix_entries[i].vector);
> + }
> + }
> +
> +out:
> + rc = 0;
> + mutex_unlock(&kvm->lock);
> + return rc;
> +}
> +
> static int assigned_device_enable_host_intx(struct kvm *kvm,
> struct kvm_assigned_dev_kernel *dev)
> {
> @@ -1017,6 +1146,18 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
> r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
> break;
> }
> + case KVM_ASSIGN_DEV_PI_UPDATE: {
> + u32 dev_id;
> +
> + r = -EFAULT;
> + if (copy_from_user(&dev_id, argp, sizeof(dev_id)))
> + goto out;
> + r = kvm_update_pi_irte(kvm, dev_id);
> + if (r)
> + goto out;
> + break;
> +
> + }
> default:
> r = -ENOTTY;
> break;
> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> index 963b899..f51aed3 100644
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -55,7 +55,7 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
> line_status);
> }
>
> -inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
> +bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
> {
> #ifdef CONFIG_IA64
> return irq->delivery_mode ==
> @@ -106,7 +106,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
> return r;
> }
>
> -static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
> +void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
> struct kvm_lapic_irq *irq)
> {
> trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
> diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
> index 7f256f3..cdf29a6 100644
> --- a/virt/kvm/irqchip.c
> +++ b/virt/kvm/irqchip.c
> @@ -31,17 +31,6 @@
> #include <trace/events/kvm.h>
> #include "irq.h"
>
> -struct kvm_irq_routing_table {
> - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
> - struct kvm_kernel_irq_routing_entry *rt_entries;
> - u32 nr_rt_entries;
> - /*
> - * Array indexed by gsi. Each entry contains list of irq chips
> - * the gsi is connected to.
> - */
> - struct hlist_head map[0];
> -};
> -
> int kvm_irq_map_gsi(struct kvm *kvm,
> struct kvm_kernel_irq_routing_entry *entries, int gsi)
> {
^ permalink raw reply [flat|nested] 53+ messages in thread* Re: [PATCH 05/13] KVM: Update IRTE according to guest interrupt configuration changes
@ 2014-11-10 21:57 ` Alex Williamson
0 siblings, 0 replies; 53+ messages in thread
From: Alex Williamson @ 2014-11-10 21:57 UTC (permalink / raw)
To: Feng Wu
Cc: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86, kvm, iommu,
linux-kernel
On Mon, 2014-11-10 at 14:26 +0800, Feng Wu wrote:
> When guest changes its interrupt configuration (such as, vector, etc.)
> for direct-assigned devices, we need to update the associated IRTE
> with the new guest vector, so external interrupts from the assigned
> devices can be injected to guests without VM-Exit.
>
> The current method of handling guest lowest priority interrupts
> is to use a counter 'apic_arb_prio' for each VCPU, we choose the
> VCPU with smallest 'apic_arb_prio' and then increase it by 1.
> However, for VT-d PI, we cannot re-use this, since we no longer
> have control to 'apic_arb_prio' with posted interrupt direct
> delivery by Hardware.
>
> Here, we introduce a similar way with 'apic_arb_prio' to handle
> guest lowest priority interrupts when VT-d PI is used. Here are the
> ideas:
> - Each VCPU has a counter 'round_robin_counter'.
> - When a guest sets an interrupt to lowest priority, we choose
> the VCPU with smallest 'round_robin_counter' as the destination,
> then increase it.
>
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> ---
> arch/x86/include/asm/irq_remapping.h | 6 ++
> arch/x86/include/asm/kvm_host.h | 2 +
> arch/x86/kvm/vmx.c | 12 +++
> arch/x86/kvm/x86.c | 11 +++
> drivers/iommu/amd_iommu.c | 6 ++
> drivers/iommu/intel_irq_remapping.c | 28 +++++++
> drivers/iommu/irq_remapping.c | 9 ++
> drivers/iommu/irq_remapping.h | 3 +
> include/linux/dmar.h | 26 ++++++
> include/linux/kvm_host.h | 22 +++++
> include/uapi/linux/kvm.h | 1 +
> virt/kvm/assigned-dev.c | 141 ++++++++++++++++++++++++++++++++++
> virt/kvm/irq_comm.c | 4 +-
> virt/kvm/irqchip.c | 11 ---
> 14 files changed, 269 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
> index a3cc437..32d6cc4 100644
> --- a/arch/x86/include/asm/irq_remapping.h
> +++ b/arch/x86/include/asm/irq_remapping.h
> @@ -51,6 +51,7 @@ extern void compose_remapped_msi_msg(struct pci_dev *pdev,
> unsigned int irq, unsigned int dest,
> struct msi_msg *msg, u8 hpet_id);
> extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
> +extern int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 vector);
> extern void panic_if_irq_remap(const char *msg);
> extern bool setup_remapped_irq(int irq,
> struct irq_cfg *cfg,
> @@ -88,6 +89,11 @@ static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
> return -ENODEV;
> }
>
> +static inline int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 vector)
> +{
> + return -ENODEV;
> +}
> +
> static inline void panic_if_irq_remap(const char *msg)
> {
> }
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6ed0c30..0630161 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -358,6 +358,7 @@ struct kvm_vcpu_arch {
> struct kvm_lapic *apic; /* kernel irqchip context */
> unsigned long apic_attention;
> int32_t apic_arb_prio;
> + int32_t round_robin_counter;
> int mp_state;
> u64 ia32_misc_enable_msr;
> bool tpr_access_reporting;
> @@ -771,6 +772,7 @@ struct kvm_x86_ops {
> int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
>
> void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
> + u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
> };
>
> struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a4670d3..ae91b72 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -544,6 +544,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
> return container_of(vcpu, struct vcpu_vmx, vcpu);
> }
>
> +struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
> +{
> + return &(to_vmx(vcpu)->pi_desc);
> +}
> +
> #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
> #define FIELD(number, name) [number] = VMCS12_OFFSET(name)
> #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
> @@ -4280,6 +4285,11 @@ static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
> return;
> }
>
> +static u64 vmx_get_pi_desc_addr(struct kvm_vcpu *vcpu)
> +{
> + return __pa((u64)vcpu_to_pi_desc(vcpu));
> +}
> +
> /*
> * Set up the vmcs's constant host-state fields, i.e., host-state fields that
> * will not change in the lifetime of the guest.
> @@ -9232,6 +9242,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
> .check_nested_events = vmx_check_nested_events,
>
> .sched_in = vmx_sched_in,
> +
> + .get_pi_desc_addr = vmx_get_pi_desc_addr,
> };
>
> static int __init vmx_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index b447a98..0c19d15 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7735,6 +7735,17 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
> }
> EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
>
> +int kvm_update_pi_irte_common(struct kvm *kvm, struct kvm_vcpu *vcpu,
> + u32 guest_vector, int host_irq)
> +{
> + u64 pi_desc_addr = kvm_x86_ops->get_pi_desc_addr(vcpu);
> +
> + if (update_pi_irte(host_irq, pi_desc_addr, guest_vector))
> + return -1;
> +
> + return 0;
> +}
> +
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index 505a9ad..a36fdc7 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -4280,6 +4280,11 @@ static int alloc_hpet_msi(unsigned int irq, unsigned int id)
> return 0;
> }
>
> +static int dummy_update_pi_irte(int irq, u64 pi_desc_addr, u32 vector)
> +{
> + return -EINVAL;
> +}
> +
> struct irq_remap_ops amd_iommu_irq_ops = {
> .supported = amd_iommu_supported,
> .prepare = amd_iommu_prepare,
> @@ -4294,5 +4299,6 @@ struct irq_remap_ops amd_iommu_irq_ops = {
> .msi_alloc_irq = msi_alloc_irq,
> .msi_setup_irq = msi_setup_irq,
> .alloc_hpet_msi = alloc_hpet_msi,
> + .update_pi_irte = dummy_update_pi_irte,
> };
> #endif
> diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
> index 776da10..87c02fe 100644
> --- a/drivers/iommu/intel_irq_remapping.c
> +++ b/drivers/iommu/intel_irq_remapping.c
> @@ -1172,6 +1172,33 @@ static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id)
> return ret;
> }
>
> +static int intel_update_pi_irte(int irq, u64 pi_desc_addr, u32 vector)
> +{
> + struct irte irte;
> +
> + if (get_irte(irq, &irte))
> + return -1;
> +
> + irte.irq_post_low.urg = 0;
> + irte.irq_post_low.vector = vector;
> + irte.irq_post_low.pda_l = (pi_desc_addr >> (32 - PDA_LOW_BIT)) &
> + ~(-1UL << PDA_LOW_BIT);
> + irte.irq_post_high.pda_h = (pi_desc_addr >> 32) &
> + ~(-1UL << PDA_HIGH_BIT);
> +
> + irte.irq_post_low.__reserved_1 = 0;
> + irte.irq_post_low.__reserved_2 = 0;
> + irte.irq_post_low.__reserved_3 = 0;
> + irte.irq_post_high.__reserved_4 = 0;
> +
> + irte.irq_post_low.pst = 1;
> +
> + if (modify_irte(irq, &irte))
> + return -1;
> +
> + return 0;
> +}
> +
> struct irq_remap_ops intel_irq_remap_ops = {
> .supported = intel_irq_remapping_supported,
> .prepare = dmar_table_init,
> @@ -1186,4 +1213,5 @@ struct irq_remap_ops intel_irq_remap_ops = {
> .msi_alloc_irq = intel_msi_alloc_irq,
> .msi_setup_irq = intel_msi_setup_irq,
> .alloc_hpet_msi = intel_alloc_hpet_msi,
> + .update_pi_irte = intel_update_pi_irte,
Extending irq_remap_ops should really be a separate patch from its use
by KVM.
> };
> diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
> index 2f8ee00..0e36860 100644
> --- a/drivers/iommu/irq_remapping.c
> +++ b/drivers/iommu/irq_remapping.c
> @@ -362,6 +362,15 @@ int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
> return default_setup_hpet_msi(irq, id);
> }
>
> +int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 vector)
> +{
> + if (!remap_ops || !remap_ops->update_pi_irte)
> + return -ENODEV;
> +
> + return remap_ops->update_pi_irte(irq, pi_desc_addr, vector);
> +}
> +EXPORT_SYMBOL_GPL(update_pi_irte);
> +
> void panic_if_irq_remap(const char *msg)
> {
> if (irq_remapping_enabled)
> diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
> index 7bb5913..2d8f740 100644
> --- a/drivers/iommu/irq_remapping.h
> +++ b/drivers/iommu/irq_remapping.h
> @@ -84,6 +84,9 @@ struct irq_remap_ops {
>
> /* Setup interrupt remapping for an HPET MSI */
> int (*alloc_hpet_msi)(unsigned int, unsigned int);
> +
> + /* Update IRTE for posted-interrupt */
> + int (*update_pi_irte)(int irq, u64 pi_desc_addr, u32 vector);
> };
>
> extern struct irq_remap_ops intel_irq_remap_ops;
> diff --git a/include/linux/dmar.h b/include/linux/dmar.h
> index 8be5d42..e1ff4f7 100644
> --- a/include/linux/dmar.h
> +++ b/include/linux/dmar.h
> @@ -160,6 +160,20 @@ struct irte {
> __reserved_2 : 8,
> dest_id : 32;
> } irq_remap_low;
> +
> + struct {
> + __u64 present : 1,
> + fpd : 1,
> + __reserved_1 : 6,
> + avail : 4,
> + __reserved_2 : 2,
> + urg : 1,
> + pst : 1,
> + vector : 8,
> + __reserved_3 : 14,
> + pda_l : 26;
> + } irq_post_low;
> +
> __u64 low;
> };
>
> @@ -170,10 +184,22 @@ struct irte {
> svt : 2,
> __reserved_3 : 44;
> } irq_remap_high;
> +
> + struct {
> + __u64 sid: 16,
> + sq: 2,
> + svt: 2,
> + __reserved_4: 12,
> + pda_h: 32;
> + } irq_post_high;
> +
> __u64 high;
> };
> };
>
> +#define PDA_LOW_BIT 26
> +#define PDA_HIGH_BIT 32
> +
> enum {
> IRQ_REMAP_XAPIC_MODE,
> IRQ_REMAP_X2APIC_MODE,
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index ea53b04..6bb8287 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -335,6 +335,25 @@ struct kvm_kernel_irq_routing_entry {
> struct hlist_node link;
> };
>
> +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
> +
> +struct kvm_irq_routing_table {
> + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
> + struct kvm_kernel_irq_routing_entry *rt_entries;
> + u32 nr_rt_entries;
> + /*
> + * Array indexed by gsi. Each entry contains list of irq chips
> + * the gsi is connected to.
> + */
> + struct hlist_head map[0];
> +};
> +
> +#else
> +
> +struct kvm_irq_routing_table {};
> +
> +#endif
> +
> #ifndef KVM_PRIVATE_MEM_SLOTS
> #define KVM_PRIVATE_MEM_SLOTS 0
> #endif
> @@ -766,6 +785,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> struct kvm_irq_ack_notifier *kian);
> int kvm_request_irq_source_id(struct kvm *kvm);
> void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
> +void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
> + struct kvm_lapic_irq *irq);
> +bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq);
>
> #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
> int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 7593c52..509223a 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1027,6 +1027,7 @@ struct kvm_s390_ucas_mapping {
> #define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
> #define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data)
> #define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data)
> +#define KVM_ASSIGN_DEV_PI_UPDATE _IOR(KVMIO, 0x7d, __u32)
> /* Available with KVM_CAP_PIT_STATE2 */
> #define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
> #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
Needs an accompanying Documentation/virtual/kvm/api.txt update.
> diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
> index e05000e..e154009 100644
> --- a/virt/kvm/assigned-dev.c
> +++ b/virt/kvm/assigned-dev.c
Since legacy KVM device assignment is effectively deprecated, have you
considered how we might do this with VFIO? Thanks,
Alex
> @@ -326,6 +326,135 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
> }
> }
>
> +int __weak kvm_update_pi_irte_common(struct kvm *kvm, struct kvm_vcpu *vcpu,
> + u32 guest_vector, int host_irq)
> +{
> + return 0;
> +}
> +
> +int kvm_compare_rr_counter(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
> +{
> + return vcpu1->arch.round_robin_counter -
> + vcpu2->arch.round_robin_counter;
> +}
> +
> +bool kvm_pi_find_dest_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
> + struct kvm_vcpu **dest_vcpu)
> +{
> + int i, r = 0;
> + struct kvm_vcpu *vcpu, *dest = NULL;
> +
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + if (!kvm_apic_present(vcpu))
> + continue;
> +
> + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
> + irq->dest_id, irq->dest_mode))
> + continue;
> +
> + if (!kvm_is_dm_lowest_prio(irq)) {
> + r++;
> + *dest_vcpu = vcpu;
> + } else if (kvm_lapic_enabled(vcpu)) {
> + if (!dest)
> + dest = vcpu;
> + else if (kvm_compare_rr_counter(vcpu, dest) < 0)
> + dest = vcpu;
> + }
> + }
> +
> + if (dest) {
> + dest->arch.round_robin_counter++;
> + *dest_vcpu = dest;
> + return true;
> + } else if (r == 1)
> + return true;
> +
> + return false;
> +}
> +
> +static int __kvm_update_pi_irte(struct kvm *kvm, int host_irq, int guest_irq)
> +{
> + struct kvm_kernel_irq_routing_entry *e;
> + struct kvm_irq_routing_table *irq_rt;
> + struct kvm_lapic_irq irq;
> + struct kvm_vcpu *vcpu;
> + int idx, ret = -EINVAL;
> +
> + idx = srcu_read_lock(&kvm->irq_srcu);
> + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
> + ASSERT(guest_irq < irq_rt->nr_rt_entries);
> +
> + hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
> + if (e->type != KVM_IRQ_ROUTING_MSI)
> + continue;
> + /*
> + * VT-d posted-interrupt has the following
> + * limitations:
> + * - No support for posting multicast/broadcast
> + * interrupts to a VCPU
> + * Still use interrupt remapping for these
> + * kind of interrupts
> + */
> +
> + kvm_set_msi_irq(e, &irq);
> + if (!kvm_pi_find_dest_vcpu(kvm, &irq, &vcpu)) {
> + printk(KERN_INFO "%s: can not find the target VCPU\n",
> + __func__);
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + if (kvm_update_pi_irte_common(kvm, vcpu, irq.vector,
> + host_irq)) {
> + printk(KERN_INFO "%s: failed to update PI IRTE\n",
> + __func__);
> + ret = -EINVAL;
> + goto out;
> + }
> + }
> +
> + ret = 0;
> +out:
> + srcu_read_unlock(&kvm->irq_srcu, idx);
> + return ret;
> +}
> +
> +int kvm_update_pi_irte(struct kvm *kvm, u32 dev_id)
> +{
> + int i, rc = -1;
> + struct kvm_assigned_dev_kernel *dev;
> +
> + mutex_lock(&kvm->lock);
> + dev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, dev_id);
> + if (!dev) {
> + printk(KERN_INFO "%s: cannot find the assigned dev.\n",
> + __func__);
> + rc = -1;
> + goto out;
> + }
> +
> + BUG_ON(dev->irq_requested_type == 0);
> +
> + if ((dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) &&
> + (dev->dev->msi_enabled == 1)) {
> + __kvm_update_pi_irte(kvm,
> + dev->host_irq, dev->guest_irq);
> + } else if ((dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) &&
> + (dev->dev->msix_enabled == 1)) {
> + for (i = 0; i < dev->entries_nr; i++) {
> + __kvm_update_pi_irte(kvm,
> + dev->host_msix_entries[i].vector,
> + dev->guest_msix_entries[i].vector);
> + }
> + }
> +
> +out:
> + rc = 0;
> + mutex_unlock(&kvm->lock);
> + return rc;
> +}
> +
> static int assigned_device_enable_host_intx(struct kvm *kvm,
> struct kvm_assigned_dev_kernel *dev)
> {
> @@ -1017,6 +1146,18 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
> r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
> break;
> }
> + case KVM_ASSIGN_DEV_PI_UPDATE: {
> + u32 dev_id;
> +
> + r = -EFAULT;
> + if (copy_from_user(&dev_id, argp, sizeof(dev_id)))
> + goto out;
> + r = kvm_update_pi_irte(kvm, dev_id);
> + if (r)
> + goto out;
> + break;
> +
> + }
> default:
> r = -ENOTTY;
> break;
> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> index 963b899..f51aed3 100644
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -55,7 +55,7 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
> line_status);
> }
>
> -inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
> +bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
> {
> #ifdef CONFIG_IA64
> return irq->delivery_mode ==
> @@ -106,7 +106,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
> return r;
> }
>
> -static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
> +void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
> struct kvm_lapic_irq *irq)
> {
> trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
> diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
> index 7f256f3..cdf29a6 100644
> --- a/virt/kvm/irqchip.c
> +++ b/virt/kvm/irqchip.c
> @@ -31,17 +31,6 @@
> #include <trace/events/kvm.h>
> #include "irq.h"
>
> -struct kvm_irq_routing_table {
> - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
> - struct kvm_kernel_irq_routing_entry *rt_entries;
> - u32 nr_rt_entries;
> - /*
> - * Array indexed by gsi. Each entry contains list of irq chips
> - * the gsi is connected to.
> - */
> - struct hlist_head map[0];
> -};
> -
> int kvm_irq_map_gsi(struct kvm *kvm,
> struct kvm_kernel_irq_routing_entry *entries, int gsi)
> {
^ permalink raw reply [flat|nested] 53+ messages in thread
* [PATCH 06/13] KVM: Add some helper functions for Posted-Interrupts
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 6:26 ` Feng Wu
-1 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb-DgEjT+Ai2ygdnm+yROfE0A, pbonzini-H+wXaHxf7aLQT0dZR+AlfA,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ, joro-zLv9SwRftAIdnm+yROfE0A,
tglx-hfZtesqFncYOwBW4kG4KsQ, mingo-H+wXaHxf7aLQT0dZR+AlfA,
hpa-YMNOUZJC4hwAvxtiuMwx3w, x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
This patch adds three helper functions to manipulate the Posted-
Interrupts Descriptor.
Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
arch/x86/kvm/vmx.c | 18 ++++++++++++++++++
1 files changed, 18 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ae91b72..f41111f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -435,6 +435,24 @@ static void pi_clear_sn(struct pi_desc *pi_desc)
(unsigned long *)&pi_desc->control);
}
+static void pi_set_sn(struct pi_desc *pi_desc)
+{
+ return set_bit(POSTED_INTR_SN,
+ (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_on(struct pi_desc *pi_desc)
+{
+ return test_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_sn(struct pi_desc *pi_desc)
+{
+ return test_bit(POSTED_INTR_SN,
+ (unsigned long *)&pi_desc->control);
+}
+
static bool pi_test_and_set_on(struct pi_desc *pi_desc)
{
return test_and_set_bit(POSTED_INTR_ON,
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread* [PATCH 06/13] KVM: Add some helper functions for Posted-Interrupts
@ 2014-11-10 6:26 ` Feng Wu
0 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel, Feng Wu
This patch adds three helper functions to manipulate the Posted-
Interrupts Descriptor.
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
arch/x86/kvm/vmx.c | 18 ++++++++++++++++++
1 files changed, 18 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ae91b72..f41111f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -435,6 +435,24 @@ static void pi_clear_sn(struct pi_desc *pi_desc)
(unsigned long *)&pi_desc->control);
}
+static void pi_set_sn(struct pi_desc *pi_desc)
+{
+ return set_bit(POSTED_INTR_SN,
+ (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_on(struct pi_desc *pi_desc)
+{
+ return test_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_sn(struct pi_desc *pi_desc)
+{
+ return test_bit(POSTED_INTR_SN,
+ (unsigned long *)&pi_desc->control);
+}
+
static bool pi_test_and_set_on(struct pi_desc *pi_desc)
{
return test_and_set_bit(POSTED_INTR_ON,
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [PATCH 07/13] x86, irq: Define a global vector for VT-d Posted-Interrupts
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 6:26 ` Feng Wu
-1 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb-DgEjT+Ai2ygdnm+yROfE0A, pbonzini-H+wXaHxf7aLQT0dZR+AlfA,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ, joro-zLv9SwRftAIdnm+yROfE0A,
tglx-hfZtesqFncYOwBW4kG4KsQ, mingo-H+wXaHxf7aLQT0dZR+AlfA,
hpa-YMNOUZJC4hwAvxtiuMwx3w, x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
Currently, we use a global vector as the Posted-Interrupts
Notification Event for all the VCPUs in the system. We need
to introduce another global vector for VT-d Posted-Interrupts,
which will be used to wake up the sleeping VCPU when an external
interrupt from a direct-assigned device happens for that VCPU.
Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
arch/x86/include/asm/entry_arch.h | 2 ++
arch/x86/include/asm/hardirq.h | 1 +
arch/x86/include/asm/hw_irq.h | 2 ++
arch/x86/include/asm/irq_vectors.h | 1 +
arch/x86/kernel/entry_64.S | 2 ++
arch/x86/kernel/irq.c | 27 +++++++++++++++++++++++++++
arch/x86/kernel/irqinit.c | 2 ++
7 files changed, 37 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index dc5fa66..27ca0af 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -23,6 +23,8 @@ BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
#ifdef CONFIG_HAVE_KVM
BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
smp_kvm_posted_intr_ipi)
+BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR,
+ smp_kvm_posted_intr_wakeup_ipi)
#endif
/*
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 0f5fb6b..9866065 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -14,6 +14,7 @@ typedef struct {
#endif
#ifdef CONFIG_HAVE_KVM
unsigned int kvm_posted_intr_ipis;
+ unsigned int kvm_posted_intr_wakeup_ipis;
#endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 4615906..559563c 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,6 +29,7 @@
extern asmlinkage void apic_timer_interrupt(void);
extern asmlinkage void x86_platform_ipi(void);
extern asmlinkage void kvm_posted_intr_ipi(void);
+extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
extern asmlinkage void error_interrupt(void);
extern asmlinkage void irq_work_interrupt(void);
@@ -92,6 +93,7 @@ extern void trace_call_function_single_interrupt(void);
#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
#define trace_reboot_interrupt reboot_interrupt
#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
+#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi
#endif /* CONFIG_TRACING */
/* IOAPIC */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 5702d7e..1343349 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -105,6 +105,7 @@
/* Vector for KVM to deliver posted interrupt IPI */
#ifdef CONFIG_HAVE_KVM
#define POSTED_INTR_VECTOR 0xf2
+#define POSTED_INTR_WAKEUP_VECTOR 0xf1
#endif
/*
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index df088bb..7663aaa 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1004,6 +1004,8 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
#ifdef CONFIG_HAVE_KVM
apicinterrupt3 POSTED_INTR_VECTOR \
kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
+apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR \
+ kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 922d285..47408c3 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -237,6 +237,9 @@ __visible void smp_x86_platform_ipi(struct pt_regs *regs)
}
#ifdef CONFIG_HAVE_KVM
+void (*wakeup_handler_callback)(void) = NULL;
+EXPORT_SYMBOL_GPL(wakeup_handler_callback);
+
/*
* Handler for POSTED_INTERRUPT_VECTOR.
*/
@@ -256,6 +259,30 @@ __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
set_irq_regs(old_regs);
}
+
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+__visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ ack_APIC_irq();
+
+ irq_enter();
+
+ exit_idle();
+
+ inc_irq_stat(kvm_posted_intr_wakeup_ipis);
+
+ if (wakeup_handler_callback)
+ wakeup_handler_callback();
+
+ irq_exit();
+
+ set_irq_regs(old_regs);
+}
+
#endif
__visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 4de73ee..659cde3 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -168,6 +168,8 @@ static void __init apic_intr_init(void)
#ifdef CONFIG_HAVE_KVM
/* IPI for KVM to deliver posted interrupt */
alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
+ /* IPI for KVM to deliver interrupt to wake up tasks */
+ alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi);
#endif
/* IPI vectors for APIC spurious and error interrupts */
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread* [PATCH 07/13] x86, irq: Define a global vector for VT-d Posted-Interrupts
@ 2014-11-10 6:26 ` Feng Wu
0 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel, Feng Wu
Currently, we use a global vector as the Posted-Interrupts
Notification Event for all the VCPUs in the system. We need
to introduce another global vector for VT-d Posted-Interrupts,
which will be used to wake up the sleeping VCPU when an external
interrupt from a direct-assigned device happens for that VCPU.
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
arch/x86/include/asm/entry_arch.h | 2 ++
arch/x86/include/asm/hardirq.h | 1 +
arch/x86/include/asm/hw_irq.h | 2 ++
arch/x86/include/asm/irq_vectors.h | 1 +
arch/x86/kernel/entry_64.S | 2 ++
arch/x86/kernel/irq.c | 27 +++++++++++++++++++++++++++
arch/x86/kernel/irqinit.c | 2 ++
7 files changed, 37 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index dc5fa66..27ca0af 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -23,6 +23,8 @@ BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
#ifdef CONFIG_HAVE_KVM
BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
smp_kvm_posted_intr_ipi)
+BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR,
+ smp_kvm_posted_intr_wakeup_ipi)
#endif
/*
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 0f5fb6b..9866065 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -14,6 +14,7 @@ typedef struct {
#endif
#ifdef CONFIG_HAVE_KVM
unsigned int kvm_posted_intr_ipis;
+ unsigned int kvm_posted_intr_wakeup_ipis;
#endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 4615906..559563c 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,6 +29,7 @@
extern asmlinkage void apic_timer_interrupt(void);
extern asmlinkage void x86_platform_ipi(void);
extern asmlinkage void kvm_posted_intr_ipi(void);
+extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
extern asmlinkage void error_interrupt(void);
extern asmlinkage void irq_work_interrupt(void);
@@ -92,6 +93,7 @@ extern void trace_call_function_single_interrupt(void);
#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
#define trace_reboot_interrupt reboot_interrupt
#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
+#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi
#endif /* CONFIG_TRACING */
/* IOAPIC */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 5702d7e..1343349 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -105,6 +105,7 @@
/* Vector for KVM to deliver posted interrupt IPI */
#ifdef CONFIG_HAVE_KVM
#define POSTED_INTR_VECTOR 0xf2
+#define POSTED_INTR_WAKEUP_VECTOR 0xf1
#endif
/*
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index df088bb..7663aaa 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1004,6 +1004,8 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
#ifdef CONFIG_HAVE_KVM
apicinterrupt3 POSTED_INTR_VECTOR \
kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
+apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR \
+ kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 922d285..47408c3 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -237,6 +237,9 @@ __visible void smp_x86_platform_ipi(struct pt_regs *regs)
}
#ifdef CONFIG_HAVE_KVM
+void (*wakeup_handler_callback)(void) = NULL;
+EXPORT_SYMBOL_GPL(wakeup_handler_callback);
+
/*
* Handler for POSTED_INTERRUPT_VECTOR.
*/
@@ -256,6 +259,30 @@ __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
set_irq_regs(old_regs);
}
+
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+__visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ ack_APIC_irq();
+
+ irq_enter();
+
+ exit_idle();
+
+ inc_irq_stat(kvm_posted_intr_wakeup_ipis);
+
+ if (wakeup_handler_callback)
+ wakeup_handler_callback();
+
+ irq_exit();
+
+ set_irq_regs(old_regs);
+}
+
#endif
__visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 4de73ee..659cde3 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -168,6 +168,8 @@ static void __init apic_intr_init(void)
#ifdef CONFIG_HAVE_KVM
/* IPI for KVM to deliver posted interrupt */
alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
+ /* IPI for KVM to deliver interrupt to wake up tasks */
+ alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi);
#endif
/* IPI vectors for APIC spurious and error interrupts */
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [PATCH 08/13] KVM: Update Posted-Interrupts descriptor during VCPU scheduling
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 6:26 ` Feng Wu
-1 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb-DgEjT+Ai2ygdnm+yROfE0A, pbonzini-H+wXaHxf7aLQT0dZR+AlfA,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ, joro-zLv9SwRftAIdnm+yROfE0A,
tglx-hfZtesqFncYOwBW4kG4KsQ, mingo-H+wXaHxf7aLQT0dZR+AlfA,
hpa-YMNOUZJC4hwAvxtiuMwx3w, x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
Update Posted-Interrupts descriptor according to the
following rules:
- Before VCPU block, set 'NV' to POSTED_INTR_WAKEUP_VECTOR
- After VCPU block, set 'NV' back to POSTED_INTR_VECTOR
Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
arch/x86/include/asm/kvm_host.h | 5 ++
arch/x86/kvm/vmx.c | 83 +++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 16 +++++++
virt/kvm/kvm_main.c | 11 +++++
4 files changed, 115 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0630161..71cfe3e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -773,6 +773,8 @@ struct kvm_x86_ops {
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
+ int (*vcpu_pre_block)(struct kvm_vcpu *vcpu);
+ void (*vcpu_post_block)(struct kvm_vcpu *vcpu);
};
struct kvm_arch_async_pf {
@@ -1095,4 +1097,7 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
+int kvm_arch_vcpu_pre_block(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_post_block(struct kvm_vcpu *vcpu);
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f41111f..4c1a966 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9153,6 +9153,86 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
shrink_ple_window(vcpu);
}
+static int vmx_vcpu_pre_block(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct pi_desc old;
+ struct pi_desc new;
+
+ if (!irq_post_enabled)
+ return 0;
+
+ memset(&old, 0, sizeof(old));
+ memset(&new, 0, sizeof(new));
+
+ do {
+ old.control = new.control = pi_desc->control;
+
+ /*
+ * A posted-interrupt happened in the one of the
+ * following two cases:
+ * 1. After the latest pir-to-virr sync operation
+ * in kvm_arch_vcpu_runnable() function
+ * 2. In this do-while() loop, a posted-interrupt
+ * occurs.
+ *
+ * For either of above cases, we should not block
+ * the VCPU.
+ */
+ if (pi_test_on(pi_desc) == 1) {
+ /*
+ * Need to set this flag, then the inject will
+ * be synced from PIR to vIRR before VM-ENTRY.
+ * In fact, for guest IPI case, in function
+ * vmx_deliver_posted_interrupt(), this flags
+ * has already been set, but if the interrupt
+ * is injected by VT-d PI hardware, we need
+ * to set this.
+ */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ return 1;
+ }
+
+ pi_clear_sn(&new);
+
+ /* set 'NV' to 'wakeup vector' */
+ new.nv = POSTED_INTR_WAKEUP_VECTOR;
+ } while (cmpxchg(&pi_desc->control, old.control, new.control)
+ != old.control);
+
+ return 0;
+}
+
+static void vmx_vcpu_post_block(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct pi_desc old;
+ struct pi_desc new;
+ unsigned int dest = 0;
+
+ if (!irq_post_enabled)
+ return;
+
+ pi_set_sn(pi_desc);
+
+ do {
+ old.control = new.control = pi_desc->control;
+
+ dest = cpu_physical_id(vcpu->cpu);
+
+ if (x2apic_mode)
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
+
+ /* set 'NV' to 'notification vector' */
+ new.nv = POSTED_INTR_VECTOR;
+ } while (cmpxchg(&pi_desc->control, old.control, new.control)
+ != old.control);
+
+ pi_clear_sn(pi_desc);
+}
+
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -9262,6 +9342,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
.sched_in = vmx_sched_in,
.get_pi_desc_addr = vmx_get_pi_desc_addr,
+
+ .vcpu_pre_block = vmx_vcpu_pre_block,
+ .vcpu_post_block = vmx_vcpu_post_block,
};
static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0c19d15..d0c8bb2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7746,6 +7746,22 @@ int kvm_update_pi_irte_common(struct kvm *kvm, struct kvm_vcpu *vcpu,
return 0;
}
+int kvm_arch_vcpu_pre_block(struct kvm_vcpu *vcpu)
+{
+ if (kvm_x86_ops->vcpu_pre_block)
+ return kvm_x86_ops->vcpu_pre_block(vcpu);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_arch_vcpu_pre_block);
+
+void kvm_arch_vcpu_post_block(struct kvm_vcpu *vcpu)
+{
+ if (kvm_x86_ops->vcpu_post_block)
+ kvm_x86_ops->vcpu_post_block(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_vcpu_post_block);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 25ffac9..1be1a45 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1754,7 +1754,18 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
if (signal_pending(current))
break;
+#ifdef CONFIG_X86
+ if (kvm_arch_vcpu_pre_block(vcpu) == 1) {
+ kvm_make_request(KVM_REQ_UNHALT, vcpu);
+ break;
+ }
+#endif
+
schedule();
+
+#ifdef CONFIG_X86
+ kvm_arch_vcpu_post_block(vcpu);
+#endif
}
finish_wait(&vcpu->wq, &wait);
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread* [PATCH 08/13] KVM: Update Posted-Interrupts descriptor during VCPU scheduling
@ 2014-11-10 6:26 ` Feng Wu
0 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel, Feng Wu
Update Posted-Interrupts descriptor according to the
following rules:
- Before VCPU block, set 'NV' to POSTED_INTR_WAKEUP_VECTOR
- After VCPU block, set 'NV' back to POSTED_INTR_VECTOR
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
arch/x86/include/asm/kvm_host.h | 5 ++
arch/x86/kvm/vmx.c | 83 +++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 16 +++++++
virt/kvm/kvm_main.c | 11 +++++
4 files changed, 115 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0630161..71cfe3e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -773,6 +773,8 @@ struct kvm_x86_ops {
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
+ int (*vcpu_pre_block)(struct kvm_vcpu *vcpu);
+ void (*vcpu_post_block)(struct kvm_vcpu *vcpu);
};
struct kvm_arch_async_pf {
@@ -1095,4 +1097,7 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
+int kvm_arch_vcpu_pre_block(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_post_block(struct kvm_vcpu *vcpu);
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f41111f..4c1a966 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9153,6 +9153,86 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
shrink_ple_window(vcpu);
}
+static int vmx_vcpu_pre_block(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct pi_desc old;
+ struct pi_desc new;
+
+ if (!irq_post_enabled)
+ return 0;
+
+ memset(&old, 0, sizeof(old));
+ memset(&new, 0, sizeof(new));
+
+ do {
+ old.control = new.control = pi_desc->control;
+
+ /*
+ * A posted-interrupt happened in the one of the
+ * following two cases:
+ * 1. After the latest pir-to-virr sync operation
+ * in kvm_arch_vcpu_runnable() function
+ * 2. In this do-while() loop, a posted-interrupt
+ * occurs.
+ *
+ * For either of above cases, we should not block
+ * the VCPU.
+ */
+ if (pi_test_on(pi_desc) == 1) {
+ /*
+ * Need to set this flag, then the inject will
+ * be synced from PIR to vIRR before VM-ENTRY.
+ * In fact, for guest IPI case, in function
+ * vmx_deliver_posted_interrupt(), this flags
+ * has already been set, but if the interrupt
+ * is injected by VT-d PI hardware, we need
+ * to set this.
+ */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ return 1;
+ }
+
+ pi_clear_sn(&new);
+
+ /* set 'NV' to 'wakeup vector' */
+ new.nv = POSTED_INTR_WAKEUP_VECTOR;
+ } while (cmpxchg(&pi_desc->control, old.control, new.control)
+ != old.control);
+
+ return 0;
+}
+
+static void vmx_vcpu_post_block(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct pi_desc old;
+ struct pi_desc new;
+ unsigned int dest = 0;
+
+ if (!irq_post_enabled)
+ return;
+
+ pi_set_sn(pi_desc);
+
+ do {
+ old.control = new.control = pi_desc->control;
+
+ dest = cpu_physical_id(vcpu->cpu);
+
+ if (x2apic_mode)
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
+
+ /* set 'NV' to 'notification vector' */
+ new.nv = POSTED_INTR_VECTOR;
+ } while (cmpxchg(&pi_desc->control, old.control, new.control)
+ != old.control);
+
+ pi_clear_sn(pi_desc);
+}
+
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -9262,6 +9342,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
.sched_in = vmx_sched_in,
.get_pi_desc_addr = vmx_get_pi_desc_addr,
+
+ .vcpu_pre_block = vmx_vcpu_pre_block,
+ .vcpu_post_block = vmx_vcpu_post_block,
};
static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0c19d15..d0c8bb2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7746,6 +7746,22 @@ int kvm_update_pi_irte_common(struct kvm *kvm, struct kvm_vcpu *vcpu,
return 0;
}
+int kvm_arch_vcpu_pre_block(struct kvm_vcpu *vcpu)
+{
+ if (kvm_x86_ops->vcpu_pre_block)
+ return kvm_x86_ops->vcpu_pre_block(vcpu);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_arch_vcpu_pre_block);
+
+void kvm_arch_vcpu_post_block(struct kvm_vcpu *vcpu)
+{
+ if (kvm_x86_ops->vcpu_post_block)
+ kvm_x86_ops->vcpu_post_block(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_vcpu_post_block);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 25ffac9..1be1a45 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1754,7 +1754,18 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
if (signal_pending(current))
break;
+#ifdef CONFIG_X86
+ if (kvm_arch_vcpu_pre_block(vcpu) == 1) {
+ kvm_make_request(KVM_REQ_UNHALT, vcpu);
+ break;
+ }
+#endif
+
schedule();
+
+#ifdef CONFIG_X86
+ kvm_arch_vcpu_post_block(vcpu);
+#endif
}
finish_wait(&vcpu->wq, &wait);
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [PATCH 09/13] KVM: Change NDST field after VCPU scheduling
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 6:26 ` Feng Wu
-1 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb-DgEjT+Ai2ygdnm+yROfE0A, pbonzini-H+wXaHxf7aLQT0dZR+AlfA,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ, joro-zLv9SwRftAIdnm+yROfE0A,
tglx-hfZtesqFncYOwBW4kG4KsQ, mingo-H+wXaHxf7aLQT0dZR+AlfA,
hpa-YMNOUZJC4hwAvxtiuMwx3w, x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
This patch changes the NDST field of Posted-Interrupts
Descriptor after VCPU is scheduled to another physical
CPU.
Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
arch/x86/kvm/vmx.c | 25 +++++++++++++++++++++++++
1 files changed, 25 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4c1a966..fa77714 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1906,6 +1906,31 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
vmx->loaded_vmcs->cpu = cpu;
}
+
+ if (irq_post_enabled && (vcpu->cpu != cpu)) {
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct pi_desc old, new;
+ unsigned int dest;
+
+ memset(&old, 0, sizeof(old));
+ memset(&new, 0, sizeof(new));
+
+ pi_set_sn(pi_desc);
+
+ do {
+ old.control = new.control = pi_desc->control;
+
+ dest = cpu_physical_id(cpu);
+
+ if (x2apic_mode)
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
+
+ } while (cmpxchg(&pi_desc->control, old.control,
+ new.control) != old.control);
+ pi_clear_sn(pi_desc);
+ }
}
static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread* [PATCH 09/13] KVM: Change NDST field after VCPU scheduling
@ 2014-11-10 6:26 ` Feng Wu
0 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel, Feng Wu
This patch changes the NDST field of Posted-Interrupts
Descriptor after VCPU is scheduled to another physical
CPU.
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
arch/x86/kvm/vmx.c | 25 +++++++++++++++++++++++++
1 files changed, 25 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4c1a966..fa77714 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1906,6 +1906,31 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
vmx->loaded_vmcs->cpu = cpu;
}
+
+ if (irq_post_enabled && (vcpu->cpu != cpu)) {
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct pi_desc old, new;
+ unsigned int dest;
+
+ memset(&old, 0, sizeof(old));
+ memset(&new, 0, sizeof(new));
+
+ pi_set_sn(pi_desc);
+
+ do {
+ old.control = new.control = pi_desc->control;
+
+ dest = cpu_physical_id(cpu);
+
+ if (x2apic_mode)
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
+
+ } while (cmpxchg(&pi_desc->control, old.control,
+ new.control) != old.control);
+ pi_clear_sn(pi_desc);
+ }
}
static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [PATCH 10/13] KVM: Add the handler for Wake-up Vector
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 6:26 ` Feng Wu
-1 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb-DgEjT+Ai2ygdnm+yROfE0A, pbonzini-H+wXaHxf7aLQT0dZR+AlfA,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ, joro-zLv9SwRftAIdnm+yROfE0A,
tglx-hfZtesqFncYOwBW4kG4KsQ, mingo-H+wXaHxf7aLQT0dZR+AlfA,
hpa-YMNOUZJC4hwAvxtiuMwx3w, x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
When a VCPU is blocked and an external interrupt from an assigned
device is delivered to it, the VT-d Posted-Interrupts mechanism
will deliver an interrupt to the associated physical CPU with the
Wake-up Vector. In its handler, we find the destination VCPU
and wake it up.
Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
arch/x86/include/asm/kvm_host.h | 2 +
arch/x86/kvm/vmx.c | 52 +++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 22 +++++++++++-----
include/linux/kvm_host.h | 3 ++
virt/kvm/kvm_main.c | 3 ++
5 files changed, 75 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 71cfe3e..ca231a3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -99,6 +99,8 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
#define ASYNC_PF_PER_VCPU 64
+extern void (*wakeup_handler_callback)(void);
+
enum kvm_reg {
VCPU_REGS_RAX = 0,
VCPU_REGS_RCX = 1,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fa77714..51d2c8a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -822,6 +822,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
+/*
+ * We maintain a per-CPU linked-list of VCPUs, so in wakeup_handler() we
+ * can find which VCPU should be woken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
static unsigned long *vmx_io_bitmap_a;
static unsigned long *vmx_io_bitmap_b;
static unsigned long *vmx_msr_bitmap_legacy;
@@ -2813,6 +2820,8 @@ static int hardware_enable(void)
return -EBUSY;
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+ INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+ spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
/*
* Now we can enable the vmclear operation in kdump
@@ -9183,6 +9192,7 @@ static int vmx_vcpu_pre_block(struct kvm_vcpu *vcpu)
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
struct pi_desc old;
struct pi_desc new;
+ unsigned long flags;
if (!irq_post_enabled)
return 0;
@@ -9222,9 +9232,22 @@ static int vmx_vcpu_pre_block(struct kvm_vcpu *vcpu)
/* set 'NV' to 'wakeup vector' */
new.nv = POSTED_INTR_WAKEUP_VECTOR;
+
+ /*
+ * We should save physical cpu id here, vcpu->cpu may
+ * be changed due to preemption, in that case, this
+ * do-while loop will run again.
+ */
+ vcpu->wakeup_cpu = vcpu->cpu;
} while (cmpxchg(&pi_desc->control, old.control, new.control)
!= old.control);
+ spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu->wakeup_cpu), flags);
+ list_add_tail(&vcpu->blocked_vcpu_list,
+ &per_cpu(blocked_vcpu_on_cpu, vcpu->wakeup_cpu));
+ spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu->wakeup_cpu), flags);
return 0;
}
@@ -9234,6 +9257,7 @@ static void vmx_vcpu_post_block(struct kvm_vcpu *vcpu)
struct pi_desc old;
struct pi_desc new;
unsigned int dest = 0;
+ unsigned long flags;
if (!irq_post_enabled)
return;
@@ -9255,6 +9279,13 @@ static void vmx_vcpu_post_block(struct kvm_vcpu *vcpu)
} while (cmpxchg(&pi_desc->control, old.control, new.control)
!= old.control);
+ spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu->wakeup_cpu), flags);
+ list_del(&vcpu->blocked_vcpu_list);
+ spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu->wakeup_cpu), flags);
+ vcpu->wakeup_cpu = -1;
+
pi_clear_sn(pi_desc);
}
@@ -9372,6 +9403,25 @@ static struct kvm_x86_ops vmx_x86_ops = {
.vcpu_post_block = vmx_vcpu_post_block,
};
+/*
+ * Handler for POSTED_INTR_WAKEUP_VECTOR.
+ */
+void wakeup_handler(void)
+{
+ struct kvm_vcpu *vcpu;
+ int cpu = smp_processor_id();
+
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+ list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
+ blocked_vcpu_list) {
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+ if (pi_test_on(pi_desc) == 1)
+ kvm_vcpu_kick(vcpu);
+ }
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
static int __init vmx_init(void)
{
int r, i, msr;
@@ -9486,6 +9536,8 @@ static int __init vmx_init(void)
update_ple_window_actual_max();
+ wakeup_handler_callback = wakeup_handler;
+
return 0;
out7:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d0c8bb2..2061b3d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6156,6 +6156,21 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_vcpu_reload_apic_access_page(vcpu);
}
+ /*
+ * Since posted-interrupts can be set by VT-d HW now, in this
+ * case, KVM_REQ_EVENT is not set. We move the following
+ * operations out of the if statement.
+ */
+ if (kvm_lapic_enabled(vcpu)) {
+ /*
+ * Update architecture specific hints for APIC
+ * virtual interrupt delivery.
+ */
+ if (kvm_x86_ops->hwapic_irr_update)
+ kvm_x86_ops->hwapic_irr_update(vcpu,
+ kvm_lapic_find_highest_irr(vcpu));
+ }
+
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
kvm_apic_accept_events(vcpu);
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
@@ -6172,13 +6187,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops->enable_irq_window(vcpu);
if (kvm_lapic_enabled(vcpu)) {
- /*
- * Update architecture specific hints for APIC
- * virtual interrupt delivery.
- */
- if (kvm_x86_ops->hwapic_irr_update)
- kvm_x86_ops->hwapic_irr_update(vcpu,
- kvm_lapic_find_highest_irr(vcpu));
update_cr8_intercept(vcpu);
kvm_lapic_sync_to_vapic(vcpu);
}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6bb8287..614b4ba 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -239,6 +239,9 @@ struct kvm_vcpu {
unsigned long requests;
unsigned long guest_debug;
+ int wakeup_cpu;
+ struct list_head blocked_vcpu_list;
+
struct mutex mutex;
struct kvm_run *run;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1be1a45..fb3e504 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -224,6 +224,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
init_waitqueue_head(&vcpu->wq);
kvm_async_pf_vcpu_init(vcpu);
+ vcpu->wakeup_cpu = -1;
+ INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
+
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page) {
r = -ENOMEM;
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread* [PATCH 10/13] KVM: Add the handler for Wake-up Vector
@ 2014-11-10 6:26 ` Feng Wu
0 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel, Feng Wu
When a VCPU is blocked and an external interrupt from an assigned
device is delivered to it, the VT-d Posted-Interrupts mechanism
will deliver an interrupt to the associated physical CPU with the
Wake-up Vector. In its handler, we find the destination VCPU
and wake it up.
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
arch/x86/include/asm/kvm_host.h | 2 +
arch/x86/kvm/vmx.c | 52 +++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 22 +++++++++++-----
include/linux/kvm_host.h | 3 ++
virt/kvm/kvm_main.c | 3 ++
5 files changed, 75 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 71cfe3e..ca231a3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -99,6 +99,8 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
#define ASYNC_PF_PER_VCPU 64
+extern void (*wakeup_handler_callback)(void);
+
enum kvm_reg {
VCPU_REGS_RAX = 0,
VCPU_REGS_RCX = 1,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fa77714..51d2c8a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -822,6 +822,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
+/*
+ * We maintain a per-CPU linked-list of VCPUs, so in wakeup_handler() we
+ * can find which VCPU should be woken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
static unsigned long *vmx_io_bitmap_a;
static unsigned long *vmx_io_bitmap_b;
static unsigned long *vmx_msr_bitmap_legacy;
@@ -2813,6 +2820,8 @@ static int hardware_enable(void)
return -EBUSY;
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+ INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+ spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
/*
* Now we can enable the vmclear operation in kdump
@@ -9183,6 +9192,7 @@ static int vmx_vcpu_pre_block(struct kvm_vcpu *vcpu)
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
struct pi_desc old;
struct pi_desc new;
+ unsigned long flags;
if (!irq_post_enabled)
return 0;
@@ -9222,9 +9232,22 @@ static int vmx_vcpu_pre_block(struct kvm_vcpu *vcpu)
/* set 'NV' to 'wakeup vector' */
new.nv = POSTED_INTR_WAKEUP_VECTOR;
+
+ /*
+ * We should save physical cpu id here, vcpu->cpu may
+ * be changed due to preemption, in that case, this
+ * do-while loop will run again.
+ */
+ vcpu->wakeup_cpu = vcpu->cpu;
} while (cmpxchg(&pi_desc->control, old.control, new.control)
!= old.control);
+ spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu->wakeup_cpu), flags);
+ list_add_tail(&vcpu->blocked_vcpu_list,
+ &per_cpu(blocked_vcpu_on_cpu, vcpu->wakeup_cpu));
+ spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu->wakeup_cpu), flags);
return 0;
}
@@ -9234,6 +9257,7 @@ static void vmx_vcpu_post_block(struct kvm_vcpu *vcpu)
struct pi_desc old;
struct pi_desc new;
unsigned int dest = 0;
+ unsigned long flags;
if (!irq_post_enabled)
return;
@@ -9255,6 +9279,13 @@ static void vmx_vcpu_post_block(struct kvm_vcpu *vcpu)
} while (cmpxchg(&pi_desc->control, old.control, new.control)
!= old.control);
+ spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu->wakeup_cpu), flags);
+ list_del(&vcpu->blocked_vcpu_list);
+ spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu->wakeup_cpu), flags);
+ vcpu->wakeup_cpu = -1;
+
pi_clear_sn(pi_desc);
}
@@ -9372,6 +9403,25 @@ static struct kvm_x86_ops vmx_x86_ops = {
.vcpu_post_block = vmx_vcpu_post_block,
};
+/*
+ * Handler for POSTED_INTR_WAKEUP_VECTOR.
+ */
+void wakeup_handler(void)
+{
+ struct kvm_vcpu *vcpu;
+ int cpu = smp_processor_id();
+
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+ list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
+ blocked_vcpu_list) {
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+ if (pi_test_on(pi_desc) == 1)
+ kvm_vcpu_kick(vcpu);
+ }
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
static int __init vmx_init(void)
{
int r, i, msr;
@@ -9486,6 +9536,8 @@ static int __init vmx_init(void)
update_ple_window_actual_max();
+ wakeup_handler_callback = wakeup_handler;
+
return 0;
out7:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d0c8bb2..2061b3d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6156,6 +6156,21 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_vcpu_reload_apic_access_page(vcpu);
}
+ /*
+ * Since posted-interrupts can be set by VT-d HW now, in this
+ * case, KVM_REQ_EVENT is not set. We move the following
+ * operations out of the if statement.
+ */
+ if (kvm_lapic_enabled(vcpu)) {
+ /*
+ * Update architecture specific hints for APIC
+ * virtual interrupt delivery.
+ */
+ if (kvm_x86_ops->hwapic_irr_update)
+ kvm_x86_ops->hwapic_irr_update(vcpu,
+ kvm_lapic_find_highest_irr(vcpu));
+ }
+
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
kvm_apic_accept_events(vcpu);
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
@@ -6172,13 +6187,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops->enable_irq_window(vcpu);
if (kvm_lapic_enabled(vcpu)) {
- /*
- * Update architecture specific hints for APIC
- * virtual interrupt delivery.
- */
- if (kvm_x86_ops->hwapic_irr_update)
- kvm_x86_ops->hwapic_irr_update(vcpu,
- kvm_lapic_find_highest_irr(vcpu));
update_cr8_intercept(vcpu);
kvm_lapic_sync_to_vapic(vcpu);
}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6bb8287..614b4ba 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -239,6 +239,9 @@ struct kvm_vcpu {
unsigned long requests;
unsigned long guest_debug;
+ int wakeup_cpu;
+ struct list_head blocked_vcpu_list;
+
struct mutex mutex;
struct kvm_run *run;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1be1a45..fb3e504 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -224,6 +224,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
init_waitqueue_head(&vcpu->wq);
kvm_async_pf_vcpu_init(vcpu);
+ vcpu->wakeup_cpu = -1;
+ INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
+
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page) {
r = -ENOMEM;
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [PATCH 11/13] KVM: Suppress posted-interrupt when 'SN' is set
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 6:26 ` Feng Wu
-1 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb-DgEjT+Ai2ygdnm+yROfE0A, pbonzini-H+wXaHxf7aLQT0dZR+AlfA,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ, joro-zLv9SwRftAIdnm+yROfE0A,
tglx-hfZtesqFncYOwBW4kG4KsQ, mingo-H+wXaHxf7aLQT0dZR+AlfA,
hpa-YMNOUZJC4hwAvxtiuMwx3w, x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
Currently, we don't support urgent interrupts; all interrupts
are treated as non-urgent, so we cannot send a
posted-interrupt when 'SN' is set.
Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
arch/x86/kvm/vmx.c | 11 +++++++++--
1 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 51d2c8a..495cfbd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4306,15 +4306,22 @@ static int vmx_vm_has_apicv(struct kvm *kvm)
static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- int r;
+ int r, sn;
if (pi_test_and_set_pir(vector, &vmx->pi_desc))
return;
+ /*
+ * Currently, we don't support urgent interrupt, all interrupts
+ * are recognized as non-urgent interrupt, so we cannot send
+ * posted-interrupt when 'SN' is set.
+ */
+ sn = pi_test_sn(&vmx->pi_desc);
+
r = pi_test_and_set_on(&vmx->pi_desc);
kvm_make_request(KVM_REQ_EVENT, vcpu);
#ifdef CONFIG_SMP
- if (!r && (vcpu->mode == IN_GUEST_MODE))
+ if (!r && !sn && (vcpu->mode == IN_GUEST_MODE))
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
else
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread* [PATCH 11/13] KVM: Suppress posted-interrupt when 'SN' is set
@ 2014-11-10 6:26 ` Feng Wu
0 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel, Feng Wu
Currently, we don't support urgent interrupts; all interrupts
are treated as non-urgent, so we cannot send a
posted-interrupt when 'SN' is set.
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
arch/x86/kvm/vmx.c | 11 +++++++++--
1 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 51d2c8a..495cfbd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4306,15 +4306,22 @@ static int vmx_vm_has_apicv(struct kvm *kvm)
static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- int r;
+ int r, sn;
if (pi_test_and_set_pir(vector, &vmx->pi_desc))
return;
+ /*
+ * Currently, we don't support urgent interrupt, all interrupts
+ * are recognized as non-urgent interrupt, so we cannot send
+ * posted-interrupt when 'SN' is set.
+ */
+ sn = pi_test_sn(&vmx->pi_desc);
+
r = pi_test_and_set_on(&vmx->pi_desc);
kvm_make_request(KVM_REQ_EVENT, vcpu);
#ifdef CONFIG_SMP
- if (!r && (vcpu->mode == IN_GUEST_MODE))
+ if (!r && !sn && (vcpu->mode == IN_GUEST_MODE))
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
else
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [PATCH 12/13] iommu/vt-d: No need to migrating irq for VT-d Posted-Interrtups
2014-11-10 6:26 ` Feng Wu
@ 2014-11-10 6:26 ` Feng Wu
-1 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb-DgEjT+Ai2ygdnm+yROfE0A, pbonzini-H+wXaHxf7aLQT0dZR+AlfA,
dwmw2-wEGCiKHe2LqWVfeAwA7xHQ, joro-zLv9SwRftAIdnm+yROfE0A,
tglx-hfZtesqFncYOwBW4kG4KsQ, mingo-H+wXaHxf7aLQT0dZR+AlfA,
hpa-YMNOUZJC4hwAvxtiuMwx3w, x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
We don't need to migrate the irqs for VT-d Posted-Interrupts here.
When 'pst' is set in IRTE, the associated irq will be posted to
guests instead of interrupt remapping. The destination of the
interrupt is set in Posted-Interrupts Descriptor, and the migration
happens during VCPU scheduling.
Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
drivers/iommu/intel_irq_remapping.c | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 87c02fe..249e2b1 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1038,6 +1038,13 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
if (get_irte(irq, &irte))
return -EBUSY;
+ /*
+ * If the interrupt is for posting, it is used by guests,
+ * we cannot change IRTE here.
+ */
+ if (irte.irq_post_low.pst == 1)
+ return 0;
+
err = assign_irq_vector(irq, cfg, mask);
if (err)
return err;
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [PATCH 12/13] iommu/vt-d: No need to migrating irq for VT-d Posted-Interrtups
@ 2014-11-10 6:26 ` Feng Wu
0 siblings, 0 replies; 53+ messages in thread
From: Feng Wu @ 2014-11-10 6:26 UTC (permalink / raw)
To: gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel, Feng Wu
We don't need to migrate the irqs for VT-d Posted-Interrupts here.
When 'pst' is set in IRTE, the associated irq will be posted to
guests instead of interrupt remapping. The destination of the
interrupt is set in Posted-Interrupts Descriptor, and the migration
happens during VCPU scheduling.
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
drivers/iommu/intel_irq_remapping.c | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 87c02fe..249e2b1 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1038,6 +1038,13 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
if (get_irte(irq, &irte))
return -EBUSY;
+ /*
+ * If the interrupt is for posting, it is used by guests,
+ * we cannot change IRTE here.
+ */
+ if (irte.irq_post_low.pst == 1)
+ return 0;
+
err = assign_irq_vector(irq, cfg, mask);
if (err)
return err;
--
1.7.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
[parent not found: <1415600812-27773-13-git-send-email-feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>]
* Re: [PATCH 12/13] iommu/vt-d: No need to migrating irq for VT-d Posted-Interrtups
2014-11-10 6:26 ` Feng Wu
@ 2014-11-11 13:48 ` Jiang Liu
-1 siblings, 0 replies; 53+ messages in thread
From: Jiang Liu @ 2014-11-11 13:48 UTC (permalink / raw)
To: Feng Wu, gleb-DgEjT+Ai2ygdnm+yROfE0A,
pbonzini-H+wXaHxf7aLQT0dZR+AlfA, dwmw2-wEGCiKHe2LqWVfeAwA7xHQ,
joro-zLv9SwRftAIdnm+yROfE0A, tglx-hfZtesqFncYOwBW4kG4KsQ,
mingo-H+wXaHxf7aLQT0dZR+AlfA, hpa-YMNOUZJC4hwAvxtiuMwx3w,
x86-DgEjT+Ai2ygdnm+yROfE0A
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, kvm-u79uwXL29TY76Z2rM5mHXA
On 2014/11/10 14:26, Feng Wu wrote:
> We don't need to migrate the irqs for VT-d Posted-Interrtups here.
> When 'pst' is set in IRTE, the associated irq will be posted to
> guests instead of interrupt remapping. The destination of the
> interrupt is set in Posted-Interrupts Descriptor, and the migration
> happens during VCPU scheduling.
>
> Signed-off-by: Feng Wu <feng.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> ---
> drivers/iommu/intel_irq_remapping.c | 7 +++++++
> 1 files changed, 7 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
> index 87c02fe..249e2b1 100644
> --- a/drivers/iommu/intel_irq_remapping.c
> +++ b/drivers/iommu/intel_irq_remapping.c
> @@ -1038,6 +1038,13 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
> if (get_irte(irq, &irte))
> return -EBUSY;
>
> + /*
> + * If the interrupt is for posting, it is used by guests,
> + * we cannot change IRTE here.
> + */
> + if (irte.irq_post_low.pst == 1)
> + return 0;
Hi Feng,
You should return some error code instead of 0, otherwise the
irq core will get confused.
> +
> err = assign_irq_vector(irq, cfg, mask);
> if (err)
> return err;
>
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 12/13] iommu/vt-d: No need to migrating irq for VT-d Posted-Interrtups
@ 2014-11-11 13:48 ` Jiang Liu
0 siblings, 0 replies; 53+ messages in thread
From: Jiang Liu @ 2014-11-11 13:48 UTC (permalink / raw)
To: Feng Wu, gleb, pbonzini, dwmw2, joro, tglx, mingo, hpa, x86
Cc: kvm, iommu, linux-kernel
On 2014/11/10 14:26, Feng Wu wrote:
> We don't need to migrate the irqs for VT-d Posted-Interrtups here.
> When 'pst' is set in IRTE, the associated irq will be posted to
> guests instead of interrupt remapping. The destination of the
> interrupt is set in Posted-Interrupts Descriptor, and the migration
> happens during VCPU scheduling.
>
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> ---
> drivers/iommu/intel_irq_remapping.c | 7 +++++++
> 1 files changed, 7 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
> index 87c02fe..249e2b1 100644
> --- a/drivers/iommu/intel_irq_remapping.c
> +++ b/drivers/iommu/intel_irq_remapping.c
> @@ -1038,6 +1038,13 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
> if (get_irte(irq, &irte))
> return -EBUSY;
>
> + /*
> + * If the interrupt is for posting, it is used by guests,
> + * we cannot change IRTE here.
> + */
> + if (irte.irq_post_low.pst == 1)
> + return 0;
Hi Feng,
You should return some error code instead of 0, otherwise the
irq core will get confused.
> +
> err = assign_irq_vector(irq, cfg, mask);
> if (err)
> return err;
>
^ permalink raw reply [flat|nested] 53+ messages in thread