kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
To: <rkrcmar@redhat.com>, <joro@8bytes.org>, <pbonzini@redhat.com>,
	<alex.williamson@redhat.com>
Cc: <kvm@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<sherry.hurwitz@amd.com>,
	Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Subject: [PART2 PATCH v5 12/12] svm: Implements update_pi_irte hook to setup posted interrupt
Date: Mon, 25 Jul 2016 04:32:11 -0500	[thread overview]
Message-ID: <1469439131-11308-13-git-send-email-suravee.suthikulpanit@amd.com> (raw)
In-Reply-To: <1469439131-11308-1-git-send-email-suravee.suthikulpanit@amd.com>

From: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>

This patch implements update_pi_irte function hook to allow SVM
communicate to IOMMU driver regarding how to set up IRTE for handling
posted interrupt.

In case AVIC is enabled, during vcpu_load/unload, SVM needs to update
IOMMU IRTE with appropriate host physical APIC ID. Also, when
vcpu_blocking/unblocking, SVM needs to update the is-running bit in
the IOMMU IRTE. Both are achieved via calling amd_iommu_update_ga().

However, if GA mode is not enabled for the pass-through device,
IOMMU driver will simply just return when calling amd_iommu_update_ga.

Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
---
 arch/x86/kvm/svm.c | 230 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 228 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2be5ed7..39dc600 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -43,6 +43,7 @@
 #include <asm/desc.h>
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
+#include <asm/irq_remapping.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -199,6 +200,16 @@ struct vcpu_svm {
 	struct page *avic_backing_page;
 	u64 *avic_physical_id_cache;
 	bool avic_is_running;
+
+	/*
+	 * Per-vcpu list of struct amd_iommu_pi_data:
+	 * This is used mainly to track interrupt remapping table entry (IRTE)
+	 * to be updated when the vcpu affinity changes. This avoid the need
+	 * for scanning for IRTE and try to match ga_tag in the IOMMU driver
+	 * (or using hashtable).
+	 */
+	struct list_head pi_list;
+	spinlock_t pi_list_lock;
 };
 
 #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK	(0xFF)
@@ -1463,6 +1474,31 @@ free_avic:
 	return err;
 }
 
+static inline int
+avic_update_iommu(struct kvm_vcpu *vcpu, int cpu, phys_addr_t pa, bool r)
+{
+	int ret;
+	unsigned long flags;
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct amd_iommu_pi_data *pi;
+
+	if (!kvm_arch_has_assigned_device(vcpu->kvm))
+		return 0;
+
+	/*
+	 * Here, we go through the per-vcpu pi_list to update all existing
+	 * interrupt remapping table entry targeting this vcpu.
+	 */
+	spin_lock_irqsave(&svm->pi_list_lock, flags);
+	list_for_each_entry(pi, &svm->pi_list, node) {
+		ret = amd_iommu_update_ga(cpu, (pa & AVIC_HPA_MASK), r, pi);
+		if (ret)
+			break;
+	}
+	spin_unlock_irqrestore(&svm->pi_list_lock, flags);
+	return ret;
+}
+
 /**
  * This function is called during VCPU halt/unhalt.
  */
@@ -1485,9 +1521,16 @@ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
 	WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK));
 
 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
-	if (is_run)
+	if (is_run) {
 		entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
-	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+		WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+		avic_update_iommu(vcpu, h_physical_id,
+				  page_to_phys(svm->avic_backing_page), 1);
+	} else {
+		avic_update_iommu(vcpu, h_physical_id,
+				  page_to_phys(svm->avic_backing_page), 0);
+		WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+	}
 }
 
 static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1514,6 +1557,9 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
 
 	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+	avic_update_iommu(vcpu, h_physical_id,
+			  page_to_phys(svm->avic_backing_page),
+			  svm->avic_is_running);
 }
 
 static void avic_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1525,6 +1571,10 @@ static void avic_vcpu_put(struct kvm_vcpu *vcpu)
 		return;
 
 	entry = READ_ONCE(*(svm->avic_physical_id_cache));
+	if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
+		avic_update_iommu(vcpu, -1,
+				  page_to_phys(svm->avic_backing_page), 0);
+
 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
 	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
 }
@@ -1590,6 +1640,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 		err = avic_init_backing_page(&svm->vcpu);
 		if (err)
 			goto free_page4;
+
+		INIT_LIST_HEAD(&svm->pi_list);
+		spin_lock_init(&svm->pi_list_lock);
 	}
 
 	/* We initialize this flag to true to make sure that the is_running
@@ -4386,6 +4439,178 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
 		kvm_vcpu_wake_up(vcpu);
 }
 
+static void svm_pi_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+{
+	bool found = false;
+	unsigned long flags;
+	struct amd_iommu_pi_data *cur;
+
+	spin_lock_irqsave(&svm->pi_list_lock, flags);
+	list_for_each_entry(cur, &svm->pi_list, node) {
+		if (cur->ir_data != pi->ir_data)
+			continue;
+		found = true;
+		break;
+	}
+	if (!found)
+		list_add(&pi->node, &svm->pi_list);
+	spin_unlock_irqrestore(&svm->pi_list_lock, flags);
+}
+
+static void svm_pi_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+{
+	unsigned long flags;
+	struct amd_iommu_pi_data *cur, *next;
+
+	spin_lock_irqsave(&svm->pi_list_lock, flags);
+	list_for_each_entry_safe(cur, next, &svm->pi_list, node) {
+		if (cur->ir_data != pi->ir_data)
+			continue;
+		list_del(&cur->node);
+		kfree(cur);
+		break;
+	}
+	spin_unlock_irqrestore(&svm->pi_list_lock, flags);
+}
+
+/*
+ * svm_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+			      uint32_t guest_irq, bool set)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	struct kvm_irq_routing_table *irq_rt;
+	int idx, ret = -EINVAL;
+
+	if (!kvm_arch_has_assigned_device(kvm) ||
+	    !irq_remapping_cap(IRQ_POSTING_CAP))
+		return 0;
+
+	pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
+		 __func__, host_irq, guest_irq, set);
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+	WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+		struct kvm_lapic_irq irq;
+		struct vcpu_data vcpu_info;
+		struct kvm_vcpu *vcpu = NULL;
+		struct vcpu_svm *svm = NULL;
+
+		if (e->type != KVM_IRQ_ROUTING_MSI)
+			continue;
+
+		/**
+		 * Note:
+		 * The HW cannot support posting multicast/broadcast
+		 * interrupts to a vCPU. So, we still use interrupt
+		 * remapping for these kind of interrupts.
+		 *
+		 * For lowest-priority interrupts, we only support
+		 * those with single CPU as the destination, e.g. user
+		 * configures the interrupts via /proc/irq or uses
+		 * irqbalance to make the interrupts single-CPU.
+		 */
+		kvm_set_msi_irq(e, &irq);
+		if (kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+			svm = to_svm(vcpu);
+			vcpu_info.pi_desc_addr = page_to_phys(svm->avic_backing_page);
+			vcpu_info.vector = irq.vector;
+
+			trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi,
+						 vcpu_info.vector,
+						 vcpu_info.pi_desc_addr, set);
+
+			pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
+				 irq.vector);
+		} else {
+			set = false;
+
+			pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
+				 __func__, irq.vector);
+		}
+
+		/**
+		 * When AVIC is disabled, we fall-back to setup
+		 * IRTE w/ legacy mode
+		 */
+		if (set && kvm_vcpu_apicv_active(&svm->vcpu)) {
+			struct amd_iommu_pi_data *pi_data;
+
+			/**
+			 * Allocating new amd_iommu_pi_data, which will get
+			 * add to the per-vcpu pi_list.
+			 */
+			pi_data = kzalloc(sizeof(struct amd_iommu_pi_data),
+					  GFP_KERNEL);
+			if (!pi_data) {
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			/* Try to enable guest_mode in IRTE */
+			pi_data->ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
+						     vcpu->vcpu_id);
+			pi_data->vcpu_data = &vcpu_info;
+			pi_data->is_guest_mode = true;
+			ret = irq_set_vcpu_affinity(host_irq, pi_data);
+
+			/**
+			 * We save the pointer to pi_data in the struct
+			 * vcpu_svm so that we can reference to them directly
+			 * when we update vcpu scheduling information in IOMMU
+			 * irte.
+			 */
+			if (!ret && pi_data->is_guest_mode)
+				svm_pi_list_add(svm, pi_data);
+		} else {
+			/* Use legacy mode in IRTE */
+			struct amd_iommu_pi_data pi;
+
+			/**
+			 * Here, pi is used to:
+			 * - Tell IOMMU to use legacy mode for this interrupt.
+			 * - Retrieve ga_tag of prior interrupt remapping data.
+			 */
+			pi.is_guest_mode = false;
+			ret = irq_set_vcpu_affinity(host_irq, &pi);
+
+			/**
+			 * We need to check if the interrupt was previously
+			 * setup with the guest_mode by checking if the ga_tag
+			 * was cached. If so, we need to clean up the per-vcpu
+			 * pi_list.
+			 */
+			if (!ret && pi.ga_tag) {
+				struct kvm_vcpu *vcpu = kvm_get_vcpu_by_id(kvm,
+						AVIC_GATAG_TO_VCPUID(pi.ga_tag));
+
+				if (vcpu)
+					svm_pi_list_del(to_svm(vcpu), &pi);
+			}
+		}
+
+		if (ret < 0) {
+			pr_err("%s: failed to update PI IRTE\n", __func__);
+			goto out;
+		}
+	}
+
+	ret = 0;
+out:
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+	return ret;
+}
+
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -5212,6 +5437,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 
 	.pmu_ops = &amd_pmu_ops,
 	.deliver_posted_interrupt = svm_deliver_avic_intr,
+	.update_pi_irte = svm_update_pi_irte,
 };
 
 static int __init svm_init(void)
-- 
1.9.1

  parent reply	other threads:[~2016-07-25  9:32 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-25  9:31 [PART2 PATCH v5 00/12] iommu/AMD: Introduce IOMMU AVIC support Suravee Suthikulpanit
2016-07-25  9:32 ` [PART2 PATCH v5 01/12] iommu/amd: Detect and enable guest vAPIC support Suravee Suthikulpanit
2016-08-09 14:30   ` Joerg Roedel
2016-07-25  9:32 ` [PART2 PATCH v5 02/12] iommu/amd: Move and introduce new IRTE-related unions and structures Suravee Suthikulpanit
2016-07-25  9:32 ` [PART2 PATCH v5 03/12] iommu/amd: Introduce interrupt remapping ops structure Suravee Suthikulpanit
2016-07-25  9:32 ` [PART2 PATCH v5 04/12] iommu/amd: Add support for multiple IRTE formats Suravee Suthikulpanit
2016-07-25  9:32 ` [PART2 PATCH v5 05/12] iommu/amd: Detect and initialize guest vAPIC log Suravee Suthikulpanit
2016-07-25  9:32 ` [PART2 PATCH v5 06/12] iommu/amd: Adding GALOG interrupt handler Suravee Suthikulpanit
2016-08-09 14:43   ` Joerg Roedel
2016-08-16  2:43     ` Suravee Suthikulpanit
2016-07-25  9:32 ` [PART2 PATCH v5 07/12] iommu/amd: Introduce amd_iommu_update_ga() Suravee Suthikulpanit
2016-07-25  9:32 ` [PART2 PATCH v5 08/12] iommu/amd: Implements irq_set_vcpu_affinity() hook to setup vapic mode for pass-through devices Suravee Suthikulpanit
2016-07-25  9:32 ` [PART2 PATCH v5 09/12] iommu/amd: Enable vAPIC interrupt remapping mode by default Suravee Suthikulpanit
2016-08-09 14:54   ` Joerg Roedel
2016-07-25  9:32 ` [PART2 PATCH v5 10/12] svm: Introduces AVIC per-VM ID Suravee Suthikulpanit
2016-08-12 14:16   ` Radim Krčmář
2016-08-18 12:24     ` Suravee Suthikulpanit
2016-07-25  9:32 ` [PART2 PATCH v5 11/12] svm: Introduce AMD IOMMU avic_ga_log_notifier Suravee Suthikulpanit
2016-08-12 14:27   ` Radim Krčmář
2016-07-25  9:32 ` Suravee Suthikulpanit [this message]
2016-08-13 12:03   ` [PART2 PATCH v5 12/12] svm: Implements update_pi_irte hook to setup posted interrupt Radim Krčmář
2016-08-16 15:19     ` Suravee Suthikulpanit
2016-08-16 16:33       ` Radim Krčmář
2016-08-18 15:43         ` Suravee Suthikulpanit
2016-08-08 14:42 ` [PART2 PATCH v5 00/12] iommu/AMD: Introduce IOMMU AVIC support Suravee Suthikulpanit
2016-08-09 14:58   ` Joerg Roedel
2016-08-12  4:11     ` Suravee Suthikulpanit

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1469439131-11308-13-git-send-email-suravee.suthikulpanit@amd.com \
    --to=suravee.suthikulpanit@amd.com \
    --cc=alex.williamson@redhat.com \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=rkrcmar@redhat.com \
    --cc=sherry.hurwitz@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).