All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sheng Yang <sheng@linux.intel.com>
To: Avi Kivity <avi@redhat.com>
Cc: kvm@vger.kernel.org, Sheng Yang <sheng@linux.intel.com>
Subject: [PATCH] KVM: Enable snooping control for supported hardware
Date: Thu, 23 Apr 2009 16:43:02 +0800	[thread overview]
Message-ID: <1240476182-26254-1-git-send-email-sheng@linux.intel.com> (raw)

Memory aliases with different memory types are a problem for the guest. For a
guest without an assigned device, the memory type of guest memory is always the
same as the host's (WB); but with an assigned device, some part of memory may be
used for DMA and then set to an uncacheable memory type (UC/WC), which conflicts
with the host memory type and is therefore a potential issue.

Snooping control can guarantee cache correctness for memory accesses made
through the DMA engine of VT-d.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h |    2 ++
 arch/x86/kvm/mmu.c              |   23 ++++++++++++++++++++---
 virt/kvm/iommu.c                |   27 ++++++++++++++++++++++++---
 3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3fc4623..d2da40f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -397,6 +397,8 @@ struct kvm_arch{
 	struct list_head assigned_dev_head;
 	struct list_head oos_global_pages;
 	struct iommu_domain *iommu_domain;
+#define KVM_IOMMU_CACHE_COHERENCY	0x1
+	int iommu_flags;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 409d08e..1e63a87 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1713,10 +1713,27 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
 	if (mt_mask) {
+		/* For VT-d and EPT combination
+		 * 1. MMIO: always map as UC
+		 * 2. EPT without VT-d: always map as WB and set IGMT=1 to
+		 *    keep consistent with host MTRR
+		 * 3. EPT with VT-d:
+		 *   a. VT-d with snooping control feature: keep consistent
+		 *	with host MTRR can guarantee the correctness
+		 *   b. VT-d without snooping control feature: can't
+		 *      guarantee the result, try to trust guest.
+		 */
 		if (!kvm_is_mmio_pfn(pfn)) {
-			mt_mask = get_memory_type(vcpu, gfn) <<
-				kvm_x86_ops->get_mt_mask_shift();
-			mt_mask |= VMX_EPT_IGMT_BIT;
+			if (vcpu->kvm->arch.iommu_domain &&
+			    !(vcpu->kvm->arch.iommu_flags &
+				    KVM_IOMMU_CACHE_COHERENCY)) {
+				mt_mask = get_memory_type(vcpu, gfn) <<
+					  kvm_x86_ops->get_mt_mask_shift();
+			} else {
+				mt_mask = MTRR_TYPE_WRBACK <<
+					kvm_x86_ops->get_mt_mask_shift();
+				mt_mask |= VMX_EPT_IGMT_BIT;
+			}
 		} else
 			mt_mask = MTRR_TYPE_UNCACHABLE <<
 				kvm_x86_ops->get_mt_mask_shift();
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 4c40375..1514758 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 	pfn_t pfn;
 	int i, r = 0;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
+	int flags;
 
 	/* check if iommu exists and in use */
 	if (!domain)
 		return 0;
 
+	flags = IOMMU_READ | IOMMU_WRITE;
+	if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
+		flags |= IOMMU_CACHE;
+
 	for (i = 0; i < npages; i++) {
 		/* check if already mapped */
 		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
@@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 		r = iommu_map_range(domain,
 				    gfn_to_gpa(gfn),
 				    pfn_to_hpa(pfn),
-				    PAGE_SIZE,
-				    IOMMU_READ | IOMMU_WRITE);
+				    PAGE_SIZE, flags);
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%lx\n", pfn);
@@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm,
 {
 	struct pci_dev *pdev = NULL;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
-	int r;
+	int r, last_flags;
 
 	/* check if iommu exists and in use */
 	if (!domain)
@@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm,
 		return r;
 	}
 
+	last_flags = kvm->arch.iommu_flags;
+	if (iommu_domain_has_cap(kvm->arch.iommu_domain,
+				 IOMMU_CAP_CACHE_COHERENCY))
+		kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
+
+	/* Check if need to update IOMMU page table for guest memory */
+	if ((last_flags ^ kvm->arch.iommu_flags) ==
+			KVM_IOMMU_CACHE_COHERENCY) {
+		kvm_iommu_unmap_memslots(kvm);
+		r = kvm_iommu_map_memslots(kvm);
+		if (r)
+			goto out_unmap;
+	}
+
 	printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
 		assigned_dev->host_busnr,
 		PCI_SLOT(assigned_dev->host_devfn),
 		PCI_FUNC(assigned_dev->host_devfn));
 
 	return 0;
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
 }
 
 int kvm_deassign_device(struct kvm *kvm,
-- 
1.5.4.5


             reply	other threads:[~2009-04-23  8:43 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-04-23  8:43 Sheng Yang [this message]
2009-04-24 22:37 ` [PATCH] KVM: Enable snooping control for supported hardware Marcelo Tosatti
2009-04-26 11:12   ` Sheng Yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1240476182-26254-1-git-send-email-sheng@linux.intel.com \
    --to=sheng@linux.intel.com \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.