From: Sheng Yang <sheng@linux.intel.com>
To: Avi Kivity <avi@redhat.com>, Marcelo Tosatti <mtosatti@redhat.com>
Cc: kvm@vger.kernel.org, Sheng Yang <sheng@linux.intel.com>
Subject: [PATCH 2/2] KVM: Enable snooping control for supported hardware
Date: Mon, 27 Apr 2009 17:47:45 +0800 [thread overview]
Message-ID: <1240825665-15016-2-git-send-email-sheng@linux.intel.com> (raw)
In-Reply-To: <1240825665-15016-1-git-send-email-sheng@linux.intel.com>
Memory aliases with different memory type is a problem for guest. For the guest
without assigned device, the memory type of guest memory would always been the
same as host(WB); but for the assigned device, some part of memory may be used
as DMA and then set to uncacheable memory type(UC/WC), which would be a conflict of
host memory type then be a potential issue.
Snooping control can guarantee the cache correctness of memory go through the
DMA engine of VT-d.
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
arch/x86/include/asm/kvm_host.h | 5 ++++-
arch/x86/kvm/mmu.c | 16 +++++-----------
arch/x86/kvm/svm.c | 4 ++--
arch/x86/kvm/vmx.c | 29 ++++++++++++++++++++++++++---
virt/kvm/iommu.c | 27 ++++++++++++++++++++++++---
5 files changed, 61 insertions(+), 20 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ba6906f..b972889 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -395,6 +395,8 @@ struct kvm_arch{
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
+#define KVM_IOMMU_CACHE_COHERENCY 0x1
+ int iommu_flags;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
@@ -523,7 +525,7 @@ struct kvm_x86_ops {
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
- int (*get_mt_mask_shift)(void);
+ u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
};
extern struct kvm_x86_ops *kvm_x86_ops;
@@ -551,6 +553,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
gpa_t addr, unsigned long *ret);
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
extern bool tdp_enabled;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 55c6923..ea1c2aa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1606,7 +1606,7 @@ static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
return mtrr_state->def_type;
}
-static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
{
u8 mtrr;
@@ -1616,6 +1616,7 @@ static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
mtrr = MTRR_TYPE_WRBACK;
return mtrr;
}
+EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
@@ -1688,16 +1689,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
spte |= shadow_user_mask;
if (largepage)
spte |= PT_PAGE_SIZE_MASK;
- if (tdp_enabled) {
- if (!kvm_is_mmio_pfn(pfn)) {
- mt_mask = get_memory_type(vcpu, gfn) <<
- kvm_x86_ops->get_mt_mask_shift();
- mt_mask |= VMX_EPT_IGMT_BIT;
- } else
- mt_mask = MTRR_TYPE_UNCACHABLE <<
- kvm_x86_ops->get_mt_mask_shift();
- spte |= mt_mask;
- }
+ if (tdp_enabled)
+ spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
+ kvm_is_mmio_pfn(pfn));
spte |= (u64)pfn << PAGE_SHIFT;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 053f3c5..2bbe1de 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2617,7 +2617,7 @@ static int get_npt_level(void)
#endif
}
-static int svm_get_mt_mask_shift(void)
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
return 0;
}
@@ -2678,7 +2678,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
- .get_mt_mask_shift = svm_get_mt_mask_shift,
+ .get_mt_mask = svm_get_mt_mask,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 85db8b2..db853b6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3678,9 +3678,32 @@ static int get_ept_level(void)
return VMX_EPT_DEFAULT_GAW + 1;
}
-static int vmx_get_mt_mask_shift(void)
+static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
- return VMX_EPT_MT_EPTE_SHIFT;
+ int ret;
+
+ /* For VT-d and EPT combination
+ * 1. MMIO: always map as UC
+ * 2. EPT with VT-d:
+ * a. VT-d with snooping control feature: snooping control feature of
+ * VT-d engine can guarantee the cache correctness. Just set it
+ * to WB to keep consistent with host. So the same as item 3.
+ * b. VT-d without snooping control feature: can't guarantee the
+ * result, try to trust guest.
+ * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep
+ * consistent with host MTRR
+ */
+ if (is_mmio)
+ ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
+ else if (vcpu->kvm->arch.iommu_domain &&
+ !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY))
+ ret = kvm_get_guest_memory_type(vcpu, gfn) <<
+ VMX_EPT_MT_EPTE_SHIFT;
+ else
+ ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
+ | VMX_EPT_IGMT_BIT;
+
+ return ret;
}
static struct kvm_x86_ops vmx_x86_ops = {
@@ -3737,7 +3760,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.interrupt_allowed = vmx_interrupt_allowed,
.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
- .get_mt_mask_shift = vmx_get_mt_mask_shift,
+ .get_mt_mask = vmx_get_mt_mask,
};
static int __init vmx_init(void)
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 4c40375..1514758 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm,
pfn_t pfn;
int i, r = 0;
struct iommu_domain *domain = kvm->arch.iommu_domain;
+ int flags;
/* check if iommu exists and in use */
if (!domain)
return 0;
+ flags = IOMMU_READ | IOMMU_WRITE;
+ if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
+ flags |= IOMMU_CACHE;
+
for (i = 0; i < npages; i++) {
/* check if already mapped */
if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
@@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
r = iommu_map_range(domain,
gfn_to_gpa(gfn),
pfn_to_hpa(pfn),
- PAGE_SIZE,
- IOMMU_READ | IOMMU_WRITE);
+ PAGE_SIZE, flags);
if (r) {
printk(KERN_ERR "kvm_iommu_map_address:"
"iommu failed to map pfn=%lx\n", pfn);
@@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm,
{
struct pci_dev *pdev = NULL;
struct iommu_domain *domain = kvm->arch.iommu_domain;
- int r;
+ int r, last_flags;
/* check if iommu exists and in use */
if (!domain)
@@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm,
return r;
}
+ last_flags = kvm->arch.iommu_flags;
+ if (iommu_domain_has_cap(kvm->arch.iommu_domain,
+ IOMMU_CAP_CACHE_COHERENCY))
+ kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
+
+ /* Check if need to update IOMMU page table for guest memory */
+ if ((last_flags ^ kvm->arch.iommu_flags) ==
+ KVM_IOMMU_CACHE_COHERENCY) {
+ kvm_iommu_unmap_memslots(kvm);
+ r = kvm_iommu_map_memslots(kvm);
+ if (r)
+ goto out_unmap;
+ }
+
printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
return 0;
+out_unmap:
+ kvm_iommu_unmap_memslots(kvm);
+ return r;
}
int kvm_deassign_device(struct kvm *kvm,
--
1.5.4.5
next prev parent reply other threads:[~2009-04-27 9:47 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-04-27 9:47 [PATCH 1/2] KVM: Discard shadow_mt_mask Sheng Yang
2009-04-27 9:47 ` Sheng Yang [this message]
2009-04-27 11:30 ` Sheng Yang
2009-04-27 11:44 ` Sheng Yang
2009-04-27 22:27 ` Marcelo Tosatti
2009-04-28 0:54 ` Sheng Yang
-- strict thread matches above, loose matches on Subject: below --
2009-04-27 12:35 [PATCH 1/2] KVM: Replace get_mt_mask_shift with get_mt_mask Sheng Yang
2009-04-27 12:35 ` [PATCH 2/2] KVM: Enable snooping control for supported hardware Sheng Yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1240825665-15016-2-git-send-email-sheng@linux.intel.com \
--to=sheng@linux.intel.com \
--cc=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=mtosatti@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox