public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26  8:55 ` [PATCH 1/2] VT-d: changes to support KVM Amit Shah
@ 2008-08-26  8:55   ` Amit Shah
  2008-08-26 10:28     ` Zhang, Xiantao
  2008-09-03 16:52     ` Amit Shah
  0 siblings, 2 replies; 25+ messages in thread
From: Amit Shah @ 2008-08-26  8:55 UTC (permalink / raw)
  To: avi
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay, Amit Shah

From: Ben-Ami Yassour <benami@il.ibm.com>

Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>
---
 arch/x86/kvm/Makefile      |    3 +
 arch/x86/kvm/vtd.c         |  203 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |   14 +++
 include/asm-x86/kvm_host.h |    3 +
 include/linux/kvm.h        |    3 +
 include/linux/kvm_host.h   |   32 +++++++
 virt/kvm/kvm_main.c        |    9 ++-
 7 files changed, 266 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..3072b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
 	i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..4336769
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			  gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, rc;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn && !is_mmio_pfn(pfn))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (!is_mmio_pfn(pfn)) {
+			rc = intel_iommu_page_mapping(domain,
+						      gfn_to_gpa(gfn),
+						      pfn_to_hpa(pfn),
+						      PAGE_SIZE,
+						      DMA_PTE_READ |
+						      DMA_PTE_WRITE);
+			if (rc) {
+				kvm_release_pfn_clean(pfn);
+				printk(KERN_DEBUG "kvm_iommu_map_pages:"
+				       "iommu failed to map pfn=%lx\n", pfn);
+				return rc;
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			return 0;
+		}
+
+		gfn++;
+	}
+	return 0;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, rc;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					 kvm->memslots[i].npages);
+		if (rc) {
+			up_read(&kvm->slots_lock);
+			return rc;
+		}
+	}
+	up_read(&kvm->slots_lock);
+	return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int rc;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "intel iommu not found\n");
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+
+	rc = kvm_iommu_map_memslots(kvm);
+	if (rc)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					 pdev);
+	if (rc) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return rc;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct pci_dev *pdev = NULL;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		for_each_pci_dev(pdev) {
+			if ((pdev->bus->number == entry->host_busnr) &&
+			    (pdev->devfn == entry->host_devfn))
+				break;
+		}
+
+		if (pdev == NULL)
+			return -ENODEV;
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain,
+				       pdev->bus->number, pdev->devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bfc7c33..38ab48b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -276,9 +277,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
 out_disable:
 	pci_disable_device(dev);
 out_put:
@@ -1145,6 +1155,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -4264,6 +4277,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 982b6b2..fcc8088 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -364,6 +364,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -513,6 +514,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
 
+int is_mmio_pfn(pfn_t pfn);
+
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef4bc6f..4269be1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -384,6 +384,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#define KVM_CAP_IOMMU 18
 
 /*
  * ioctls for VM fds
@@ -495,4 +496,6 @@ struct kvm_assigned_irq {
 	__u32 flags;
 };
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad..b703890 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_DMAR
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+#else /* CONFIG_DMAR */
+static inline int kvm_iommu_map_pages(struct kvm *kvm,
+				      gfn_t base_gfn,
+				      unsigned long npages)
+{
+	return 0;
+}
+
+static inline int kvm_iommu_map_guest(struct kvm *kvm,
+				      struct kvm_assigned_dev_kernel
+				      *assigned_dev)
+{
+	return -ENODEV;
+}
+
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
@@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
+static inline hpa_t pfn_to_hpa(pfn_t pfn)
+{
+	return (hpa_t)pfn << PAGE_SHIFT;
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0309571..191bfe1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out_free;
+
 	return 0;
 
 out_free:
-- 
1.5.4.3


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26  8:55   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
@ 2008-08-26 10:28     ` Zhang, Xiantao
  2008-08-26 10:35       ` Amit Shah
  2008-09-03 16:52     ` Amit Shah
  1 sibling, 1 reply; 25+ messages in thread
From: Zhang, Xiantao @ 2008-08-26 10:28 UTC (permalink / raw)
  To: Amit Shah, avi
  Cc: kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Han, Weidong, Kay, Allen M

Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to avoid
future code move.  
Thanks
Xiantao

Amit Shah wrote:
> From: Ben-Ami Yassour <benami@il.ibm.com>
> 
> Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>
> 
> This patch enables PCI device assignment based on VT-d support.
> When a device is assigned to the guest, the guest memory is pinned and
> the mapping is updated in the VT-d IOMMU.
> 
> [Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
> and also control enable/disable from userspace]
> 
> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> Signed-off-by: Weidong Han <weidong.han@intel.com>
> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> Signed-off-by: Amit Shah <amit.shah@qumranet.com>
> ---
>  arch/x86/kvm/Makefile      |    3 +
>  arch/x86/kvm/vtd.c         |  203
>  ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c     
>  |   14 +++ include/asm-x86/kvm_host.h |    3 +
>  include/linux/kvm.h        |    3 +
>  include/linux/kvm_host.h   |   32 +++++++
>  virt/kvm/kvm_main.c        |    9 ++-
>  7 files changed, 266 insertions(+), 1 deletions(-)
>  create mode 100644 arch/x86/kvm/vtd.c
> 
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index d0e940b..3072b17 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
> 
>  kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o
>  	lapic.o \ i8254.o
> +ifeq ($(CONFIG_DMAR),y)
> +kvm-objs += vtd.o
> +endif
>  obj-$(CONFIG_KVM) += kvm.o
>  kvm-intel-objs = vmx.o
>  obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
> diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
> new file mode 100644
> index 0000000..4336769
> --- /dev/null
> +++ b/arch/x86/kvm/vtd.c
> @@ -0,0 +1,203 @@
> +/*
> + * Copyright (c) 2006, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> modify it + * under the terms and conditions of the GNU General
> Public License, + * version 2, as published by the Free Software
> Foundation. + *
> + * This program is distributed in the hope it will be useful, but
> WITHOUT + * ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> General Public License for + * more details.
> + *
> + * You should have received a copy of the GNU General Public License
> along with + * this program; if not, write to the Free Software
> Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA
> 02111-1307 USA. + *
> + * Copyright (C) 2006-2008 Intel Corporation
> + * Copyright IBM Corporation, 2008
> + * Author: Allen M. Kay <allen.m.kay@intel.com>
> + * Author: Weidong Han <weidong.han@intel.com>
> + * Author: Ben-Ami Yassour <benami@il.ibm.com>
> + */
> +
> +#include <linux/list.h>
> +#include <linux/kvm_host.h>
> +#include <linux/pci.h>
> +#include <linux/dmar.h>
> +#include <linux/intel-iommu.h>
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
> +
> +int kvm_iommu_map_pages(struct kvm *kvm,
> +			  gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	int i, rc;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	/* check if iommu exists and in use */
> +	if (!domain)
> +		return 0;
> +
> +	for (i = 0; i < npages; i++) {
> +		/* check if already mapped */
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn_to_gpa(gfn));
> +		if (pfn && !is_mmio_pfn(pfn))
> +			continue;
> +
> +		pfn = gfn_to_pfn(kvm, gfn);
> +		if (!is_mmio_pfn(pfn)) {
> +			rc = intel_iommu_page_mapping(domain,
> +						      gfn_to_gpa(gfn),
> +						      pfn_to_hpa(pfn),
> +						      PAGE_SIZE,
> +						      DMA_PTE_READ |
> +						      DMA_PTE_WRITE);
> +			if (rc) {
> +				kvm_release_pfn_clean(pfn);
> +				printk(KERN_DEBUG "kvm_iommu_map_pages:"
> +				       "iommu failed to map pfn=%lx\n",
pfn);
> +				return rc;
> +			}
> +		} else {
> +			printk(KERN_DEBUG "kvm_iommu_map_page:"
> +			       "invalid pfn=%lx\n", pfn);
> +			return 0;
> +		}
> +
> +		gfn++;
> +	}
> +	return 0;
> +}
> +
> +static int kvm_iommu_map_memslots(struct kvm *kvm)
> +{
> +	int i, rc;
> +
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> +					 kvm->memslots[i].npages);
> +		if (rc) {
> +			up_read(&kvm->slots_lock);
> +			return rc;
> +		}
> +	}
> +	up_read(&kvm->slots_lock);
> +	return 0;
> +}
> +
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_assigned_dev_kernel *assigned_dev)
> +{
> +	struct pci_dev *pdev = NULL;
> +	int rc;
> +
> +	if (!intel_iommu_found()) {
> +		printk(KERN_ERR "intel iommu not found\n");
> +		return -ENODEV;
> +	}
> +
> +	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
> +	       assigned_dev->host_busnr,
> +	       PCI_SLOT(assigned_dev->host_devfn),
> +	       PCI_FUNC(assigned_dev->host_devfn));
> +
> +	pdev = assigned_dev->dev;
> +
> +	if (pdev == NULL) {
> +		if (kvm->arch.intel_iommu_domain) {
> +
intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
> +			kvm->arch.intel_iommu_domain = NULL;
> +		}
> +		return -ENODEV;
> +	}
> +
> +	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
> +
> +	rc = kvm_iommu_map_memslots(kvm);
> +	if (rc)
> +		goto out_unmap;
> +
> +	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
> +			       pdev->bus->number, pdev->devfn);
> +
> +	rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
> +					 pdev);
> +	if (rc) {
> +		printk(KERN_ERR "Domain context map for %s failed",
> +		       pci_name(pdev));
> +		goto out_unmap;
> +	}
> +	return 0;
> +
> +out_unmap:
> +	kvm_iommu_unmap_memslots(kvm);
> +	return rc;
> +}
> +
> +static void kvm_iommu_put_pages(struct kvm *kvm,
> +			       gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +	int i;
> +
> +	for (i = 0; i < npages; i++) {
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn_to_gpa(gfn));
> +		kvm_release_pfn_clean(pfn);
> +		gfn++;
> +	}
> +}
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm)
> +{
> +	int i;
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
> +				    kvm->memslots[i].npages);
> +	}
> +	up_read(&kvm->slots_lock);
> +
> +	return 0;
> +}
> +
> +int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +	struct kvm_assigned_dev_kernel *entry;
> +	struct pci_dev *pdev = NULL;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	/* check if iommu exists and in use */
> +	if (!domain)
> +		return 0;
> +
> +	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
> +		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
> +		       entry->host_busnr,
> +		       PCI_SLOT(entry->host_devfn),
> +		       PCI_FUNC(entry->host_devfn));
> +
> +		for_each_pci_dev(pdev) {
> +			if ((pdev->bus->number == entry->host_busnr) &&
> +			    (pdev->devfn == entry->host_devfn))
> +				break;
> +		}
> +
> +		if (pdev == NULL)
> +			return -ENODEV;
> +
> +		/* detach kvm dmar domain */
> +		intel_iommu_detach_dev(domain,
> +				       pdev->bus->number, pdev->devfn);
> +	}
> +	kvm_iommu_unmap_memslots(kvm);
> +	intel_iommu_domain_exit(domain);
> +	return 0;
> +}
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index bfc7c33..38ab48b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -35,6 +35,7 @@
>  #include <linux/module.h>
>  #include <linux/mman.h>
>  #include <linux/highmem.h>
> +#include <linux/intel-iommu.h>
> 
>  #include <asm/uaccess.h>
>  #include <asm/msr.h>
> @@ -276,9 +277,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm
> *kvm, 
> 
>  	list_add(&match->list, &kvm->arch.assigned_dev_head);
> 
> +	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
> +		r = kvm_iommu_map_guest(kvm, match);
> +		if (r)
> +			goto out_list_del;
> +	}
> +
>  out:
>  	mutex_unlock(&kvm->lock);
>  	return r;
> +out_list_del:
> +	list_del(&match->list);
> +	pci_release_regions(dev);
>  out_disable:
>  	pci_disable_device(dev);
>  out_put:
> @@ -1145,6 +1155,9 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	case KVM_CAP_PV_MMU:
>  		r = !tdp_enabled;
>  		break;
> +	case KVM_CAP_IOMMU:
> +		r = intel_iommu_found();
> +		break;
>  	default:
>  		r = 0;
>  		break;
> @@ -4264,6 +4277,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
> 
>  void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
> +	kvm_iommu_unmap_guest(kvm);
>  	kvm_free_assigned_devices(kvm);
>  	kvm_free_pit(kvm);
>  	kfree(kvm->arch.vpic);
> diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> index 982b6b2..fcc8088 100644
> --- a/include/asm-x86/kvm_host.h
> +++ b/include/asm-x86/kvm_host.h
> @@ -364,6 +364,7 @@ struct kvm_arch{
>  	 */
>  	struct list_head active_mmu_pages;
>  	struct list_head assigned_dev_head;
> +	struct dmar_domain *intel_iommu_domain;
>  	struct kvm_pic *vpic;
>  	struct kvm_ioapic *vioapic;
>  	struct kvm_pit *vpit;
> @@ -513,6 +514,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu,
>  gpa_t gpa, int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long
>  		  bytes, gpa_t addr, unsigned long *ret);
> 
> +int is_mmio_pfn(pfn_t pfn);
> +
>  extern bool tdp_enabled;
> 
>  enum emulation_result {
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index ef4bc6f..4269be1 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -384,6 +384,7 @@ struct kvm_trace_rec {
>  #define KVM_CAP_COALESCED_MMIO 15
>  #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected
>  in guest */ #define KVM_CAP_DEVICE_ASSIGNMENT 17
> +#define KVM_CAP_IOMMU 18
> 
>  /*
>   * ioctls for VM fds
> @@ -495,4 +496,6 @@ struct kvm_assigned_irq {
>  	__u32 flags;
>  };
> 
> +#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
> +
>  #endif
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index a18aaad..b703890 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
> 
> +#ifdef CONFIG_DMAR
> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
> +			unsigned long npages);
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_assigned_dev_kernel *assigned_dev);
> +int kvm_iommu_unmap_guest(struct kvm *kvm);
> +#else /* CONFIG_DMAR */
> +static inline int kvm_iommu_map_pages(struct kvm *kvm,
> +				      gfn_t base_gfn,
> +				      unsigned long npages)
> +{
> +	return 0;
> +}
> +
> +static inline int kvm_iommu_map_guest(struct kvm *kvm,
> +				      struct kvm_assigned_dev_kernel
> +				      *assigned_dev)
> +{
> +	return -ENODEV;
> +}
> +
> +static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +	return 0;
> +}
> +#endif /* CONFIG_DMAR */
> +
>  static inline void kvm_guest_enter(void)
>  {
>  	account_system_vtime(current);
> @@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
>  	return (gpa_t)gfn << PAGE_SHIFT;
>  }
> 
> +static inline hpa_t pfn_to_hpa(pfn_t pfn)
> +{
> +	return (hpa_t)pfn << PAGE_SHIFT;
> +}
> +
>  static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
>  {
>  	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 0309571..191bfe1 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -41,6 +41,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/mman.h>
>  #include <linux/swap.h>
> +#include <linux/intel-iommu.h>
> 
>  #include <asm/processor.h>
>  #include <asm/io.h>
> @@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
>  	return likely(n >= 0 && n < KVM_MAX_VCPUS);
>  }
> 
> -static inline int is_mmio_pfn(pfn_t pfn)
> +inline int is_mmio_pfn(pfn_t pfn)
>  {
>  	if (pfn_valid(pfn))
>  		return PageReserved(pfn_to_page(pfn));
> @@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
>  	}
> 
>  	kvm_free_physmem_slot(&old, &new);
> +
> +	/* map the pages in iommu page table */
> +	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
> +	if (r)
> +		goto out_free;
> +
>  	return 0;
> 
>  out_free:
> --
> 1.5.4.3


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26 10:28     ` Zhang, Xiantao
@ 2008-08-26 10:35       ` Amit Shah
  2008-08-26 10:42         ` Zhang, Xiantao
  0 siblings, 1 reply; 25+ messages in thread
From: Amit Shah @ 2008-08-26 10:35 UTC (permalink / raw)
  To: Zhang, Xiantao
  Cc: avi, kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Han, Weidong, Kay, Allen M

* On Tuesday 26 Aug 2008 15:58:42 Zhang, Xiantao wrote:
> Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to avoid
> future code move.

As of now, device assignment resides inside the x86 directory and is only 
tested in x86 environment. Once we support ia64, we'll have a lot of files 
moving anyway.

However, I don't have any preference for the location of vtd.c; depends on 
what Avi thinks.

Amit

^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26 10:35       ` Amit Shah
@ 2008-08-26 10:42         ` Zhang, Xiantao
  2008-08-26 10:57           ` Amit Shah
  2008-08-26 14:41           ` Avi Kivity
  0 siblings, 2 replies; 25+ messages in thread
From: Zhang, Xiantao @ 2008-08-26 10:42 UTC (permalink / raw)
  To: Amit Shah
  Cc: avi, kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Han, Weidong, Kay, Allen M

Amit Shah wrote:
> * On Tuesday 26 Aug 2008 15:58:42 Zhang, Xiantao wrote:
>> Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to
>> avoid future code move.
> 
> As of now, device assignment resides inside the x86 directory and is
> only tested in x86 environment. Once we support ia64, we'll have a
> lot of files moving anyway.

Just a suggestion.  Even if put it @ virt/kvm, we still can make it only
compiled with x86 before eanbling it for ia64. :)
Have you considered the cross-arch support in the current code?

> However, I don't have any preference for the location of vtd.c;
> depends on what Avi thinks.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26 10:42         ` Zhang, Xiantao
@ 2008-08-26 10:57           ` Amit Shah
  2008-08-26 11:04             ` Zhang, Xiantao
  2008-08-26 14:41           ` Avi Kivity
  1 sibling, 1 reply; 25+ messages in thread
From: Amit Shah @ 2008-08-26 10:57 UTC (permalink / raw)
  To: Zhang, Xiantao
  Cc: avi, kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Han, Weidong, Kay, Allen M

* On Tuesday 26 Aug 2008 16:12:56 Zhang, Xiantao wrote:
> Amit Shah wrote:
> > * On Tuesday 26 Aug 2008 15:58:42 Zhang, Xiantao wrote:
> >> Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to
> >> avoid future code move.
> >
> > As of now, device assignment resides inside the x86 directory and is
> > only tested in x86 environment. Once we support ia64, we'll have a
> > lot of files moving anyway.
>
> Just a suggestion.  Even if put it @ virt/kvm, we still can make it only
> compiled with x86 before eanbling it for ia64. :)

Sure; I'm fine with that :-)

> Have you considered the cross-arch support in the current code?

I've not, since I don't know much about ia64, but Allen had mentioned that it 
shouldn't take much effort to port the current code to ia64.

Amit

^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26 10:57           ` Amit Shah
@ 2008-08-26 11:04             ` Zhang, Xiantao
  0 siblings, 0 replies; 25+ messages in thread
From: Zhang, Xiantao @ 2008-08-26 11:04 UTC (permalink / raw)
  To: Amit Shah
  Cc: avi, kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Han, Weidong, Kay, Allen M

Amit Shah wrote:
> * On Tuesday 26 Aug 2008 16:12:56 Zhang, Xiantao wrote:
>> Amit Shah wrote:
>>> * On Tuesday 26 Aug 2008 15:58:42 Zhang, Xiantao wrote:
>>>> Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to
>>>> avoid future code move.
>>> 
>>> As of now, device assignment resides inside the x86 directory and is
>>> only tested in x86 environment. Once we support ia64, we'll have a
>>> lot of files moving anyway.
>> 
>> Just a suggestion.  Even if put it @ virt/kvm, we still can make it
>> only compiled with x86 before eanbling it for ia64. :)
> 
> Sure; I'm fine with that :-)

>> Have you considered the cross-arch support in the current code?
> 
> I've not, since I don't know much about ia64, but Allen had mentioned
> that it shouldn't take much effort to port the current code to ia64.

Okay. Thanks for your explanation! 
Xiantao


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26 10:42         ` Zhang, Xiantao
  2008-08-26 10:57           ` Amit Shah
@ 2008-08-26 14:41           ` Avi Kivity
  2008-08-26 15:09             ` Han, Weidong
  1 sibling, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2008-08-26 14:41 UTC (permalink / raw)
  To: Zhang, Xiantao
  Cc: Amit Shah, kvm, muli, anthony, jbarnes, Woodhouse, David,
	Gross, Mark, benami, Han, Weidong, Kay, Allen M

Zhang, Xiantao wrote:
> Amit Shah wrote:
>   
>> * On Tuesday 26 Aug 2008 15:58:42 Zhang, Xiantao wrote:
>>     
>>> Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to
>>> avoid future code move.
>>>       
>> As of now, device assignment resides inside the x86 directory and is
>> only tested in x86 environment. Once we support ia64, we'll have a
>> lot of files moving anyway.
>>     
>
> Just a suggestion.  Even if put it @ virt/kvm, we still can make it only
>   

I'm fine with keeping it in x86 and moving it later, since the code is 
late already.  However if someone is willing to do the work to move it 
to virt/kvm/, I'm happy with that as well.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26 14:41           ` Avi Kivity
@ 2008-08-26 15:09             ` Han, Weidong
  0 siblings, 0 replies; 25+ messages in thread
From: Han, Weidong @ 2008-08-26 15:09 UTC (permalink / raw)
  To: Avi Kivity, Zhang, Xiantao
  Cc: Amit Shah, kvm, muli, anthony, jbarnes, Woodhouse, David,
	Gross, Mark, benami, Kay, Allen M

Avi Kivity wrote:
> Zhang, Xiantao wrote:
>> Amit Shah wrote:
>> 
>>> * On Tuesday 26 Aug 2008 15:58:42 Zhang, Xiantao wrote:
>>> 
>>>> Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to
>>>> avoid future code move. 
>>>> 
>>> As of now, device assignment resides inside the x86 directory and is
>>> only tested in x86 environment. Once we support ia64, we'll have a
>>> lot of files moving anyway. 
>>> 
>> 
>> Just a suggestion.  Even if put it @ virt/kvm, we still can make it
>> only 
>> 
> 
> I'm fine with keeping it in x86 and moving it later, since the code is
> late already.  However if someone is willing to do the work to move it
> to virt/kvm/, I'm happy with that as well.

I think we'd better keep it in x86 at this moment. Making VT-d code
arch-independent is our goal. Moving that file is not enough, let's do
it cleanly after merging VT-d patches into upstream.

Randy (Weidong)

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26  8:55   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
  2008-08-26 10:28     ` Zhang, Xiantao
@ 2008-09-03 16:52     ` Amit Shah
  2008-09-09  7:18       ` Han, Weidong
  1 sibling, 1 reply; 25+ messages in thread
From: Amit Shah @ 2008-09-03 16:52 UTC (permalink / raw)
  To: Ben-Ami Yassour1
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay, Avi Kivity

There are a couple of things here that might need some error handling:

* On Tuesday 26 August 2008 14:25:35 Amit Shah wrote:
> From: Ben-Ami Yassour <benami@il.ibm.com>
>
> Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>
>
> This patch enables PCI device assignment based on VT-d support.
> When a device is assigned to the guest, the guest memory is pinned and
> the mapping is updated in the VT-d IOMMU.
>
> [Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
> and also control enable/disable from userspace]
>
> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> Signed-off-by: Weidong Han <weidong.han@intel.com>
> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> Signed-off-by: Amit Shah <amit.shah@qumranet.com>


> +#include <linux/list.h>
> +#include <linux/kvm_host.h>
> +#include <linux/pci.h>
> +#include <linux/dmar.h>
> +#include <linux/intel-iommu.h>
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
> +
> +int kvm_iommu_map_pages(struct kvm *kvm,
> +			  gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	int i, rc;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	/* check if iommu exists and in use */
> +	if (!domain)
> +		return 0;
> +
> +	for (i = 0; i < npages; i++) {
> +		/* check if already mapped */
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn_to_gpa(gfn));
> +		if (pfn && !is_mmio_pfn(pfn))
> +			continue;
> +
> +		pfn = gfn_to_pfn(kvm, gfn);
> +		if (!is_mmio_pfn(pfn)) {
> +			rc = intel_iommu_page_mapping(domain,
> +						      gfn_to_gpa(gfn),
> +						      pfn_to_hpa(pfn),
> +						      PAGE_SIZE,
> +						      DMA_PTE_READ |
> +						      DMA_PTE_WRITE);
> +			if (rc) {
> +				kvm_release_pfn_clean(pfn);
> +				printk(KERN_DEBUG "kvm_iommu_map_pages:"
> +				       "iommu failed to map pfn=%lx\n", pfn);
> +				return rc;
> +			}
> +		} else {
> +			printk(KERN_DEBUG "kvm_iommu_map_page:"
> +			       "invalid pfn=%lx\n", pfn);
> +			return 0;
> +		}

In the error case, this function should itself call unmap_pages so that either 
all pages are mapped or none are. Also makes it easier to bail out in the two 
places this function gets called.

> +
> +		gfn++;
> +	}
> +	return 0;
> +}
> +
> +static int kvm_iommu_map_memslots(struct kvm *kvm)
> +{
> +	int i, rc;
> +
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> +					 kvm->memslots[i].npages);
> +		if (rc) {
> +			up_read(&kvm->slots_lock);
> +			return rc;
> +		}
> +	}
> +	up_read(&kvm->slots_lock);
> +	return 0;
> +}
> +
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_assigned_dev_kernel *assigned_dev)
> +{
> +	struct pci_dev *pdev = NULL;
> +	int rc;
> +
> +	if (!intel_iommu_found()) {
> +		printk(KERN_ERR "intel iommu not found\n");
> +		return -ENODEV;
> +	}
> +
> +	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
> +	       assigned_dev->host_busnr,
> +	       PCI_SLOT(assigned_dev->host_devfn),
> +	       PCI_FUNC(assigned_dev->host_devfn));
> +
> +	pdev = assigned_dev->dev;
> +
> +	if (pdev == NULL) {
> +		if (kvm->arch.intel_iommu_domain) {
> +			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
> +			kvm->arch.intel_iommu_domain = NULL;
> +		}
> +		return -ENODEV;
> +	}
> +
> +	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);

check if we really got the domain

> +
> +	rc = kvm_iommu_map_memslots(kvm);
> +	if (rc)
> +		goto out_unmap;
> +
> +	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
> +			       pdev->bus->number, pdev->devfn);
> +
> +	rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
> +					 pdev);

This function name (as Mark points out) doesn't make much sense; can this be 
changed?

> +	if (rc) {
> +		printk(KERN_ERR "Domain context map for %s failed",
> +		       pci_name(pdev));
> +		goto out_unmap;
> +	}
> +	return 0;
> +
> +out_unmap:
> +	kvm_iommu_unmap_memslots(kvm);
> +	return rc;
> +}
> +
> +static void kvm_iommu_put_pages(struct kvm *kvm,
> +			       gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +	int i;
> +
> +	for (i = 0; i < npages; i++) {
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn_to_gpa(gfn));
> +		kvm_release_pfn_clean(pfn);
> +		gfn++;
> +	}
> +}
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm)
> +{
> +	int i;
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
> +				    kvm->memslots[i].npages);
> +	}
> +	up_read(&kvm->slots_lock);
> +
> +	return 0;
> +}
> +
> +int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +	struct kvm_assigned_dev_kernel *entry;
> +	struct pci_dev *pdev = NULL;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	/* check if iommu exists and in use */
> +	if (!domain)
> +		return 0;
> +
> +	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
> +		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
> +		       entry->host_busnr,
> +		       PCI_SLOT(entry->host_devfn),
> +		       PCI_FUNC(entry->host_devfn));
> +
> +		for_each_pci_dev(pdev) {
> +			if ((pdev->bus->number == entry->host_busnr) &&
> +			    (pdev->devfn == entry->host_devfn))
> +				break;
> +		}

We store the PCI dev in entry->dev; no need to scan this entire list.

> +
> +		if (pdev == NULL)
> +			return -ENODEV;
> +
> +		/* detach kvm dmar domain */
> +		intel_iommu_detach_dev(domain,
> +				       pdev->bus->number, pdev->devfn);
> +	}
> +	kvm_iommu_unmap_memslots(kvm);
> +	intel_iommu_domain_exit(domain);
> +	return 0;
> +}
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index bfc7c33..38ab48b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -35,6 +35,7 @@
>  #include <linux/module.h>
>  #include <linux/mman.h>
>  #include <linux/highmem.h>
> +#include <linux/intel-iommu.h>
>
>  #include <asm/uaccess.h>
>  #include <asm/msr.h>
> @@ -276,9 +277,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
>
>  	list_add(&match->list, &kvm->arch.assigned_dev_head);
>
> +	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
> +		r = kvm_iommu_map_guest(kvm, match);
> +		if (r)
> +			goto out_list_del;
> +	}
> +
>  out:
>  	mutex_unlock(&kvm->lock);
>  	return r;
> +out_list_del:
> +	list_del(&match->list);
> +	pci_release_regions(dev);
>  out_disable:
>  	pci_disable_device(dev);
>  out_put:
> @@ -1145,6 +1155,9 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	case KVM_CAP_PV_MMU:
>  		r = !tdp_enabled;
>  		break;
> +	case KVM_CAP_IOMMU:
> +		r = intel_iommu_found();
> +		break;
>  	default:
>  		r = 0;
>  		break;
> @@ -4264,6 +4277,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
>
>  void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
> +	kvm_iommu_unmap_guest(kvm);
>  	kvm_free_assigned_devices(kvm);
>  	kvm_free_pit(kvm);
>  	kfree(kvm->arch.vpic);
> diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> index 982b6b2..fcc8088 100644
> --- a/include/asm-x86/kvm_host.h
> +++ b/include/asm-x86/kvm_host.h
> @@ -364,6 +364,7 @@ struct kvm_arch{
>  	 */
>  	struct list_head active_mmu_pages;
>  	struct list_head assigned_dev_head;
> +	struct dmar_domain *intel_iommu_domain;
>  	struct kvm_pic *vpic;
>  	struct kvm_ioapic *vioapic;
>  	struct kvm_pit *vpit;
> @@ -513,6 +514,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t
> gpa, int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
>  		  gpa_t addr, unsigned long *ret);
>
> +int is_mmio_pfn(pfn_t pfn);
> +
>  extern bool tdp_enabled;
>
>  enum emulation_result {
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index ef4bc6f..4269be1 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -384,6 +384,7 @@ struct kvm_trace_rec {
>  #define KVM_CAP_COALESCED_MMIO 15
>  #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in
> guest */ #define KVM_CAP_DEVICE_ASSIGNMENT 17
> +#define KVM_CAP_IOMMU 18
>
>  /*
>   * ioctls for VM fds
> @@ -495,4 +496,6 @@ struct kvm_assigned_irq {
>  	__u32 flags;
>  };
>
> +#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
> +
>  #endif
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index a18aaad..b703890 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
>
> +#ifdef CONFIG_DMAR
> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
> +			unsigned long npages);
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_assigned_dev_kernel *assigned_dev);
> +int kvm_iommu_unmap_guest(struct kvm *kvm);
> +#else /* CONFIG_DMAR */
> +static inline int kvm_iommu_map_pages(struct kvm *kvm,
> +				      gfn_t base_gfn,
> +				      unsigned long npages)
> +{
> +	return 0;
> +}
> +
> +static inline int kvm_iommu_map_guest(struct kvm *kvm,
> +				      struct kvm_assigned_dev_kernel
> +				      *assigned_dev)
> +{
> +	return -ENODEV;
> +}
> +
> +static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +	return 0;
> +}
> +#endif /* CONFIG_DMAR */
> +
>  static inline void kvm_guest_enter(void)
>  {
>  	account_system_vtime(current);
> @@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
>  	return (gpa_t)gfn << PAGE_SHIFT;
>  }
>
> +static inline hpa_t pfn_to_hpa(pfn_t pfn)
> +{
> +	return (hpa_t)pfn << PAGE_SHIFT;
> +}
> +

This can be a separate patch.

>  static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
>  {
>  	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 0309571..191bfe1 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -41,6 +41,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/mman.h>
>  #include <linux/swap.h>
> +#include <linux/intel-iommu.h>
>
>  #include <asm/processor.h>
>  #include <asm/io.h>
> @@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
>  	return likely(n >= 0 && n < KVM_MAX_VCPUS);
>  }
>
> -static inline int is_mmio_pfn(pfn_t pfn)
> +inline int is_mmio_pfn(pfn_t pfn)
>  {
>  	if (pfn_valid(pfn))
>  		return PageReserved(pfn_to_page(pfn));
> @@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
>  	}
>
>  	kvm_free_physmem_slot(&old, &new);
> +
> +	/* map the pages in iommu page table */
> +	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
> +	if (r)
> +		goto out_free;

Doing the unmapping in the map function will mean we don't have to check for 
error return values here.

> +
>  	return 0;
>
>  out_free:



^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-03 16:52     ` Amit Shah
@ 2008-09-09  7:18       ` Han, Weidong
  0 siblings, 0 replies; 25+ messages in thread
From: Han, Weidong @ 2008-09-09  7:18 UTC (permalink / raw)
  To: Amit Shah, Ben-Ami Yassour1
  Cc: kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Kay, Allen M, Avi Kivity

Amit Shah wrote:
> There are a couple of things here that might need some error handling:
> 
> * On Tuesday 26 August 2008 14:25:35 Amit Shah wrote:
>> From: Ben-Ami Yassour <benami@il.ibm.com>
>> 
>> Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>
>> 
>> This patch enables PCI device assignment based on VT-d support.
>> When a device is assigned to the guest, the guest memory is pinned
>> and 
>> the mapping is updated in the VT-d IOMMU.
>> 
>> [Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
>> and also control enable/disable from userspace]
>> 
>> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
>> Signed-off-by: Weidong Han <weidong.han@intel.com>
>> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
>> Signed-off-by: Amit Shah <amit.shah@qumranet.com>
> 
> 
>> +#include <linux/list.h>
>> +#include <linux/kvm_host.h>
>> +#include <linux/pci.h>
>> +#include <linux/dmar.h>
>> +#include <linux/intel-iommu.h>
>> +
>> +static int kvm_iommu_unmap_memslots(struct kvm *kvm); +
>> +int kvm_iommu_map_pages(struct kvm *kvm,
>> +			  gfn_t base_gfn, unsigned long npages)
>> +{
>> +	gfn_t gfn = base_gfn;
>> +	pfn_t pfn;
>> +	int i, rc;
>> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain; +
>> +	/* check if iommu exists and in use */
>> +	if (!domain)
>> +		return 0;
>> +
>> +	for (i = 0; i < npages; i++) {
>> +		/* check if already mapped */
>> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
>> +						     gfn_to_gpa(gfn));
>> +		if (pfn && !is_mmio_pfn(pfn))
>> +			continue;
>> +
>> +		pfn = gfn_to_pfn(kvm, gfn);
>> +		if (!is_mmio_pfn(pfn)) {
>> +			rc = intel_iommu_page_mapping(domain,
>> +						      gfn_to_gpa(gfn),
>> +						      pfn_to_hpa(pfn),
>> +						      PAGE_SIZE,
>> +						      DMA_PTE_READ |
>> +						      DMA_PTE_WRITE);
>> +			if (rc) {
>> +				kvm_release_pfn_clean(pfn);
>> +				printk(KERN_DEBUG "kvm_iommu_map_pages:"
>> +				       "iommu failed to map pfn=%lx\n",
pfn);
>> +				return rc;
>> +			}
>> +		} else {
>> +			printk(KERN_DEBUG "kvm_iommu_map_page:"
>> +			       "invalid pfn=%lx\n", pfn);
>> +			return 0;
>> +		}
> 
> In the error case, this function should itself call unmap_pages so
> that either all pages are mapped or none are. Also makes it easier to
> bail out in the two places this function gets called.
> 

Good catch. Will fix it.

>> +
>> +		gfn++;
>> +	}
>> +	return 0;
>> +}
>> +
>> +static int kvm_iommu_map_memslots(struct kvm *kvm) +{
>> +	int i, rc;
>> +
>> +	down_read(&kvm->slots_lock);
>> +	for (i = 0; i < kvm->nmemslots; i++) {
>> +		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
>> +					 kvm->memslots[i].npages);
>> +		if (rc) {
>> +			up_read(&kvm->slots_lock);
>> +			return rc;
>> +		}
>> +	}
>> +	up_read(&kvm->slots_lock);
>> +	return 0;
>> +}
>> +
>> +int kvm_iommu_map_guest(struct kvm *kvm,
>> +			struct kvm_assigned_dev_kernel *assigned_dev)
>> +{
>> +	struct pci_dev *pdev = NULL;
>> +	int rc;
>> +
>> +	if (!intel_iommu_found()) {
>> +		printk(KERN_ERR "intel iommu not found\n");
>> +		return -ENODEV;
>> +	}
>> +
>> +	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
>> +	       assigned_dev->host_busnr,
>> +	       PCI_SLOT(assigned_dev->host_devfn),
>> +	       PCI_FUNC(assigned_dev->host_devfn));
>> +
>> +	pdev = assigned_dev->dev;
>> +
>> +	if (pdev == NULL) {
>> +		if (kvm->arch.intel_iommu_domain) {
>> +
intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
>> +			kvm->arch.intel_iommu_domain = NULL;
>> +		}
>> +		return -ENODEV;
>> +	}
>> +
>> +	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
> 
> check if we really got the domain

yes, need a check here.

> 
>> +
>> +	rc = kvm_iommu_map_memslots(kvm);
>> +	if (rc)
>> +		goto out_unmap;
>> +
>> +	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
>> +			       pdev->bus->number, pdev->devfn);
>> +
>> +	rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
>> +					 pdev);
> 
> This function name (as Mark points out) doesn't make much sense; can
> this be changed?

This function name keeps consistent with original name in vtd driver. do
you want to add a verb in the name? 

> 
>> +	if (rc) {
>> +		printk(KERN_ERR "Domain context map for %s failed", +

>> pci_name(pdev)); +		goto out_unmap;
>> +	}
>> +	return 0;
>> +
>> +out_unmap:
>> +	kvm_iommu_unmap_memslots(kvm);
>> +	return rc;
>> +}
>> +
>> +static void kvm_iommu_put_pages(struct kvm *kvm,
>> +			       gfn_t base_gfn, unsigned long npages)
>> +{
>> +	gfn_t gfn = base_gfn;
>> +	pfn_t pfn;
>> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain; +
int i;
>> +
>> +	for (i = 0; i < npages; i++) {
>> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
>> +						     gfn_to_gpa(gfn));
>> +		kvm_release_pfn_clean(pfn);
>> +		gfn++;
>> +	}
>> +}
>> +
>> +static int kvm_iommu_unmap_memslots(struct kvm *kvm) +{
>> +	int i;
>> +	down_read(&kvm->slots_lock);
>> +	for (i = 0; i < kvm->nmemslots; i++) {
>> +		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
>> +				    kvm->memslots[i].npages);
>> +	}
>> +	up_read(&kvm->slots_lock);
>> +
>> +	return 0;
>> +}
>> +
>> +int kvm_iommu_unmap_guest(struct kvm *kvm)
>> +{
>> +	struct kvm_assigned_dev_kernel *entry;
>> +	struct pci_dev *pdev = NULL;
>> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain; +
>> +	/* check if iommu exists and in use */
>> +	if (!domain)
>> +		return 0;
>> +
>> +	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
>> +		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n", +

>> entry->host_busnr, +		       PCI_SLOT(entry->host_devfn),
>> +		       PCI_FUNC(entry->host_devfn));
>> +
>> +		for_each_pci_dev(pdev) {
>> +			if ((pdev->bus->number == entry->host_busnr) &&
>> +			    (pdev->devfn == entry->host_devfn))
>> +				break;
>> +		}
> 
> We store the PCI dev in entry->dev; no need to scan this entire list.

yes, it's not neccessary.

> 
>> +
>> +		if (pdev == NULL)
>> +			return -ENODEV;
>> +
>> +		/* detach kvm dmar domain */
>> +		intel_iommu_detach_dev(domain,
>> +				       pdev->bus->number, pdev->devfn);
>> +	}
>> +	kvm_iommu_unmap_memslots(kvm);
>> +	intel_iommu_domain_exit(domain);
>> +	return 0;
>> +}
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index a18aaad..b703890 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
>> 
>> +#ifdef CONFIG_DMAR
>> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
>> +			unsigned long npages); +int
kvm_iommu_map_guest(struct kvm *kvm,
>> +			struct kvm_assigned_dev_kernel *assigned_dev);
>> +int kvm_iommu_unmap_guest(struct kvm *kvm);
>> +#else /* CONFIG_DMAR */
>> +static inline int kvm_iommu_map_pages(struct kvm *kvm, +

>> gfn_t base_gfn, +				      unsigned long
npages)
>> +{
>> +	return 0;
>> +}
>> +
>> +static inline int kvm_iommu_map_guest(struct kvm *kvm,
>> +				      struct kvm_assigned_dev_kernel
>> +				      *assigned_dev)
>> +{
>> +	return -ENODEV;
>> +}
>> +
>> +static inline int kvm_iommu_unmap_guest(struct kvm *kvm) +{
>> +	return 0;
>> +}
>> +#endif /* CONFIG_DMAR */
>> +
>>  static inline void kvm_guest_enter(void)
>>  {
>>  	account_system_vtime(current);
>> @@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
>>  	return (gpa_t)gfn << PAGE_SHIFT;
>>  }
>> 
>> +static inline hpa_t pfn_to_hpa(pfn_t pfn)
>> +{
>> +	return (hpa_t)pfn << PAGE_SHIFT;
>> +}
>> +
> 
> This can be a separate patch.
> 
>>  static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)  {
>>  	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 0309571..191bfe1 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -41,6 +41,7 @@
>>  #include <linux/pagemap.h>
>>  #include <linux/mman.h>
>>  #include <linux/swap.h>
>> +#include <linux/intel-iommu.h>
>> 
>>  #include <asm/processor.h>
>>  #include <asm/io.h>
>> @@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
>>  	return likely(n >= 0 && n < KVM_MAX_VCPUS);
>>  }
>> 
>> -static inline int is_mmio_pfn(pfn_t pfn)
>> +inline int is_mmio_pfn(pfn_t pfn)
>>  {
>>  	if (pfn_valid(pfn))
>>  		return PageReserved(pfn_to_page(pfn));
>> @@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
}
>> 
>>  	kvm_free_physmem_slot(&old, &new);
>> +
>> +	/* map the pages in iommu page table */
>> +	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
>> +	if (r)
>> +		goto out_free;
> 
> Doing the unmapping in the map function will mean we don't have to
> check for error return values here.
> 
>> +
>>  	return 0;
>> 
>>  out_free:


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 2/2] KVM: Device Assignment with VT-d
@ 2008-09-09 13:51 Han, Weidong
  2008-09-09 14:39 ` Amit Shah
  0 siblings, 1 reply; 25+ messages in thread
From: Han, Weidong @ 2008-09-09 13:51 UTC (permalink / raw)
  To: avi
  Cc: kvm, Amit Shah, muli, anthony, jbarnes, Woodhouse, David,
	Gross, Mark, benami, Kay, Allen M, Yang, Sheng, mgross

[-- Attachment #1: Type: text/plain, Size: 11849 bytes --]

From: Ben-Ami Yassour <benami@il.ibm.com>

Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
---
 arch/x86/kvm/Makefile      |    3 +
 arch/x86/kvm/vtd.c         |  201
++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |   14 +++
 include/asm-x86/kvm_host.h |    3 +
 include/linux/kvm.h        |    3 +
 include/linux/kvm_host.h   |   32 +++++++
 virt/kvm/kvm_main.c        |    9 ++-
 7 files changed, 264 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..3072b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o
lapic.o \
 	i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..d80f117
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			  gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, rc;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn && !is_mmio_pfn(pfn))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (!is_mmio_pfn(pfn)) {
+			rc = intel_iommu_page_mapping(domain,
+						      gfn_to_gpa(gfn),
+						      pfn_to_hpa(pfn),
+						      PAGE_SIZE,
+						      DMA_PTE_READ |
+						      DMA_PTE_WRITE);
+			if (rc) {
+				printk(KERN_DEBUG "kvm_iommu_map_pages:"
+				       "iommu failed to map pfn=%lx\n",
pfn);
+				goto unmap_pages;
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			goto unmap_pages;
+		}
+
+		gfn++;
+	}
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, base_gfn, i);
+	return rc;
+	
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, rc;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					 kvm->memslots[i].npages);
+		if (rc) {
+			up_read(&kvm->slots_lock);
+			return rc;
+		}
+	}
+	up_read(&kvm->slots_lock);
+	return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int rc;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "intel iommu not found\n");
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+
intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	if (!kvm->arch.intel_iommu_domain)
+		return -ENODEV;
+
+	rc = kvm_iommu_map_memslots(kvm);
+	if (rc)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					 pdev);
+	if (rc) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return rc;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain, entry->host_busnr,
+				       entry->host_devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb71..342f67a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm
*kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
 out_disable:
 	pci_disable_device(dev);
 out_put:
@@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -4266,6 +4279,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 815efc3..addd874 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -364,6 +364,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -514,6 +515,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t
gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
 
+int is_mmio_pfn(pfn_t pfn);
+
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef4bc6f..4269be1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -384,6 +384,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in
guest */
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#define KVM_CAP_IOMMU 18
 
 /*
  * ioctls for VM fds
@@ -495,4 +496,6 @@ struct kvm_assigned_irq {
 	__u32 flags;
 };
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad..b703890 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_DMAR
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+#else /* CONFIG_DMAR */
+static inline int kvm_iommu_map_pages(struct kvm *kvm,
+				      gfn_t base_gfn,
+				      unsigned long npages)
+{
+	return 0;
+}
+
+static inline int kvm_iommu_map_guest(struct kvm *kvm,
+				      struct kvm_assigned_dev_kernel
+				      *assigned_dev)
+{
+	return -ENODEV;
+}
+
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
@@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
+static inline hpa_t pfn_to_hpa(pfn_t pfn)
+{
+	return (hpa_t)pfn << PAGE_SHIFT;
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de3b029..6b55960 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out;
+
 	return 0;
 
 out_free:
-- 
1.5.1

[-- Attachment #2: 0002-Enable-pci-device-assignment-based-on-VT-d-support.patch --]
[-- Type: application/octet-stream, Size: 11415 bytes --]

From: Ben-Ami Yassour <benami@il.ibm.com>

Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
---
 arch/x86/kvm/Makefile      |    3 +
 arch/x86/kvm/vtd.c         |  201 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |   14 +++
 include/asm-x86/kvm_host.h |    3 +
 include/linux/kvm.h        |    3 +
 include/linux/kvm_host.h   |   32 +++++++
 virt/kvm/kvm_main.c        |    9 ++-
 7 files changed, 264 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..3072b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
 	i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..d80f117
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			  gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, rc;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn && !is_mmio_pfn(pfn))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (!is_mmio_pfn(pfn)) {
+			rc = intel_iommu_page_mapping(domain,
+						      gfn_to_gpa(gfn),
+						      pfn_to_hpa(pfn),
+						      PAGE_SIZE,
+						      DMA_PTE_READ |
+						      DMA_PTE_WRITE);
+			if (rc) {
+				printk(KERN_DEBUG "kvm_iommu_map_pages:"
+				       "iommu failed to map pfn=%lx\n", pfn);
+				goto unmap_pages;
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			goto unmap_pages;
+		}
+
+		gfn++;
+	}
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, base_gfn, i);
+	return rc;
+	
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, rc;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					 kvm->memslots[i].npages);
+		if (rc) {
+			up_read(&kvm->slots_lock);
+			return rc;
+		}
+	}
+	up_read(&kvm->slots_lock);
+	return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int rc;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "intel iommu not found\n");
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	if (!kvm->arch.intel_iommu_domain)
+		return -ENODEV;
+
+	rc = kvm_iommu_map_memslots(kvm);
+	if (rc)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					 pdev);
+	if (rc) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return rc;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain, entry->host_busnr,
+				       entry->host_devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb71..342f67a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
 out_disable:
 	pci_disable_device(dev);
 out_put:
@@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -4266,6 +4279,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 815efc3..addd874 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -364,6 +364,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -514,6 +515,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
 
+int is_mmio_pfn(pfn_t pfn);
+
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef4bc6f..4269be1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -384,6 +384,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#define KVM_CAP_IOMMU 18
 
 /*
  * ioctls for VM fds
@@ -495,4 +496,6 @@ struct kvm_assigned_irq {
 	__u32 flags;
 };
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad..b703890 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_DMAR
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+#else /* CONFIG_DMAR */
+static inline int kvm_iommu_map_pages(struct kvm *kvm,
+				      gfn_t base_gfn,
+				      unsigned long npages)
+{
+	return 0;
+}
+
+static inline int kvm_iommu_map_guest(struct kvm *kvm,
+				      struct kvm_assigned_dev_kernel
+				      *assigned_dev)
+{
+	return -ENODEV;
+}
+
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
@@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
+static inline hpa_t pfn_to_hpa(pfn_t pfn)
+{
+	return (hpa_t)pfn << PAGE_SHIFT;
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de3b029..6b55960 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out;
+
 	return 0;
 
 out_free:
-- 
1.5.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-09 13:51 Han, Weidong
@ 2008-09-09 14:39 ` Amit Shah
  2008-09-09 15:05   ` Han, Weidong
  0 siblings, 1 reply; 25+ messages in thread
From: Amit Shah @ 2008-09-09 14:39 UTC (permalink / raw)
  To: Han, Weidong
  Cc: avi, kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Kay, Allen M, Yang, Sheng, mgross

* On Tuesday 09 September 2008 19:21:20 Han, Weidong wrote:

> +static int kvm_iommu_map_memslots(struct kvm *kvm)
> +{
> +	int i, rc;
> +
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> +					 kvm->memslots[i].npages);
> +		if (rc) {
> +			up_read(&kvm->slots_lock);
> +			return rc;
> +		}
> +	}
> +	up_read(&kvm->slots_lock);
> +	return 0;
> +}

I simplified this to:

static int kvm_iommu_map_memslots(struct kvm *kvm)
{
	int i, r;

	down_read(&kvm->slots_lock);
	for (i = 0; i < kvm->nmemslots; i++) {
		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
					kvm->memslots[i].npages);
		if (r)
			break;
	}
	up_read(&kvm->slots_lock);
	return r;
}

Also cleaned up some whitespace.

I'll send out the patchset soon.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-09 14:44 ` [PATCH 1/2] VT-d: Changes to support KVM Amit Shah
@ 2008-09-09 14:44   ` Amit Shah
  0 siblings, 0 replies; 25+ messages in thread
From: Amit Shah @ 2008-09-09 14:44 UTC (permalink / raw)
  To: avi
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay, Amit Shah

From: Ben-Ami Yassour <benami@il.ibm.com>

Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
---
 arch/x86/kvm/Makefile      |    3 +
 arch/x86/kvm/vtd.c         |  198 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |   14 +++
 include/asm-x86/kvm_host.h |    3 +
 include/linux/kvm.h        |    3 +
 include/linux/kvm_host.h   |   32 +++++++
 virt/kvm/kvm_main.c        |    9 ++-
 7 files changed, 261 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..3072b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
 	i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..8660b2a
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+				gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, r;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn && !is_mmio_pfn(pfn))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (!is_mmio_pfn(pfn)) {
+			r = intel_iommu_page_mapping(domain,
+						     gfn_to_gpa(gfn),
+						     pfn_to_hpa(pfn),
+						     PAGE_SIZE,
+						     DMA_PTE_READ |
+						     DMA_PTE_WRITE);
+			if (r) {
+				printk(KERN_DEBUG "kvm_iommu_map_pages:"
+				       "iommu failed to map pfn=%lx\n", pfn);
+				goto unmap_pages;
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			goto unmap_pages;
+		}
+
+		gfn++;
+	}
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, base_gfn, i);
+	return rc;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, r;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					kvm->memslots[i].npages);
+		if (r)
+			break;
+	}
+	up_read(&kvm->slots_lock);
+	return r;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int r;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	if (!kvm->arch.intel_iommu_domain)
+		return -ENODEV;
+
+	r = kvm_iommu_map_memslots(kvm);
+	if (r)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					pdev);
+	if (r) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return rc;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain, entry->host_busnr,
+				       entry->host_devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb71..342f67a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
 out_disable:
 	pci_disable_device(dev);
 out_put:
@@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -4266,6 +4279,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 815efc3..addd874 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -364,6 +364,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -514,6 +515,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
 
+int is_mmio_pfn(pfn_t pfn);
+
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef4bc6f..4269be1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -384,6 +384,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#define KVM_CAP_IOMMU 18
 
 /*
  * ioctls for VM fds
@@ -495,4 +496,6 @@ struct kvm_assigned_irq {
 	__u32 flags;
 };
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad..b703890 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_DMAR
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+#else /* CONFIG_DMAR */
+static inline int kvm_iommu_map_pages(struct kvm *kvm,
+				      gfn_t base_gfn,
+				      unsigned long npages)
+{
+	return 0;
+}
+
+static inline int kvm_iommu_map_guest(struct kvm *kvm,
+				      struct kvm_assigned_dev_kernel
+				      *assigned_dev)
+{
+	return -ENODEV;
+}
+
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
@@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
+static inline hpa_t pfn_to_hpa(pfn_t pfn)
+{
+	return (hpa_t)pfn << PAGE_SHIFT;
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de3b029..6b55960 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out;
+
 	return 0;
 
 out_free:
-- 
1.6.0.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-09 14:39 ` Amit Shah
@ 2008-09-09 15:05   ` Han, Weidong
  0 siblings, 0 replies; 25+ messages in thread
From: Han, Weidong @ 2008-09-09 15:05 UTC (permalink / raw)
  To: Amit Shah
  Cc: avi, kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Kay, Allen M, Yang, Sheng, mgross

Amit, 

Thanks for your quick fixing.

Randy (Weidong)

Amit Shah wrote:
> * On Tuesday 09 September 2008 19:21:20 Han, Weidong wrote:
> 
>> +static int kvm_iommu_map_memslots(struct kvm *kvm) +{
>> +	int i, rc;
>> +
>> +	down_read(&kvm->slots_lock);
>> +	for (i = 0; i < kvm->nmemslots; i++) {
>> +		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
>> +					 kvm->memslots[i].npages);
>> +		if (rc) {
>> +			up_read(&kvm->slots_lock);
>> +			return rc;
>> +		}
>> +	}
>> +	up_read(&kvm->slots_lock);
>> +	return 0;
>> +}
> 
> I simplified this to:
> 
> static int kvm_iommu_map_memslots(struct kvm *kvm)
> {
> 	int i, r;
> 
> 	down_read(&kvm->slots_lock);
> 	for (i = 0; i < kvm->nmemslots; i++) {
> 		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> 					kvm->memslots[i].npages);
> 		if (r)
> 			break;
> 	}
> 	up_read(&kvm->slots_lock);
> 	return r;
> }
> 
> Also cleaned up some whitespace.
> 
> I'll send out the patchset soon.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* VT-d support for device assignment
@ 2008-09-09 15:37 Amit Shah
  2008-09-09 15:37 ` [PATCH 1/2] VT-d: Changes to support KVM Amit Shah
  2008-09-14  0:46 ` VT-d support for device assignment Avi Kivity
  0 siblings, 2 replies; 25+ messages in thread
From: Amit Shah @ 2008-09-09 15:37 UTC (permalink / raw)
  To: avi
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay


Sorry for the resends; this one fixes two compile errors introduced by me and a warning.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 1/2] VT-d: Changes to support KVM
  2008-09-09 15:37 VT-d support for device assignment Amit Shah
@ 2008-09-09 15:37 ` Amit Shah
  2008-09-09 15:37   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
  2008-09-10 15:53   ` [PATCH 1/2] VT-d: Changes to support KVM Avi Kivity
  2008-09-14  0:46 ` VT-d support for device assignment Avi Kivity
  1 sibling, 2 replies; 25+ messages in thread
From: Amit Shah @ 2008-09-09 15:37 UTC (permalink / raw)
  To: avi
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay, Amit Shah

From: Kay, Allen M <allen.m.kay@intel.com>

This patch extends the VT-d driver to support KVM

[Ben: fixed memory pinning]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
---
 drivers/pci/dmar.c          |    4 +-
 drivers/pci/intel-iommu.c   |  116 ++++++++++++++-
 drivers/pci/intel-iommu.h   |  344 -----------------------------------------
 drivers/pci/iova.c          |    2 +-
 drivers/pci/iova.h          |   52 -------
 include/linux/intel-iommu.h |  355 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/iova.h        |   52 +++++++
 7 files changed, 522 insertions(+), 403 deletions(-)
 delete mode 100644 drivers/pci/intel-iommu.h
 delete mode 100644 drivers/pci/iova.h
 create mode 100644 include/linux/intel-iommu.h
 create mode 100644 include/linux/iova.h

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 8bf86ae..1df28ea 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -26,8 +26,8 @@
 
 #include <linux/pci.h>
 #include <linux/dmar.h>
-#include "iova.h"
-#include "intel-iommu.h"
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 
 #undef PREFIX
 #define PREFIX "DMAR:"
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8d0e60a..3175a4c 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -33,8 +33,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/mempool.h>
 #include <linux/timer.h>
-#include "iova.h"
-#include "intel-iommu.h"
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 #include <asm/proto.h> /* force_iommu in this header in x86-64*/
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
@@ -160,7 +160,7 @@ static inline void *alloc_domain_mem(void)
 	return iommu_kmem_cache_alloc(iommu_domain_cache);
 }
 
-static inline void free_domain_mem(void *vaddr)
+static void free_domain_mem(void *vaddr)
 {
 	kmem_cache_free(iommu_domain_cache, vaddr);
 }
@@ -1414,7 +1414,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
  * find_domain
  * Note: we use struct pci_dev->dev.archdata.iommu stores the info
  */
-struct dmar_domain *
+static struct dmar_domain *
 find_domain(struct pci_dev *pdev)
 {
 	struct device_domain_info *info;
@@ -2430,3 +2430,111 @@ int __init intel_iommu_init(void)
 	return 0;
 }
 
+void intel_iommu_domain_exit(struct dmar_domain *domain)
+{
+	u64 end;
+
+	/* Domain 0 is reserved, so dont process it */
+	if (!domain)
+		return;
+
+	end = DOMAIN_MAX_ADDR(domain->gaw);
+	end = end & (~PAGE_MASK_4K);
+
+	/* clear ptes */
+	dma_pte_clear_range(domain, 0, end);
+
+	/* free page tables */
+	dma_pte_free_pagetable(domain, 0, end);
+
+	iommu_free_domain(domain);
+	free_domain_mem(domain);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
+
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
+{
+	struct dmar_drhd_unit *drhd;
+	struct dmar_domain *domain;
+	struct intel_iommu *iommu;
+
+	drhd = dmar_find_matched_drhd_unit(pdev);
+	if (!drhd) {
+		printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
+		return NULL;
+	}
+
+	iommu = drhd->iommu;
+	if (!iommu) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: iommu == NULL\n");
+		return NULL;
+	}
+	domain = iommu_alloc_domain(iommu);
+	if (!domain) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: domain == NULL\n");
+		return NULL;
+	}
+	if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: domain_init() failed\n");
+		intel_iommu_domain_exit(domain);
+		return NULL;
+	}
+	return domain;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+
+int intel_iommu_context_mapping(
+	struct dmar_domain *domain, struct pci_dev *pdev)
+{
+	int rc;
+	rc = domain_context_mapping(domain, pdev);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+
+int intel_iommu_page_mapping(
+	struct dmar_domain *domain, dma_addr_t iova,
+	u64 hpa, size_t size, int prot)
+{
+	int rc;
+	rc = domain_page_mapping(domain, iova, hpa, size, prot);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+{
+	detach_domain_for_dev(domain, bus, devfn);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+
+struct dmar_domain *
+intel_iommu_find_domain(struct pci_dev *pdev)
+{
+	return find_domain(pdev);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+
+int intel_iommu_found(void)
+{
+	return g_num_of_iommus;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_found);
+
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
+{
+	struct dma_pte *pte;
+	u64 pfn;
+
+	pfn = 0;
+	pte = addr_to_dma_pte(domain, iova);
+
+	if (pte)
+		pfn = dma_pte_addr(*pte);
+
+	return pfn >> PAGE_SHIFT_4K;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
deleted file mode 100644
index afc0ad9..0000000
--- a/drivers/pci/intel-iommu.h
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Author: Ashok Raj <ashok.raj@intel.com>
- * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
- */
-
-#ifndef _INTEL_IOMMU_H_
-#define _INTEL_IOMMU_H_
-
-#include <linux/types.h>
-#include <linux/msi.h>
-#include <linux/sysdev.h>
-#include "iova.h"
-#include <linux/io.h>
-
-/*
- * We need a fixed PAGE_SIZE of 4K irrespective of
- * arch PAGE_SIZE for IOMMU page tables.
- */
-#define PAGE_SHIFT_4K		(12)
-#define PAGE_SIZE_4K		(1UL << PAGE_SHIFT_4K)
-#define PAGE_MASK_4K		(((u64)-1) << PAGE_SHIFT_4K)
-#define PAGE_ALIGN_4K(addr)	(((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
-
-#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT_4K)
-#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
-#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
-
-/*
- * Intel IOMMU register specification per version 1.0 public spec.
- */
-
-#define	DMAR_VER_REG	0x0	/* Arch version supported by this IOMMU */
-#define	DMAR_CAP_REG	0x8	/* Hardware supported capabilities */
-#define	DMAR_ECAP_REG	0x10	/* Extended capabilities supported */
-#define	DMAR_GCMD_REG	0x18	/* Global command register */
-#define	DMAR_GSTS_REG	0x1c	/* Global status register */
-#define	DMAR_RTADDR_REG	0x20	/* Root entry table */
-#define	DMAR_CCMD_REG	0x28	/* Context command reg */
-#define	DMAR_FSTS_REG	0x34	/* Fault Status register */
-#define	DMAR_FECTL_REG	0x38	/* Fault control register */
-#define	DMAR_FEDATA_REG	0x3c	/* Fault event interrupt data register */
-#define	DMAR_FEADDR_REG	0x40	/* Fault event interrupt addr register */
-#define	DMAR_FEUADDR_REG 0x44	/* Upper address register */
-#define	DMAR_AFLOG_REG	0x58	/* Advanced Fault control */
-#define	DMAR_PMEN_REG	0x64	/* Enable Protected Memory Region */
-#define	DMAR_PLMBASE_REG 0x68	/* PMRR Low addr */
-#define	DMAR_PLMLIMIT_REG 0x6c	/* PMRR low limit */
-#define	DMAR_PHMBASE_REG 0x70	/* pmrr high base addr */
-#define	DMAR_PHMLIMIT_REG 0x78	/* pmrr high limit */
-
-#define OFFSET_STRIDE		(9)
-/*
-#define dmar_readl(dmar, reg) readl(dmar + reg)
-#define dmar_readq(dmar, reg) ({ \
-		u32 lo, hi; \
-		lo = readl(dmar + reg); \
-		hi = readl(dmar + reg + 4); \
-		(((u64) hi) << 32) + lo; })
-*/
-static inline u64 dmar_readq(void __iomem *addr)
-{
-	u32 lo, hi;
-	lo = readl(addr);
-	hi = readl(addr + 4);
-	return (((u64) hi) << 32) + lo;
-}
-
-static inline void dmar_writeq(void __iomem *addr, u64 val)
-{
-	writel((u32)val, addr);
-	writel((u32)(val >> 32), addr + 4);
-}
-
-#define DMAR_VER_MAJOR(v)		(((v) & 0xf0) >> 4)
-#define DMAR_VER_MINOR(v)		((v) & 0x0f)
-
-/*
- * Decoding Capability Register
- */
-#define cap_read_drain(c)	(((c) >> 55) & 1)
-#define cap_write_drain(c)	(((c) >> 54) & 1)
-#define cap_max_amask_val(c)	(((c) >> 48) & 0x3f)
-#define cap_num_fault_regs(c)	((((c) >> 40) & 0xff) + 1)
-#define cap_pgsel_inv(c)	(((c) >> 39) & 1)
-
-#define cap_super_page_val(c)	(((c) >> 34) & 0xf)
-#define cap_super_offset(c)	(((find_first_bit(&cap_super_page_val(c), 4)) \
-					* OFFSET_STRIDE) + 21)
-
-#define cap_fault_reg_offset(c)	((((c) >> 24) & 0x3ff) * 16)
-#define cap_max_fault_reg_offset(c) \
-	(cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)
-
-#define cap_zlr(c)		(((c) >> 22) & 1)
-#define cap_isoch(c)		(((c) >> 23) & 1)
-#define cap_mgaw(c)		((((c) >> 16) & 0x3f) + 1)
-#define cap_sagaw(c)		(((c) >> 8) & 0x1f)
-#define cap_caching_mode(c)	(((c) >> 7) & 1)
-#define cap_phmr(c)		(((c) >> 6) & 1)
-#define cap_plmr(c)		(((c) >> 5) & 1)
-#define cap_rwbf(c)		(((c) >> 4) & 1)
-#define cap_afl(c)		(((c) >> 3) & 1)
-#define cap_ndoms(c)		(((unsigned long)1) << (4 + 2 * ((c) & 0x7)))
-/*
- * Extended Capability Register
- */
-
-#define ecap_niotlb_iunits(e)	((((e) >> 24) & 0xff) + 1)
-#define ecap_iotlb_offset(e) 	((((e) >> 8) & 0x3ff) * 16)
-#define ecap_max_iotlb_offset(e) \
-	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
-#define ecap_coherent(e)	((e) & 0x1)
-
-
-/* IOTLB_REG */
-#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
-#define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
-#define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
-#define DMA_TLB_IIRG(type) ((type >> 60) & 7)
-#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
-#define DMA_TLB_READ_DRAIN (((u64)1) << 49)
-#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
-#define DMA_TLB_DID(id)	(((u64)((id) & 0xffff)) << 32)
-#define DMA_TLB_IVT (((u64)1) << 63)
-#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
-#define DMA_TLB_MAX_SIZE (0x3f)
-
-/* PMEN_REG */
-#define DMA_PMEN_EPM (((u32)1)<<31)
-#define DMA_PMEN_PRS (((u32)1)<<0)
-
-/* GCMD_REG */
-#define DMA_GCMD_TE (((u32)1) << 31)
-#define DMA_GCMD_SRTP (((u32)1) << 30)
-#define DMA_GCMD_SFL (((u32)1) << 29)
-#define DMA_GCMD_EAFL (((u32)1) << 28)
-#define DMA_GCMD_WBF (((u32)1) << 27)
-
-/* GSTS_REG */
-#define DMA_GSTS_TES (((u32)1) << 31)
-#define DMA_GSTS_RTPS (((u32)1) << 30)
-#define DMA_GSTS_FLS (((u32)1) << 29)
-#define DMA_GSTS_AFLS (((u32)1) << 28)
-#define DMA_GSTS_WBFS (((u32)1) << 27)
-
-/* CCMD_REG */
-#define DMA_CCMD_ICC (((u64)1) << 63)
-#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
-#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
-#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
-#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
-#define DMA_CCMD_MASK_NOBIT 0
-#define DMA_CCMD_MASK_1BIT 1
-#define DMA_CCMD_MASK_2BIT 2
-#define DMA_CCMD_MASK_3BIT 3
-#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
-#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
-
-/* FECTL_REG */
-#define DMA_FECTL_IM (((u32)1) << 31)
-
-/* FSTS_REG */
-#define DMA_FSTS_PPF ((u32)2)
-#define DMA_FSTS_PFO ((u32)1)
-#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
-
-/* FRCD_REG, 32 bits access */
-#define DMA_FRCD_F (((u32)1) << 31)
-#define dma_frcd_type(d) ((d >> 30) & 1)
-#define dma_frcd_fault_reason(c) (c & 0xff)
-#define dma_frcd_source_id(c) (c & 0xffff)
-#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
-
-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
-	u64	val;
-	u64	rsvd1;
-};
-#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
-static inline bool root_present(struct root_entry *root)
-{
-	return (root->val & 1);
-}
-static inline void set_root_present(struct root_entry *root)
-{
-	root->val |= 1;
-}
-static inline void set_root_value(struct root_entry *root, unsigned long value)
-{
-	root->val |= value & PAGE_MASK_4K;
-}
-
-struct context_entry;
-static inline struct context_entry *
-get_context_addr_from_root(struct root_entry *root)
-{
-	return (struct context_entry *)
-		(root_present(root)?phys_to_virt(
-		root->val & PAGE_MASK_4K):
-		NULL);
-}
-
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
-	u64 lo;
-	u64 hi;
-};
-#define context_present(c) ((c).lo & 1)
-#define context_fault_disable(c) (((c).lo >> 1) & 1)
-#define context_translation_type(c) (((c).lo >> 2) & 3)
-#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
-#define context_address_width(c) ((c).hi &  7)
-#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
-
-#define context_set_present(c) do {(c).lo |= 1;} while (0)
-#define context_set_fault_enable(c) \
-	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
-#define context_set_translation_type(c, val) \
-	do { \
-		(c).lo &= (((u64)-1) << 4) | 3; \
-		(c).lo |= ((val) & 3) << 2; \
-	} while (0)
-#define CONTEXT_TT_MULTI_LEVEL 0
-#define context_set_address_root(c, val) \
-	do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
-#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
-#define context_set_domain_id(c, val) \
-	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
-#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
-
-/*
- * 0: readable
- * 1: writable
- * 2-6: reserved
- * 7: super page
- * 8-11: available
- * 12-63: Host physcial address
- */
-struct dma_pte {
-	u64 val;
-};
-#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
-
-#define DMA_PTE_READ (1)
-#define DMA_PTE_WRITE (2)
-
-#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
-#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
-#define dma_set_pte_prot(p, prot) \
-		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
-#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
-#define dma_set_pte_addr(p, addr) do {\
-		(p).val |= ((addr) & PAGE_MASK_4K); } while (0)
-#define dma_pte_present(p) (((p).val & 3) != 0)
-
-struct intel_iommu;
-
-struct dmar_domain {
-	int	id;			/* domain id */
-	struct intel_iommu *iommu;	/* back pointer to owning iommu */
-
-	struct list_head devices; 	/* all devices' list */
-	struct iova_domain iovad;	/* iova's that belong to this domain */
-
-	struct dma_pte	*pgd;		/* virtual address */
-	spinlock_t	mapping_lock;	/* page table lock */
-	int		gaw;		/* max guest address width */
-
-	/* adjusted guest address width, 0 is level 2 30-bit */
-	int		agaw;
-
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
-	int		flags;
-};
-
-/* PCI domain-device relationship */
-struct device_domain_info {
-	struct list_head link;	/* link to domain siblings */
-	struct list_head global; /* link to global list */
-	u8 bus;			/* PCI bus numer */
-	u8 devfn;		/* PCI devfn number */
-	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
-	struct dmar_domain *domain; /* pointer to domain */
-};
-
-extern int init_dmars(void);
-
-struct intel_iommu {
-	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
-	u64		cap;
-	u64		ecap;
-	unsigned long 	*domain_ids; /* bitmap of domains */
-	struct dmar_domain **domains; /* ptr to domains */
-	int		seg;
-	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
-	spinlock_t	lock; /* protect context, domain ids */
-	spinlock_t	register_lock; /* protect register handling */
-	struct root_entry *root_entry; /* virtual address */
-
-	unsigned int irq;
-	unsigned char name[7];    /* Device Name */
-	struct msi_msg saved_msg;
-	struct sys_device sysdev;
-};
-
-#ifndef CONFIG_DMAR_GFX_WA
-static inline void iommu_prepare_gfx_mapping(void)
-{
-	return;
-}
-#endif /* !CONFIG_DMAR_GFX_WA */
-
-#endif
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 3ef4ac0..2287116 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -7,7 +7,7 @@
  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  */
 
-#include "iova.h"
+#include <linux/iova.h>
 
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit)
diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h
deleted file mode 100644
index 228f6c9..0000000
--- a/drivers/pci/iova.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This file is released under the GPLv2.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
- *
- */
-
-#ifndef _IOVA_H_
-#define _IOVA_H_
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/rbtree.h>
-#include <linux/dma-mapping.h>
-
-/* IO virtual address start page frame number */
-#define IOVA_START_PFN		(1)
-
-/* iova structure */
-struct iova {
-	struct rb_node	node;
-	unsigned long	pfn_hi; /* IOMMU dish out addr hi */
-	unsigned long	pfn_lo; /* IOMMU dish out addr lo */
-};
-
-/* holds all the iova translations for a domain */
-struct iova_domain {
-	spinlock_t	iova_alloc_lock;/* Lock to protect iova  allocation */
-	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
-	struct rb_root	rbroot;		/* iova domain rbtree root */
-	struct rb_node	*cached32_node; /* Save last alloced node */
-	unsigned long	dma_32bit_pfn;
-};
-
-struct iova *alloc_iova_mem(void);
-void free_iova_mem(struct iova *iova);
-void free_iova(struct iova_domain *iovad, unsigned long pfn);
-void __free_iova(struct iova_domain *iovad, struct iova *iova);
-struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
-	unsigned long limit_pfn,
-	bool size_aligned);
-struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
-	unsigned long pfn_hi);
-void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
-void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit);
-struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
-void put_iova_domain(struct iova_domain *iovad);
-
-#endif
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
new file mode 100644
index 0000000..1490fc0
--- /dev/null
+++ b/include/linux/intel-iommu.h
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Ashok Raj <ashok.raj@intel.com>
+ * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ */
+
+#ifndef _INTEL_IOMMU_H_
+#define _INTEL_IOMMU_H_
+
+#include <linux/types.h>
+#include <linux/msi.h>
+#include <linux/sysdev.h>
+#include "iova.h"
+#include <linux/io.h>
+
+/*
+ * We need a fixed PAGE_SIZE of 4K irrespective of
+ * arch PAGE_SIZE for IOMMU page tables.
+ */
+#define PAGE_SHIFT_4K		(12)
+#define PAGE_SIZE_4K		(1UL << PAGE_SHIFT_4K)
+#define PAGE_MASK_4K		(((u64)-1) << PAGE_SHIFT_4K)
+#define PAGE_ALIGN_4K(addr)	(((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
+
+#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT_4K)
+#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
+#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
+
+/*
+ * Intel IOMMU register specification per version 1.0 public spec.
+ */
+
+#define	DMAR_VER_REG	0x0	/* Arch version supported by this IOMMU */
+#define	DMAR_CAP_REG	0x8	/* Hardware supported capabilities */
+#define	DMAR_ECAP_REG	0x10	/* Extended capabilities supported */
+#define	DMAR_GCMD_REG	0x18	/* Global command register */
+#define	DMAR_GSTS_REG	0x1c	/* Global status register */
+#define	DMAR_RTADDR_REG	0x20	/* Root entry table */
+#define	DMAR_CCMD_REG	0x28	/* Context command reg */
+#define	DMAR_FSTS_REG	0x34	/* Fault Status register */
+#define	DMAR_FECTL_REG	0x38	/* Fault control register */
+#define	DMAR_FEDATA_REG	0x3c	/* Fault event interrupt data register */
+#define	DMAR_FEADDR_REG	0x40	/* Fault event interrupt addr register */
+#define	DMAR_FEUADDR_REG 0x44	/* Upper address register */
+#define	DMAR_AFLOG_REG	0x58	/* Advanced Fault control */
+#define	DMAR_PMEN_REG	0x64	/* Enable Protected Memory Region */
+#define	DMAR_PLMBASE_REG 0x68	/* PMRR Low addr */
+#define	DMAR_PLMLIMIT_REG 0x6c	/* PMRR low limit */
+#define	DMAR_PHMBASE_REG 0x70	/* pmrr high base addr */
+#define	DMAR_PHMLIMIT_REG 0x78	/* pmrr high limit */
+
+#define OFFSET_STRIDE		(9)
+/*
+#define dmar_readl(dmar, reg) readl(dmar + reg)
+#define dmar_readq(dmar, reg) ({ \
+		u32 lo, hi; \
+		lo = readl(dmar + reg); \
+		hi = readl(dmar + reg + 4); \
+		(((u64) hi) << 32) + lo; })
+*/
+static inline u64 dmar_readq(void __iomem *addr)
+{
+	u32 lo, hi;
+	lo = readl(addr);
+	hi = readl(addr + 4);
+	return (((u64) hi) << 32) + lo;
+}
+
+static inline void dmar_writeq(void __iomem *addr, u64 val)
+{
+	writel((u32)val, addr);
+	writel((u32)(val >> 32), addr + 4);
+}
+
+#define DMAR_VER_MAJOR(v)		(((v) & 0xf0) >> 4)
+#define DMAR_VER_MINOR(v)		((v) & 0x0f)
+
+/*
+ * Decoding Capability Register
+ */
+#define cap_read_drain(c)	(((c) >> 55) & 1)
+#define cap_write_drain(c)	(((c) >> 54) & 1)
+#define cap_max_amask_val(c)	(((c) >> 48) & 0x3f)
+#define cap_num_fault_regs(c)	((((c) >> 40) & 0xff) + 1)
+#define cap_pgsel_inv(c)	(((c) >> 39) & 1)
+
+#define cap_super_page_val(c)	(((c) >> 34) & 0xf)
+#define cap_super_offset(c)	(((find_first_bit(&cap_super_page_val(c), 4)) \
+					* OFFSET_STRIDE) + 21)
+
+#define cap_fault_reg_offset(c)	((((c) >> 24) & 0x3ff) * 16)
+#define cap_max_fault_reg_offset(c) \
+	(cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)
+
+#define cap_zlr(c)		(((c) >> 22) & 1)
+#define cap_isoch(c)		(((c) >> 23) & 1)
+#define cap_mgaw(c)		((((c) >> 16) & 0x3f) + 1)
+#define cap_sagaw(c)		(((c) >> 8) & 0x1f)
+#define cap_caching_mode(c)	(((c) >> 7) & 1)
+#define cap_phmr(c)		(((c) >> 6) & 1)
+#define cap_plmr(c)		(((c) >> 5) & 1)
+#define cap_rwbf(c)		(((c) >> 4) & 1)
+#define cap_afl(c)		(((c) >> 3) & 1)
+#define cap_ndoms(c)		(((unsigned long)1) << (4 + 2 * ((c) & 0x7)))
+/*
+ * Extended Capability Register
+ */
+
+#define ecap_niotlb_iunits(e)	((((e) >> 24) & 0xff) + 1)
+#define ecap_iotlb_offset(e) 	((((e) >> 8) & 0x3ff) * 16)
+#define ecap_max_iotlb_offset(e) \
+	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
+#define ecap_coherent(e)	((e) & 0x1)
+
+
+/* IOTLB_REG */
+#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
+#define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
+#define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
+#define DMA_TLB_IIRG(type) ((type >> 60) & 7)
+#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
+#define DMA_TLB_READ_DRAIN (((u64)1) << 49)
+#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
+#define DMA_TLB_DID(id)	(((u64)((id) & 0xffff)) << 32)
+#define DMA_TLB_IVT (((u64)1) << 63)
+#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
+#define DMA_TLB_MAX_SIZE (0x3f)
+
+/* PMEN_REG */
+#define DMA_PMEN_EPM (((u32)1)<<31)
+#define DMA_PMEN_PRS (((u32)1)<<0)
+
+/* GCMD_REG */
+#define DMA_GCMD_TE (((u32)1) << 31)
+#define DMA_GCMD_SRTP (((u32)1) << 30)
+#define DMA_GCMD_SFL (((u32)1) << 29)
+#define DMA_GCMD_EAFL (((u32)1) << 28)
+#define DMA_GCMD_WBF (((u32)1) << 27)
+
+/* GSTS_REG */
+#define DMA_GSTS_TES (((u32)1) << 31)
+#define DMA_GSTS_RTPS (((u32)1) << 30)
+#define DMA_GSTS_FLS (((u32)1) << 29)
+#define DMA_GSTS_AFLS (((u32)1) << 28)
+#define DMA_GSTS_WBFS (((u32)1) << 27)
+
+/* CCMD_REG */
+#define DMA_CCMD_ICC (((u64)1) << 63)
+#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
+#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
+#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
+#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
+#define DMA_CCMD_MASK_NOBIT 0
+#define DMA_CCMD_MASK_1BIT 1
+#define DMA_CCMD_MASK_2BIT 2
+#define DMA_CCMD_MASK_3BIT 3
+#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
+#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
+
+/* FECTL_REG */
+#define DMA_FECTL_IM (((u32)1) << 31)
+
+/* FSTS_REG */
+#define DMA_FSTS_PPF ((u32)2)
+#define DMA_FSTS_PFO ((u32)1)
+#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
+
+/* FRCD_REG, 32 bits access */
+#define DMA_FRCD_F (((u32)1) << 31)
+#define dma_frcd_type(d) ((d >> 30) & 1)
+#define dma_frcd_fault_reason(c) (c & 0xff)
+#define dma_frcd_source_id(c) (c & 0xffff)
+#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
+
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+	u64	val;
+	u64	rsvd1;
+};
+#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
+static inline bool root_present(struct root_entry *root)
+{
+	return (root->val & 1);
+}
+static inline void set_root_present(struct root_entry *root)
+{
+	root->val |= 1;
+}
+static inline void set_root_value(struct root_entry *root, unsigned long value)
+{
+	root->val |= value & PAGE_MASK_4K;
+}
+
+struct context_entry;
+static inline struct context_entry *
+get_context_addr_from_root(struct root_entry *root)
+{
+	return (struct context_entry *)
+		(root_present(root)?phys_to_virt(
+		root->val & PAGE_MASK_4K):
+		NULL);
+}
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+	u64 lo;
+	u64 hi;
+};
+#define context_present(c) ((c).lo & 1)
+#define context_fault_disable(c) (((c).lo >> 1) & 1)
+#define context_translation_type(c) (((c).lo >> 2) & 3)
+#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
+#define context_address_width(c) ((c).hi &  7)
+#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
+
+#define context_set_present(c) do {(c).lo |= 1;} while (0)
+#define context_set_fault_enable(c) \
+	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
+#define context_set_translation_type(c, val) \
+	do { \
+		(c).lo &= (((u64)-1) << 4) | 3; \
+		(c).lo |= ((val) & 3) << 2; \
+	} while (0)
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define context_set_address_root(c, val) \
+	do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
+#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
+#define context_set_domain_id(c, val) \
+	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
+#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-11: available
+ * 12-63: Host physcial address
+ */
+struct dma_pte {
+	u64 val;
+};
+#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
+
+#define DMA_PTE_READ (1)
+#define DMA_PTE_WRITE (2)
+
+#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
+#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
+#define dma_set_pte_prot(p, prot) \
+		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
+#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
+#define dma_set_pte_addr(p, addr) do {\
+		(p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+#define dma_pte_present(p) (((p).val & 3) != 0)
+
+struct intel_iommu;
+
+struct dmar_domain {
+	int	id;			/* domain id */
+	struct intel_iommu *iommu;	/* back pointer to owning iommu */
+
+	struct list_head devices; 	/* all devices' list */
+	struct iova_domain iovad;	/* iova's that belong to this domain */
+
+	struct dma_pte	*pgd;		/* virtual address */
+	spinlock_t	mapping_lock;	/* page table lock */
+	int		gaw;		/* max guest address width */
+
+	/* adjusted guest address width, 0 is level 2 30-bit */
+	int		agaw;
+
+#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
+	int		flags;
+};
+
+/* PCI domain-device relationship */
+struct device_domain_info {
+	struct list_head link;	/* link to domain siblings */
+	struct list_head global; /* link to global list */
+	u8 bus;			/* PCI bus numer */
+	u8 devfn;		/* PCI devfn number */
+	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+	struct dmar_domain *domain; /* pointer to domain */
+};
+
+extern int init_dmars(void);
+
+struct intel_iommu {
+	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
+	u64		cap;
+	u64		ecap;
+	unsigned long 	*domain_ids; /* bitmap of domains */
+	struct dmar_domain **domains; /* ptr to domains */
+	int		seg;
+	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
+	spinlock_t	lock; /* protect context, domain ids */
+	spinlock_t	register_lock; /* protect register handling */
+	struct root_entry *root_entry; /* virtual address */
+
+	unsigned int irq;
+	unsigned char name[7];    /* Device Name */
+	struct msi_msg saved_msg;
+	struct sys_device sysdev;
+};
+
+#ifndef CONFIG_DMAR_GFX_WA
+static inline void iommu_prepare_gfx_mapping(void)
+{
+	return;
+}
+#endif /* !CONFIG_DMAR_GFX_WA */
+
+void intel_iommu_domain_exit(struct dmar_domain *domain);
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
+int intel_iommu_context_mapping(struct dmar_domain *domain,
+				struct pci_dev *pdev);
+int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
+			     u64 hpa, size_t size, int prot);
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
+struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
+int intel_iommu_found(void);
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
+
+#endif
diff --git a/include/linux/iova.h b/include/linux/iova.h
new file mode 100644
index 0000000..228f6c9
--- /dev/null
+++ b/include/linux/iova.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This file is released under the GPLv2.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ *
+ */
+
+#ifndef _IOVA_H_
+#define _IOVA_H_
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/dma-mapping.h>
+
+/* IO virtual address start page frame number */
+#define IOVA_START_PFN		(1)
+
+/* iova structure */
+struct iova {
+	struct rb_node	node;
+	unsigned long	pfn_hi; /* IOMMU dish out addr hi */
+	unsigned long	pfn_lo; /* IOMMU dish out addr lo */
+};
+
+/* holds all the iova translations for a domain */
+struct iova_domain {
+	spinlock_t	iova_alloc_lock;/* Lock to protect iova  allocation */
+	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
+	struct rb_root	rbroot;		/* iova domain rbtree root */
+	struct rb_node	*cached32_node; /* Save last alloced node */
+	unsigned long	dma_32bit_pfn;
+};
+
+struct iova *alloc_iova_mem(void);
+void free_iova_mem(struct iova *iova);
+void free_iova(struct iova_domain *iovad, unsigned long pfn);
+void __free_iova(struct iova_domain *iovad, struct iova *iova);
+struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
+	unsigned long limit_pfn,
+	bool size_aligned);
+struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
+	unsigned long pfn_hi);
+void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
+void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit);
+struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
+void put_iova_domain(struct iova_domain *iovad);
+
+#endif
-- 
1.6.0.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-09 15:37 ` [PATCH 1/2] VT-d: Changes to support KVM Amit Shah
@ 2008-09-09 15:37   ` Amit Shah
  2008-09-11  7:21     ` Han, Weidong
  2008-09-10 15:53   ` [PATCH 1/2] VT-d: Changes to support KVM Avi Kivity
  1 sibling, 1 reply; 25+ messages in thread
From: Amit Shah @ 2008-09-09 15:37 UTC (permalink / raw)
  To: avi
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay, Amit Shah

From: Ben-Ami Yassour <benami@il.ibm.com>

Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
---
 arch/x86/kvm/Makefile      |    3 +
 arch/x86/kvm/vtd.c         |  198 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |   14 +++
 include/asm-x86/kvm_host.h |    3 +
 include/linux/kvm.h        |    3 +
 include/linux/kvm_host.h   |   32 +++++++
 virt/kvm/kvm_main.c        |    9 ++-
 7 files changed, 261 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..3072b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
 	i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..667bf3f
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+				gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, r;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	r = -EINVAL;
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn && !is_mmio_pfn(pfn))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (!is_mmio_pfn(pfn)) {
+			r = intel_iommu_page_mapping(domain,
+						     gfn_to_gpa(gfn),
+						     pfn_to_hpa(pfn),
+						     PAGE_SIZE,
+						     DMA_PTE_READ |
+						     DMA_PTE_WRITE);
+			if (r) {
+				printk(KERN_DEBUG "kvm_iommu_map_pages:"
+				       "iommu failed to map pfn=%lx\n", pfn);
+				goto unmap_pages;
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			goto unmap_pages;
+		}
+		gfn++;
+	}
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, base_gfn, i);
+	return r;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, r;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					kvm->memslots[i].npages);
+		if (r)
+			break;
+	}
+	up_read(&kvm->slots_lock);
+	return r;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int r;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	if (!kvm->arch.intel_iommu_domain)
+		return -ENODEV;
+
+	r = kvm_iommu_map_memslots(kvm);
+	if (r)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					pdev);
+	if (r) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain, entry->host_busnr,
+				       entry->host_devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb71..342f67a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
 out_disable:
 	pci_disable_device(dev);
 out_put:
@@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -4266,6 +4279,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 815efc3..addd874 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -364,6 +364,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -514,6 +515,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
 
+int is_mmio_pfn(pfn_t pfn);
+
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef4bc6f..4269be1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -384,6 +384,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#define KVM_CAP_IOMMU 18
 
 /*
  * ioctls for VM fds
@@ -495,4 +496,6 @@ struct kvm_assigned_irq {
 	__u32 flags;
 };
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad..b703890 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_DMAR
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+#else /* CONFIG_DMAR */
+static inline int kvm_iommu_map_pages(struct kvm *kvm,
+				      gfn_t base_gfn,
+				      unsigned long npages)
+{
+	return 0;
+}
+
+static inline int kvm_iommu_map_guest(struct kvm *kvm,
+				      struct kvm_assigned_dev_kernel
+				      *assigned_dev)
+{
+	return -ENODEV;
+}
+
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
@@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
+static inline hpa_t pfn_to_hpa(pfn_t pfn)
+{
+	return (hpa_t)pfn << PAGE_SHIFT;
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de3b029..6b55960 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out;
+
 	return 0;
 
 out_free:
-- 
1.6.0.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] VT-d: Changes to support KVM
  2008-09-09 15:37 ` [PATCH 1/2] VT-d: Changes to support KVM Amit Shah
  2008-09-09 15:37   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
@ 2008-09-10 15:53   ` Avi Kivity
  2008-09-10 18:11     ` Jesse Barnes
  2008-09-11  6:11     ` Han, Weidong
  1 sibling, 2 replies; 25+ messages in thread
From: Avi Kivity @ 2008-09-10 15:53 UTC (permalink / raw)
  To: Amit Shah
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay

Amit Shah wrote:
> From: Kay, Allen M <allen.m.kay@intel.com>
>
> This patch extends the VT-d driver to support KVM
>
> [Ben: fixed memory pinning]
>
> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> Signed-off-by: Weidong Han <weidong.han@intel.com>
> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> Signed-off-by: Amit Shah <amit.shah@qumranet.com>
>
> Acked-by: Mark Gross <mgross@linux.intel.com>
> ---
>  drivers/pci/dmar.c          |    4 +-
>  drivers/pci/intel-iommu.c   |  116 ++++++++++++++-
>  drivers/pci/intel-iommu.h   |  344 -----------------------------------------
>  drivers/pci/iova.c          |    2 +-
>  drivers/pci/iova.h          |   52 -------
>  include/linux/intel-iommu.h |  355 +++++++++++++++++++++++++++++++++++++++++++
>  include/linux/iova.h        |   52 +++++++
>  7 files changed, 522 insertions(+), 403 deletions(-)
>  delete mode 100644 drivers/pci/intel-iommu.h
>  delete mode 100644 drivers/pci/iova.h
>  create mode 100644 include/linux/intel-iommu.h
>  create mode 100644 include/linux/iova.h
>   

Please resend with git's -M option, so we can review the file moves.


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] VT-d: Changes to support KVM
  2008-09-10 15:53   ` [PATCH 1/2] VT-d: Changes to support KVM Avi Kivity
@ 2008-09-10 18:11     ` Jesse Barnes
  2008-09-10 19:06       ` David Woodhouse
  2008-09-11  6:11     ` Han, Weidong
  1 sibling, 1 reply; 25+ messages in thread
From: Jesse Barnes @ 2008-09-10 18:11 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Amit Shah, kvm, muli, anthony, david.woodhouse, mark.gross,
	benami, weidong.han, allen.m.kay

On Wednesday, September 10, 2008 8:53 am Avi Kivity wrote:
> Amit Shah wrote:
> > From: Kay, Allen M <allen.m.kay@intel.com>
> >
> > This patch extends the VT-d driver to support KVM
> >
> > [Ben: fixed memory pinning]
> >
> > Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> > Signed-off-by: Weidong Han <weidong.han@intel.com>
> > Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> > Signed-off-by: Amit Shah <amit.shah@qumranet.com>
> >
> > Acked-by: Mark Gross <mgross@linux.intel.com>
> > ---
> >  drivers/pci/dmar.c          |    4 +-
> >  drivers/pci/intel-iommu.c   |  116 ++++++++++++++-
> >  drivers/pci/intel-iommu.h   |  344
> > ----------------------------------------- drivers/pci/iova.c          |  
> >  2 +-
> >  drivers/pci/iova.h          |   52 -------
> >  include/linux/intel-iommu.h |  355
> > +++++++++++++++++++++++++++++++++++++++++++ include/linux/iova.h        |
> >   52 +++++++
> >  7 files changed, 522 insertions(+), 403 deletions(-)
> >  delete mode 100644 drivers/pci/intel-iommu.h
> >  delete mode 100644 drivers/pci/iova.h
> >  create mode 100644 include/linux/intel-iommu.h
> >  create mode 100644 include/linux/iova.h
>
> Please resend with git's -M option, so we can review the file moves.

I assume the KVM bits depend on this patch.  I can take it (after the next 
spin) unless David already has his IOMMU tree set up, in which case he should 
probably apply them.

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] VT-d: Changes to support KVM
  2008-09-10 18:11     ` Jesse Barnes
@ 2008-09-10 19:06       ` David Woodhouse
  0 siblings, 0 replies; 25+ messages in thread
From: David Woodhouse @ 2008-09-10 19:06 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Avi Kivity, Amit Shah, kvm, muli, anthony, mark.gross, benami,
	weidong.han, allen.m.kay

On Wed, 2008-09-10 at 11:11 -0700, Jesse Barnes wrote:
> I assume the KVM bits depend on this patch.  I can take it (after the next 
> spin) unless David already has his IOMMU tree set up, in which case he should 
> probably apply them.

I haven't -- I hadn't actually got any further muttering to you that it
might make sense to set up such a tree.

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH 1/2] VT-d: Changes to support KVM
  2008-09-10 15:53   ` [PATCH 1/2] VT-d: Changes to support KVM Avi Kivity
  2008-09-10 18:11     ` Jesse Barnes
@ 2008-09-11  6:11     ` Han, Weidong
  1 sibling, 0 replies; 25+ messages in thread
From: Han, Weidong @ 2008-09-11  6:11 UTC (permalink / raw)
  To: Avi Kivity, Amit Shah
  Cc: kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Kay, Allen M

[-- Attachment #1: Type: text/plain, Size: 7759 bytes --]

Avi Kivity wrote:
> Amit Shah wrote:
>> From: Kay, Allen M <allen.m.kay@intel.com>
>> 
>> This patch extends the VT-d driver to support KVM
>> 
>> [Ben: fixed memory pinning]
>> 
>> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
>> Signed-off-by: Weidong Han <weidong.han@intel.com>
>> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
>> Signed-off-by: Amit Shah <amit.shah@qumranet.com>
>> 
>> Acked-by: Mark Gross <mgross@linux.intel.com>
>> ---
>>  drivers/pci/dmar.c          |    4 +-
>>  drivers/pci/intel-iommu.c   |  116 ++++++++++++++-
>>  drivers/pci/intel-iommu.h   |  344
>>  ----------------------------------------- drivers/pci/iova.c       
>>  |    2 +- drivers/pci/iova.h          |   52 -------
>>  include/linux/intel-iommu.h |  355
>>  +++++++++++++++++++++++++++++++++++++++++++ include/linux/iova.h   
>>  |   52 +++++++ 7 files changed, 522 insertions(+), 403 deletions(-)
>>  delete mode 100644 drivers/pci/intel-iommu.h
>>  delete mode 100644 drivers/pci/iova.h
>>  create mode 100644 include/linux/intel-iommu.h
>>  create mode 100644 include/linux/iova.h
>> 
> 
> Please resend with git's -M option, so we can review the file moves.

Hi Avi and Amit,

I regenerated the patch with git -M option. It's easy to review now.
Thanks.


From: Kay, Allen M <allen.m.kay@intel.com>

This patch extends the VT-d driver to support KVM

[Ben: fixed memory pinning]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
---
 drivers/pci/dmar.c                           |    4 +-
 drivers/pci/intel-iommu.c                    |  116
+++++++++++++++++++++++++-
 drivers/pci/iova.c                           |    2 +-
 {drivers/pci => include/linux}/intel-iommu.h |   11 +++
 {drivers/pci => include/linux}/iova.h        |    0 
 5 files changed, 126 insertions(+), 7 deletions(-)
 rename {drivers/pci => include/linux}/intel-iommu.h (95%)
 rename {drivers/pci => include/linux}/iova.h (100%)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 8bf86ae..1df28ea 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -26,8 +26,8 @@
 
 #include <linux/pci.h>
 #include <linux/dmar.h>
-#include "iova.h"
-#include "intel-iommu.h"
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 
 #undef PREFIX
 #define PREFIX "DMAR:"
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8d0e60a..3175a4c 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -33,8 +33,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/mempool.h>
 #include <linux/timer.h>
-#include "iova.h"
-#include "intel-iommu.h"
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 #include <asm/proto.h> /* force_iommu in this header in x86-64*/
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
@@ -160,7 +160,7 @@ static inline void *alloc_domain_mem(void)
 	return iommu_kmem_cache_alloc(iommu_domain_cache);
 }
 
-static inline void free_domain_mem(void *vaddr)
+static void free_domain_mem(void *vaddr)
 {
 	kmem_cache_free(iommu_domain_cache, vaddr);
 }
@@ -1414,7 +1414,7 @@ static void domain_remove_dev_info(struct
dmar_domain *domain)
  * find_domain
  * Note: we use struct pci_dev->dev.archdata.iommu stores the info
  */
-struct dmar_domain *
+static struct dmar_domain *
 find_domain(struct pci_dev *pdev)
 {
 	struct device_domain_info *info;
@@ -2430,3 +2430,111 @@ int __init intel_iommu_init(void)
 	return 0;
 }
 
+void intel_iommu_domain_exit(struct dmar_domain *domain)
+{
+	u64 end;
+
+	/* Domain 0 is reserved, so dont process it */
+	if (!domain)
+		return;
+
+	end = DOMAIN_MAX_ADDR(domain->gaw);
+	end = end & (~PAGE_MASK_4K);
+
+	/* clear ptes */
+	dma_pte_clear_range(domain, 0, end);
+
+	/* free page tables */
+	dma_pte_free_pagetable(domain, 0, end);
+
+	iommu_free_domain(domain);
+	free_domain_mem(domain);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
+
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
+{
+	struct dmar_drhd_unit *drhd;
+	struct dmar_domain *domain;
+	struct intel_iommu *iommu;
+
+	drhd = dmar_find_matched_drhd_unit(pdev);
+	if (!drhd) {
+		printk(KERN_ERR "intel_iommu_domain_alloc: drhd ==
NULL\n");
+		return NULL;
+	}
+
+	iommu = drhd->iommu;
+	if (!iommu) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: iommu == NULL\n");
+		return NULL;
+	}
+	domain = iommu_alloc_domain(iommu);
+	if (!domain) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: domain == NULL\n");
+		return NULL;
+	}
+	if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: domain_init()
failed\n");
+		intel_iommu_domain_exit(domain);
+		return NULL;
+	}
+	return domain;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+
+int intel_iommu_context_mapping(
+	struct dmar_domain *domain, struct pci_dev *pdev)
+{
+	int rc;
+	rc = domain_context_mapping(domain, pdev);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+
+int intel_iommu_page_mapping(
+	struct dmar_domain *domain, dma_addr_t iova,
+	u64 hpa, size_t size, int prot)
+{
+	int rc;
+	rc = domain_page_mapping(domain, iova, hpa, size, prot);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8
devfn)
+{
+	detach_domain_for_dev(domain, bus, devfn);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+
+struct dmar_domain *
+intel_iommu_find_domain(struct pci_dev *pdev)
+{
+	return find_domain(pdev);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+
+int intel_iommu_found(void)
+{
+	return g_num_of_iommus;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_found);
+
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
+{
+	struct dma_pte *pte;
+	u64 pfn;
+
+	pfn = 0;
+	pte = addr_to_dma_pte(domain, iova);
+
+	if (pte)
+		pfn = dma_pte_addr(*pte);
+
+	return pfn >> PAGE_SHIFT_4K;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 3ef4ac0..2287116 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -7,7 +7,7 @@
  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  */
 
-#include "iova.h"
+#include <linux/iova.h>
 
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit)
diff --git a/drivers/pci/intel-iommu.h b/include/linux/intel-iommu.h
similarity index 95%
rename from drivers/pci/intel-iommu.h
rename to include/linux/intel-iommu.h
index afc0ad9..1490fc0 100644
--- a/drivers/pci/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -341,4 +341,15 @@ static inline void iommu_prepare_gfx_mapping(void)
 }
 #endif /* !CONFIG_DMAR_GFX_WA */
 
+void intel_iommu_domain_exit(struct dmar_domain *domain);
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
+int intel_iommu_context_mapping(struct dmar_domain *domain,
+				struct pci_dev *pdev);
+int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t
iova,
+			     u64 hpa, size_t size, int prot);
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8
devfn);
+struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
+int intel_iommu_found(void);
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
+
 #endif
diff --git a/drivers/pci/iova.h b/include/linux/iova.h
similarity index 100%
rename from drivers/pci/iova.h
rename to include/linux/iova.h
-- 
1.5.1

[-- Attachment #2: VT-d-Changes-to-support-KVM.patch --]
[-- Type: application/octet-stream, Size: 6227 bytes --]

From: Kay, Allen M <allen.m.kay@intel.com>

This patch extends the VT-d driver to support KVM

[Ben: fixed memory pinning]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
---
 drivers/pci/dmar.c                           |    4 +-
 drivers/pci/intel-iommu.c                    |  116 +++++++++++++++++++++++++-
 drivers/pci/iova.c                           |    2 +-
 {drivers/pci => include/linux}/intel-iommu.h |   11 +++
 {drivers/pci => include/linux}/iova.h        |    0 
 5 files changed, 126 insertions(+), 7 deletions(-)
 rename {drivers/pci => include/linux}/intel-iommu.h (95%)
 rename {drivers/pci => include/linux}/iova.h (100%)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 8bf86ae..1df28ea 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -26,8 +26,8 @@
 
 #include <linux/pci.h>
 #include <linux/dmar.h>
-#include "iova.h"
-#include "intel-iommu.h"
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 
 #undef PREFIX
 #define PREFIX "DMAR:"
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8d0e60a..3175a4c 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -33,8 +33,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/mempool.h>
 #include <linux/timer.h>
-#include "iova.h"
-#include "intel-iommu.h"
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 #include <asm/proto.h> /* force_iommu in this header in x86-64*/
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
@@ -160,7 +160,7 @@ static inline void *alloc_domain_mem(void)
 	return iommu_kmem_cache_alloc(iommu_domain_cache);
 }
 
-static inline void free_domain_mem(void *vaddr)
+static void free_domain_mem(void *vaddr)
 {
 	kmem_cache_free(iommu_domain_cache, vaddr);
 }
@@ -1414,7 +1414,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
  * find_domain
  * Note: we use struct pci_dev->dev.archdata.iommu stores the info
  */
-struct dmar_domain *
+static struct dmar_domain *
 find_domain(struct pci_dev *pdev)
 {
 	struct device_domain_info *info;
@@ -2430,3 +2430,111 @@ int __init intel_iommu_init(void)
 	return 0;
 }
 
+void intel_iommu_domain_exit(struct dmar_domain *domain)
+{
+	u64 end;
+
+	/* Domain 0 is reserved, so dont process it */
+	if (!domain)
+		return;
+
+	end = DOMAIN_MAX_ADDR(domain->gaw);
+	end = end & (~PAGE_MASK_4K);
+
+	/* clear ptes */
+	dma_pte_clear_range(domain, 0, end);
+
+	/* free page tables */
+	dma_pte_free_pagetable(domain, 0, end);
+
+	iommu_free_domain(domain);
+	free_domain_mem(domain);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
+
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
+{
+	struct dmar_drhd_unit *drhd;
+	struct dmar_domain *domain;
+	struct intel_iommu *iommu;
+
+	drhd = dmar_find_matched_drhd_unit(pdev);
+	if (!drhd) {
+		printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
+		return NULL;
+	}
+
+	iommu = drhd->iommu;
+	if (!iommu) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: iommu == NULL\n");
+		return NULL;
+	}
+	domain = iommu_alloc_domain(iommu);
+	if (!domain) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: domain == NULL\n");
+		return NULL;
+	}
+	if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: domain_init() failed\n");
+		intel_iommu_domain_exit(domain);
+		return NULL;
+	}
+	return domain;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+
+int intel_iommu_context_mapping(
+	struct dmar_domain *domain, struct pci_dev *pdev)
+{
+	int rc;
+	rc = domain_context_mapping(domain, pdev);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+
+int intel_iommu_page_mapping(
+	struct dmar_domain *domain, dma_addr_t iova,
+	u64 hpa, size_t size, int prot)
+{
+	int rc;
+	rc = domain_page_mapping(domain, iova, hpa, size, prot);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+{
+	detach_domain_for_dev(domain, bus, devfn);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+
+struct dmar_domain *
+intel_iommu_find_domain(struct pci_dev *pdev)
+{
+	return find_domain(pdev);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+
+int intel_iommu_found(void)
+{
+	return g_num_of_iommus;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_found);
+
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
+{
+	struct dma_pte *pte;
+	u64 pfn;
+
+	pfn = 0;
+	pte = addr_to_dma_pte(domain, iova);
+
+	if (pte)
+		pfn = dma_pte_addr(*pte);
+
+	return pfn >> PAGE_SHIFT_4K;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 3ef4ac0..2287116 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -7,7 +7,7 @@
  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  */
 
-#include "iova.h"
+#include <linux/iova.h>
 
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit)
diff --git a/drivers/pci/intel-iommu.h b/include/linux/intel-iommu.h
similarity index 95%
rename from drivers/pci/intel-iommu.h
rename to include/linux/intel-iommu.h
index afc0ad9..1490fc0 100644
--- a/drivers/pci/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -341,4 +341,15 @@ static inline void iommu_prepare_gfx_mapping(void)
 }
 #endif /* !CONFIG_DMAR_GFX_WA */
 
+void intel_iommu_domain_exit(struct dmar_domain *domain);
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
+int intel_iommu_context_mapping(struct dmar_domain *domain,
+				struct pci_dev *pdev);
+int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
+			     u64 hpa, size_t size, int prot);
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
+struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
+int intel_iommu_found(void);
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
+
 #endif
diff --git a/drivers/pci/iova.h b/include/linux/iova.h
similarity index 100%
rename from drivers/pci/iova.h
rename to include/linux/iova.h
-- 
1.5.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-09 15:37   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
@ 2008-09-11  7:21     ` Han, Weidong
  2008-09-14  0:49       ` Avi Kivity
  0 siblings, 1 reply; 25+ messages in thread
From: Han, Weidong @ 2008-09-11  7:21 UTC (permalink / raw)
  To: Amit Shah, avi
  Cc: kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Kay, Allen M

[-- Attachment #1: Type: text/plain, Size: 12990 bytes --]

This patch only can work on x86, it breaks build on other architectures.
It is caused by kvm_irq_ack_notifier and kvm_assigned_dev_kernel are
defined under x86, while they are always used in
include/linux/kvm_host.h whether CONFIG_DMAR is set or not. I move these
two definitions to include/linux/kvm_host.h, and attached the updated
patch.

Randy (Weidong)

Amit Shah wrote:
> From: Ben-Ami Yassour <benami@il.ibm.com>
> 
> Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>
> 
> This patch enables PCI device assignment based on VT-d support.
> When a device is assigned to the guest, the guest memory is pinned and
> the mapping is updated in the VT-d IOMMU.
> 
> [Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
> and also control enable/disable from userspace]
> 
> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> Signed-off-by: Weidong Han <weidong.han@intel.com>
> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> Signed-off-by: Amit Shah <amit.shah@qumranet.com>
> 
> Acked-by: Mark Gross <mgross@linux.intel.com>
> ---
>  arch/x86/kvm/Makefile      |    3 +
>  arch/x86/kvm/vtd.c         |  198
>  ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c     
>  |   14 +++ include/asm-x86/kvm_host.h |    3 +
>  include/linux/kvm.h        |    3 +
>  include/linux/kvm_host.h   |   32 +++++++
>  virt/kvm/kvm_main.c        |    9 ++-
>  7 files changed, 261 insertions(+), 1 deletions(-)
>  create mode 100644 arch/x86/kvm/vtd.c
> 
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index d0e940b..3072b17 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
> 
>  kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o
>  	lapic.o \ i8254.o
> +ifeq ($(CONFIG_DMAR),y)
> +kvm-objs += vtd.o
> +endif
>  obj-$(CONFIG_KVM) += kvm.o
>  kvm-intel-objs = vmx.o
>  obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
> diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
> new file mode 100644
> index 0000000..667bf3f
> --- /dev/null
> +++ b/arch/x86/kvm/vtd.c
> @@ -0,0 +1,198 @@
> +/*
> + * Copyright (c) 2006, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> modify it + * under the terms and conditions of the GNU General
> Public License, + * version 2, as published by the Free Software
> Foundation. + *
> + * This program is distributed in the hope it will be useful, but
> WITHOUT + * ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> General Public License for + * more details.
> + *
> + * You should have received a copy of the GNU General Public License
> along with + * this program; if not, write to the Free Software
> Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA
> 02111-1307 USA. + *
> + * Copyright (C) 2006-2008 Intel Corporation
> + * Copyright IBM Corporation, 2008
> + * Author: Allen M. Kay <allen.m.kay@intel.com>
> + * Author: Weidong Han <weidong.han@intel.com>
> + * Author: Ben-Ami Yassour <benami@il.ibm.com>
> + */
> +
> +#include <linux/list.h>
> +#include <linux/kvm_host.h>
> +#include <linux/pci.h>
> +#include <linux/dmar.h>
> +#include <linux/intel-iommu.h>
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
> +static void kvm_iommu_put_pages(struct kvm *kvm,
> +				gfn_t base_gfn, unsigned long npages);
> +
> +int kvm_iommu_map_pages(struct kvm *kvm,
> +			gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	int i, r;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	/* check if iommu exists and in use */
> +	if (!domain)
> +		return 0;
> +
> +	r = -EINVAL;
> +	for (i = 0; i < npages; i++) {
> +		/* check if already mapped */
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn_to_gpa(gfn));
> +		if (pfn && !is_mmio_pfn(pfn))
> +			continue;
> +
> +		pfn = gfn_to_pfn(kvm, gfn);
> +		if (!is_mmio_pfn(pfn)) {
> +			r = intel_iommu_page_mapping(domain,
> +						     gfn_to_gpa(gfn),
> +						     pfn_to_hpa(pfn),
> +						     PAGE_SIZE,
> +						     DMA_PTE_READ |
> +						     DMA_PTE_WRITE);
> +			if (r) {
> +				printk(KERN_DEBUG "kvm_iommu_map_pages:"
> +				       "iommu failed to map pfn=%lx\n",
pfn);
> +				goto unmap_pages;
> +			}
> +		} else {
> +			printk(KERN_DEBUG "kvm_iommu_map_page:"
> +			       "invalid pfn=%lx\n", pfn);
> +			goto unmap_pages;
> +		}
> +		gfn++;
> +	}
> +	return 0;
> +
> +unmap_pages:
> +	kvm_iommu_put_pages(kvm, base_gfn, i);
> +	return r;
> +}
> +
> +static int kvm_iommu_map_memslots(struct kvm *kvm)
> +{
> +	int i, r;
> +
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> +					kvm->memslots[i].npages);
> +		if (r)
> +			break;
> +	}
> +	up_read(&kvm->slots_lock);
> +	return r;
> +}
> +
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_assigned_dev_kernel *assigned_dev)
> +{
> +	struct pci_dev *pdev = NULL;
> +	int r;
> +
> +	if (!intel_iommu_found()) {
> +		printk(KERN_ERR "%s: intel iommu not found\n",
__func__);
> +		return -ENODEV;
> +	}
> +
> +	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
> +	       assigned_dev->host_busnr,
> +	       PCI_SLOT(assigned_dev->host_devfn),
> +	       PCI_FUNC(assigned_dev->host_devfn));
> +
> +	pdev = assigned_dev->dev;
> +
> +	if (pdev == NULL) {
> +		if (kvm->arch.intel_iommu_domain) {
> +
intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
> +			kvm->arch.intel_iommu_domain = NULL;
> +		}
> +		return -ENODEV;
> +	}
> +
> +	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
> +	if (!kvm->arch.intel_iommu_domain)
> +		return -ENODEV;
> +
> +	r = kvm_iommu_map_memslots(kvm);
> +	if (r)
> +		goto out_unmap;
> +
> +	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
> +			       pdev->bus->number, pdev->devfn);
> +
> +	r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
> +					pdev);
> +	if (r) {
> +		printk(KERN_ERR "Domain context map for %s failed",
> +		       pci_name(pdev));
> +		goto out_unmap;
> +	}
> +	return 0;
> +
> +out_unmap:
> +	kvm_iommu_unmap_memslots(kvm);
> +	return r;
> +}
> +
> +static void kvm_iommu_put_pages(struct kvm *kvm,
> +			       gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +	int i;
> +
> +	for (i = 0; i < npages; i++) {
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn_to_gpa(gfn));
> +		kvm_release_pfn_clean(pfn);
> +		gfn++;
> +	}
> +}
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm)
> +{
> +	int i;
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
> +				    kvm->memslots[i].npages);
> +	}
> +	up_read(&kvm->slots_lock);
> +
> +	return 0;
> +}
> +
> +int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +	struct kvm_assigned_dev_kernel *entry;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	/* check if iommu exists and in use */
> +	if (!domain)
> +		return 0;
> +
> +	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
> +		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
> +		       entry->host_busnr,
> +		       PCI_SLOT(entry->host_devfn),
> +		       PCI_FUNC(entry->host_devfn));
> +
> +		/* detach kvm dmar domain */
> +		intel_iommu_detach_dev(domain, entry->host_busnr,
> +				       entry->host_devfn);
> +	}
> +	kvm_iommu_unmap_memslots(kvm);
> +	intel_iommu_domain_exit(domain);
> +	return 0;
> +}
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 3f3cb71..342f67a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -35,6 +35,7 @@
>  #include <linux/module.h>
>  #include <linux/mman.h>
>  #include <linux/highmem.h>
> +#include <linux/intel-iommu.h>
> 
>  #include <asm/uaccess.h>
>  #include <asm/msr.h>
> @@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm
> *kvm, 
> 
>  	list_add(&match->list, &kvm->arch.assigned_dev_head);
> 
> +	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
> +		r = kvm_iommu_map_guest(kvm, match);
> +		if (r)
> +			goto out_list_del;
> +	}
> +
>  out:
>  	mutex_unlock(&kvm->lock);
>  	return r;
> +out_list_del:
> +	list_del(&match->list);
> +	pci_release_regions(dev);
>  out_disable:
>  	pci_disable_device(dev);
>  out_put:
> @@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	case KVM_CAP_PV_MMU:
>  		r = !tdp_enabled;
>  		break;
> +	case KVM_CAP_IOMMU:
> +		r = intel_iommu_found();
> +		break;
>  	default:
>  		r = 0;
>  		break;
> @@ -4266,6 +4279,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
> 
>  void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
> +	kvm_iommu_unmap_guest(kvm);
>  	kvm_free_assigned_devices(kvm);
>  	kvm_free_pit(kvm);
>  	kfree(kvm->arch.vpic);
> diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> index 815efc3..addd874 100644
> --- a/include/asm-x86/kvm_host.h
> +++ b/include/asm-x86/kvm_host.h
> @@ -364,6 +364,7 @@ struct kvm_arch{
>  	 */
>  	struct list_head active_mmu_pages;
>  	struct list_head assigned_dev_head;
> +	struct dmar_domain *intel_iommu_domain;
>  	struct kvm_pic *vpic;
>  	struct kvm_ioapic *vioapic;
>  	struct kvm_pit *vpit;
> @@ -514,6 +515,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu,
>  gpa_t gpa, int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long
>  		  bytes, gpa_t addr, unsigned long *ret);
> 
> +int is_mmio_pfn(pfn_t pfn);
> +
>  extern bool tdp_enabled;
> 
>  enum emulation_result {
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index ef4bc6f..4269be1 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -384,6 +384,7 @@ struct kvm_trace_rec {
>  #define KVM_CAP_COALESCED_MMIO 15
>  #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected
>  in guest */ #define KVM_CAP_DEVICE_ASSIGNMENT 17
> +#define KVM_CAP_IOMMU 18
> 
>  /*
>   * ioctls for VM fds
> @@ -495,4 +496,6 @@ struct kvm_assigned_irq {
>  	__u32 flags;
>  };
> 
> +#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
> +
>  #endif
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index a18aaad..b703890 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
> 
> +#ifdef CONFIG_DMAR
> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
> +			unsigned long npages);
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_assigned_dev_kernel *assigned_dev);
> +int kvm_iommu_unmap_guest(struct kvm *kvm);
> +#else /* CONFIG_DMAR */
> +static inline int kvm_iommu_map_pages(struct kvm *kvm,
> +				      gfn_t base_gfn,
> +				      unsigned long npages)
> +{
> +	return 0;
> +}
> +
> +static inline int kvm_iommu_map_guest(struct kvm *kvm,
> +				      struct kvm_assigned_dev_kernel
> +				      *assigned_dev)
> +{
> +	return -ENODEV;
> +}
> +
> +static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +	return 0;
> +}
> +#endif /* CONFIG_DMAR */
> +
>  static inline void kvm_guest_enter(void)
>  {
>  	account_system_vtime(current);
> @@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
>  	return (gpa_t)gfn << PAGE_SHIFT;
>  }
> 
> +static inline hpa_t pfn_to_hpa(pfn_t pfn)
> +{
> +	return (hpa_t)pfn << PAGE_SHIFT;
> +}
> +
>  static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
>  {
>  	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index de3b029..6b55960 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -41,6 +41,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/mman.h>
>  #include <linux/swap.h>
> +#include <linux/intel-iommu.h>
> 
>  #include <asm/processor.h>
>  #include <asm/io.h>
> @@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
>  	return likely(n >= 0 && n < KVM_MAX_VCPUS);
>  }
> 
> -static inline int is_mmio_pfn(pfn_t pfn)
> +inline int is_mmio_pfn(pfn_t pfn)
>  {
>  	if (pfn_valid(pfn))
>  		return PageReserved(pfn_to_page(pfn));
> @@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
>  	}
> 
>  	kvm_free_physmem_slot(&old, &new);
> +
> +	/* map the pages in iommu page table */
> +	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
> +	if (r)
> +		goto out;
> +
>  	return 0;
> 
>  out_free:


[-- Attachment #2: kvm-device-assignment-with-vtd.patch --]
[-- Type: application/octet-stream, Size: 12405 bytes --]

From: Ben-Ami Yassour <benami@il.ibm.com>

Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
---
 arch/x86/kvm/Makefile      |    3 +
 arch/x86/kvm/vtd.c         |  198 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |   14 +++
 include/asm-x86/kvm_host.h |   23 +-----
 include/linux/kvm.h        |    3 +
 include/linux/kvm_host.h   |   52 ++++++++++++
 virt/kvm/kvm_main.c        |    9 ++-
 7 files changed, 281 insertions(+), 21 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..3072b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
 	i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..667bf3f
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+				gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, r;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	r = -EINVAL;
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn && !is_mmio_pfn(pfn))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (!is_mmio_pfn(pfn)) {
+			r = intel_iommu_page_mapping(domain,
+						     gfn_to_gpa(gfn),
+						     pfn_to_hpa(pfn),
+						     PAGE_SIZE,
+						     DMA_PTE_READ |
+						     DMA_PTE_WRITE);
+			if (r) {
+				printk(KERN_DEBUG "kvm_iommu_map_pages:"
+				       "iommu failed to map pfn=%lx\n", pfn);
+				goto unmap_pages;
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			goto unmap_pages;
+		}
+		gfn++;
+	}
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, base_gfn, i);
+	return r;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, r;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					kvm->memslots[i].npages);
+		if (r)
+			break;
+	}
+	up_read(&kvm->slots_lock);
+	return r;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int r;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	if (!kvm->arch.intel_iommu_domain)
+		return -ENODEV;
+
+	r = kvm_iommu_map_memslots(kvm);
+	if (r)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					pdev);
+	if (r) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain, entry->host_busnr,
+				       entry->host_devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb71..342f67a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
 out_disable:
 	pci_disable_device(dev);
 out_put:
@@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -4266,6 +4279,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 815efc3..d1175b8 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -331,26 +331,6 @@ struct kvm_mem_alias {
 	gfn_t target_gfn;
 };
 
-struct kvm_irq_ack_notifier {
-	struct hlist_node link;
-	unsigned gsi;
-	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
-};
-
-struct kvm_assigned_dev_kernel {
-	struct kvm_irq_ack_notifier ack_notifier;
-	struct work_struct interrupt_work;
-	struct list_head list;
-	int assigned_dev_id;
-	int host_busnr;
-	int host_devfn;
-	int host_irq;
-	int guest_irq;
-	int irq_requested;
-	struct pci_dev *dev;
-	struct kvm *kvm;
-};
-
 struct kvm_arch{
 	int naliases;
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -364,6 +344,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -514,6 +495,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
 
+int is_mmio_pfn(pfn_t pfn);
+
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef4bc6f..4269be1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -384,6 +384,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#define KVM_CAP_IOMMU 18
 
 /*
  * ioctls for VM fds
@@ -495,4 +496,6 @@ struct kvm_assigned_irq {
 	__u32 flags;
 };
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad..9f48374 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,53 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+struct kvm_irq_ack_notifier {
+	struct hlist_node link;
+	unsigned gsi;
+	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
+};
+
+struct kvm_assigned_dev_kernel {
+	struct kvm_irq_ack_notifier ack_notifier;
+	struct work_struct interrupt_work;
+	struct list_head list;
+	int assigned_dev_id;
+	int host_busnr;
+	int host_devfn;
+	int host_irq;
+	int guest_irq;
+	int irq_requested;
+	struct pci_dev *dev;
+	struct kvm *kvm;
+};
+
+#ifdef CONFIG_DMAR
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+#else /* CONFIG_DMAR */
+static inline int kvm_iommu_map_pages(struct kvm *kvm,
+				      gfn_t base_gfn,
+				      unsigned long npages)
+{
+	return 0;
+}
+
+static inline int kvm_iommu_map_guest(struct kvm *kvm,
+				      struct kvm_assigned_dev_kernel
+				      *assigned_dev)
+{
+	return -ENODEV;
+}
+
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
@@ -307,6 +354,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
+static inline hpa_t pfn_to_hpa(pfn_t pfn)
+{
+	return (hpa_t)pfn << PAGE_SHIFT;
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de3b029..6b55960 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out;
+
 	return 0;
 
 out_free:
-- 
1.5.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: VT-d support for device assignment
  2008-09-09 15:37 VT-d support for device assignment Amit Shah
  2008-09-09 15:37 ` [PATCH 1/2] VT-d: Changes to support KVM Amit Shah
@ 2008-09-14  0:46 ` Avi Kivity
  2008-09-23 18:57   ` Jesse Barnes
  1 sibling, 1 reply; 25+ messages in thread
From: Avi Kivity @ 2008-09-14  0:46 UTC (permalink / raw)
  To: Amit Shah
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay

Amit Shah wrote:
> Sorry for the resends; this one fixes two compile errors introduced by me and a warning.
>   


Applied both, thanks.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-11  7:21     ` Han, Weidong
@ 2008-09-14  0:49       ` Avi Kivity
  0 siblings, 0 replies; 25+ messages in thread
From: Avi Kivity @ 2008-09-14  0:49 UTC (permalink / raw)
  To: Han, Weidong
  Cc: Amit Shah, kvm, muli, anthony, jbarnes, Woodhouse, David,
	Gross, Mark, benami, Kay, Allen M

Han, Weidong wrote:
> This patch only can work on x86, it breaks build on other architectures.
> It is caused by kvm_irq_ack_notifier and kvm_assigned_dev_kernel are
> defined under x86, while they are always used in
> include/linux/kvm_host.h whether CONFIG_DMAR is set or not. I move these
> two definitions to include/linux/kvm_host.h, and attached the updated
> patch.
>
>   

Thanks.  I replaced the old patch with your new version.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: VT-d support for device assignment
  2008-09-14  0:46 ` VT-d support for device assignment Avi Kivity
@ 2008-09-23 18:57   ` Jesse Barnes
  0 siblings, 0 replies; 25+ messages in thread
From: Jesse Barnes @ 2008-09-23 18:57 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Amit Shah, kvm, muli, anthony, david.woodhouse, mark.gross,
	benami, weidong.han, allen.m.kay

On Saturday, September 13, 2008 5:46 pm Avi Kivity wrote:
> Amit Shah wrote:
> > Sorry for the resends; this one fixes two compile errors introduced by me
> > and a warning.
>
> Applied both, thanks.

You can add my s-o-b to the IOMMU patch if you want Avi, it's fine with me if 
you push those particular PCI bits (along with the recent build fix).

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2008-09-23 18:57 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-09-09 15:37 VT-d support for device assignment Amit Shah
2008-09-09 15:37 ` [PATCH 1/2] VT-d: Changes to support KVM Amit Shah
2008-09-09 15:37   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
2008-09-11  7:21     ` Han, Weidong
2008-09-14  0:49       ` Avi Kivity
2008-09-10 15:53   ` [PATCH 1/2] VT-d: Changes to support KVM Avi Kivity
2008-09-10 18:11     ` Jesse Barnes
2008-09-10 19:06       ` David Woodhouse
2008-09-11  6:11     ` Han, Weidong
2008-09-14  0:46 ` VT-d support for device assignment Avi Kivity
2008-09-23 18:57   ` Jesse Barnes
  -- strict thread matches above, loose matches on Subject: below --
2008-09-09 14:44 Amit Shah
2008-09-09 14:44 ` [PATCH 1/2] VT-d: Changes to support KVM Amit Shah
2008-09-09 14:44   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
2008-09-09 13:51 Han, Weidong
2008-09-09 14:39 ` Amit Shah
2008-09-09 15:05   ` Han, Weidong
2008-08-26  8:55 VT-d support for device assignment Amit Shah
2008-08-26  8:55 ` [PATCH 1/2] VT-d: changes to support KVM Amit Shah
2008-08-26  8:55   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
2008-08-26 10:28     ` Zhang, Xiantao
2008-08-26 10:35       ` Amit Shah
2008-08-26 10:42         ` Zhang, Xiantao
2008-08-26 10:57           ` Amit Shah
2008-08-26 11:04             ` Zhang, Xiantao
2008-08-26 14:41           ` Avi Kivity
2008-08-26 15:09             ` Han, Weidong
2008-09-03 16:52     ` Amit Shah
2008-09-09  7:18       ` Han, Weidong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox