public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] KVM: PCIPT: VT-d support
  2008-07-02 15:33 ` [PATCH] VT-d : changes to support KVM Ben-Ami Yassour
@ 2008-07-02 15:33   ` Ben-Ami Yassour
  2008-07-03  4:55     ` Han, Weidong
       [not found]     ` <08DF4D958216244799FC84F3514D70F00181AB10@pdsmsx415.ccr.corp.intel.com>
  0 siblings, 2 replies; 20+ messages in thread
From: Ben-Ami Yassour @ 2008-07-02 15:33 UTC (permalink / raw)
  To: amit.shah; +Cc: kvm, muli, benami, weidong.han, Kay, Allen M

From: Kay, Allen M <allen.m.kay@intel.com>

This patch includes the functions to support VT-d for passthrough
devices.

[Ben: fixed memory pinning]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
---
 arch/x86/kvm/Makefile      |    2 +-
 arch/x86/kvm/vtd.c         |  191 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vtd.h         |   36 ++++++++
 include/asm-x86/kvm_host.h |   17 ++++
 include/asm-x86/kvm_para.h |   14 +++
 include/linux/kvm_host.h   |    8 ++
 6 files changed, 267 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c
 create mode 100644 arch/x86/kvm/vtd.h

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..5d9d079 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -11,7 +11,7 @@ endif
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
-	i8254.o
+	i8254.o vtd.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..f665357
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+#include "vtd.h"
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct page *page;
+	int i, rc;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	if (!domain)
+		return -EFAULT;
+
+	for (i = 0; i < npages; i++) {
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (pfn_valid(pfn)) {
+			rc = kvm_intel_iommu_page_mapping(domain,
+							  gfn << PAGE_SHIFT,
+							  pfn << PAGE_SHIFT,
+							  PAGE_SIZE,
+							  DMA_PTE_READ |
+							  DMA_PTE_WRITE);
+			if (rc) {
+				page = pfn_to_page(pfn);
+				put_page(page);
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			return 0;
+		}
+		gfn++;
+	}
+	return 0;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, rc;
+	for (i = 0; i < kvm->nmemslots; i++) {
+		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					 kvm->memslots[i].npages);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_pci_passthrough_dev *pci_pt_dev)
+{
+	struct pci_dev *pdev = NULL;
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       pci_pt_dev->host.busnr,
+	       PCI_SLOT(pci_pt_dev->host.devfn),
+	       PCI_FUNC(pci_pt_dev->host.devfn));
+
+	for_each_pci_dev(pdev) {
+		if ((pdev->bus->number == pci_pt_dev->host.busnr) &&
+		    (pdev->devfn == pci_pt_dev->host.devfn)) {
+			break;
+		}
+	}
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			kvm_intel_iommu_domain_exit(kvm->arch.
+						    intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = kvm_intel_iommu_domain_alloc(pdev);
+
+	if (kvm_iommu_map_memslots(kvm)) {
+		kvm_iommu_unmap_memslots(kvm);
+		return -EFAULT;
+	}
+
+	kvm_intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+				   pdev->bus->number, pdev->devfn);
+
+	if (kvm_intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					    pdev)) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		return -EFAULT;
+	}
+	return 0;
+}
+
+static int kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct page *page;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	if (!domain)
+		return -EFAULT;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)kvm_intel_iommu_gfn_to_pfn(domain,
+							gfn);
+
+		if (pfn && pfn_valid(pfn)) {
+			page = pfn_to_page(pfn);
+			put_page(page);
+		}
+		gfn++;
+	}
+	return 0;
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i, rc;
+	for (i = 0; i < kvm->nmemslots; i++) {
+		rc = kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+					 kvm->memslots[i].npages);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_pci_pt_dev_list *entry;
+	struct pci_dev *pdev = NULL;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.pci_pt_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->pt_dev.host.busnr,
+		       PCI_SLOT(entry->pt_dev.host.devfn),
+		       PCI_FUNC(entry->pt_dev.host.devfn));
+
+		for_each_pci_dev(pdev) {
+			if ((pdev->bus->number == entry->pt_dev.host.busnr) &&
+			    (pdev->devfn == entry->pt_dev.host.devfn))
+				break;
+		}
+
+		if (pdev == NULL)
+			return -ENODEV;
+
+		/* detach kvm dmar domain */
+		kvm_intel_iommu_detach_dev(domain,
+					   pdev->bus->number, pdev->devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	kvm_intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/vtd.h b/arch/x86/kvm/vtd.h
new file mode 100644
index 0000000..9823cfd
--- /dev/null
+++ b/arch/x86/kvm/vtd.h
@@ -0,0 +1,36 @@
+#ifndef __KVM_INTEL_IOMMU_H
+#define __KVM_INTEL_IOMMU_H
+
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ */
+
+/* #define DEBUG */
+
+int kvm_intel_iommu_context_mapping(struct dmar_domain *d,
+			struct pci_dev *pdev);
+int kvm_intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
+			u64 hpa, size_t size, int prot);
+void kvm_intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
+struct dmar_domain *kvm_intel_iommu_domain_alloc(struct pci_dev *pdev);
+void kvm_intel_iommu_domain_exit(struct dmar_domain *domain);
+struct dmar_domain *kvm_intel_iommu_find_domain(struct pci_dev *pdev);
+
+#endif
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index bae1b76..19194a2 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -308,6 +308,21 @@ struct kvm_mem_alias {
 	gfn_t target_gfn;
 };
 
+struct kvm_pci_passthrough_dev_kernel {
+	struct kvm_pci_pt_info guest;
+	struct kvm_pci_pt_info host;
+	struct pci_dev *dev;
+};
+
+/* This list is to store the guest bus:device:function-irq and host
+ * bus:device:function-irq mapping for assigned devices.
+ */
+struct kvm_pci_pt_dev_list {
+	struct list_head list;
+	struct kvm_pci_passthrough_dev_kernel pt_dev;
+};
+
+
 struct kvm_arch{
 	int naliases;
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -320,6 +335,8 @@ struct kvm_arch{
 	 * Hash table of struct kvm_mmu_page.
 	 */
 	struct list_head active_mmu_pages;
+	struct list_head pci_pt_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index bfd9900..a833f6b 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -139,4 +139,18 @@ static inline unsigned int kvm_arch_para_features(void)
 
 #endif
 
+/* Stores information for identifying host PCI devices assigned to the
+ * guest: this is used in the host kernel and in the userspace.
+ */
+struct kvm_pci_pt_info {
+	unsigned char busnr;
+	unsigned int devfn;
+	__u32 irq;
+};
+
+/* Mapping between host and guest PCI device */
+struct kvm_pci_passthrough_dev {
+	struct kvm_pci_pt_info guest;
+	struct kvm_pci_pt_info host;
+};
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d220b49..39b7c32 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -278,6 +278,14 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_pci_passthrough_dev *pci_pt_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+int kvm_intel_iommu_found(void);
+u64 kvm_intel_iommu_gfn_to_pfn(struct dmar_domain *domain, u64 gfn);
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
-- 
1.5.5.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* RE: [PATCH] KVM: PCIPT: VT-d support
  2008-07-02 15:33   ` [PATCH] KVM: PCIPT: VT-d support Ben-Ami Yassour
@ 2008-07-03  4:55     ` Han, Weidong
  2008-07-06 11:15       ` Ben-Ami Yassour
       [not found]     ` <08DF4D958216244799FC84F3514D70F00181AB10@pdsmsx415.ccr.corp.intel.com>
  1 sibling, 1 reply; 20+ messages in thread
From: Han, Weidong @ 2008-07-03  4:55 UTC (permalink / raw)
  To: Ben-Ami Yassour, amit.shah; +Cc: kvm, muli, Kay, Allen M

Ben-Ami Yassour wrote:
> From: Kay, Allen M <allen.m.kay@intel.com>
> 
> This patch includes the functions to support VT-d for passthrough
> devices.
> 
> [Ben: fixed memory pinning]
> 
> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> Signed-off-by: Weidong Han <weidong.han@intel.com>
> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> ---
>  arch/x86/kvm/Makefile      |    2 +-
>  arch/x86/kvm/vtd.c         |  191
>  ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/vtd.h     
>  |   36 ++++++++ include/asm-x86/kvm_host.h |   17 ++++
>  include/asm-x86/kvm_para.h |   14 +++
>  include/linux/kvm_host.h   |    8 ++
>  6 files changed, 267 insertions(+), 1 deletions(-)
>  create mode 100644 arch/x86/kvm/vtd.c
>  create mode 100644 arch/x86/kvm/vtd.h
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index d220b49..39b7c32 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -278,6 +278,14 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
> 
> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
> +			unsigned long npages);
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_pci_passthrough_dev *pci_pt_dev);
> +int kvm_iommu_unmap_guest(struct kvm *kvm);
> +int kvm_intel_iommu_found(void);
> +u64 kvm_intel_iommu_gfn_to_pfn(struct dmar_domain *domain, u64 gfn);
> +

Needn't to add kvm_intel_iommu_gfn_to_pfn() declaration here. It's
exposed by VT-d driver.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH] KVM: PCIPT: VT-d support
       [not found]     ` <08DF4D958216244799FC84F3514D70F00181AB10@pdsmsx415.ccr.corp.intel.com>
@ 2008-07-03  6:16       ` Han, Weidong
  2008-07-03 11:33         ` Ben-Ami Yassour
  0 siblings, 1 reply; 20+ messages in thread
From: Han, Weidong @ 2008-07-03  6:16 UTC (permalink / raw)
  To: Han, Weidong, Ben-Ami Yassour, amit.shah; +Cc: kvm, muli, Kay, Allen M

Han, Weidong wrote:
> Ben-Ami Yassour wrote:
>> From: Kay, Allen M <allen.m.kay@intel.com>
>> 
>> This patch includes the functions to support VT-d for passthrough
>> devices. 
>> 
>> [Ben: fixed memory pinning]
>> 
>> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
>> Signed-off-by: Weidong Han <weidong.han@intel.com>
>> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
>> ---
>>  arch/x86/kvm/Makefile      |    2 +-
>>  arch/x86/kvm/vtd.c         |  191
>>  ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/vtd.h
>>  |   36 ++++++++ include/asm-x86/kvm_host.h |   17 ++++
>>  include/asm-x86/kvm_para.h |   14 +++
>>  include/linux/kvm_host.h   |    8 ++
>>  6 files changed, 267 insertions(+), 1 deletions(-)
>>  create mode 100644 arch/x86/kvm/vtd.c
>>  create mode 100644 arch/x86/kvm/vtd.h
>> 
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index d220b49..39b7c32 100644 --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -278,6 +278,14 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
>> 
>> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
>> +			unsigned long npages); +int
kvm_iommu_map_guest(struct kvm *kvm,
>> +			struct kvm_pci_passthrough_dev *pci_pt_dev);
>> +int kvm_iommu_unmap_guest(struct kvm *kvm);
>> +int kvm_intel_iommu_found(void);
>> +u64 kvm_intel_iommu_gfn_to_pfn(struct dmar_domain *domain, u64 gfn);
>> +
> 
> Needn't to add kvm_intel_iommu_gfn_to_pfn() declaration here. It's
> exposed by VT-d driver. 

I think it's better to declare kvm_intel_iommu_gfn_to_pfn() in vtd.h,
because it's only used in vtd.c.

Randy (Weidong)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH] KVM: PCIPT: VT-d support
  2008-07-03  6:16       ` Han, Weidong
@ 2008-07-03 11:33         ` Ben-Ami Yassour
  0 siblings, 0 replies; 20+ messages in thread
From: Ben-Ami Yassour @ 2008-07-03 11:33 UTC (permalink / raw)
  To: Han, Weidong; +Cc: amit.shah, kvm, Muli Ben-Yehuda, Kay, Allen M

On Thu, 2008-07-03 at 14:16 +0800, Han, Weidong wrote:
> Han, Weidong wrote:
> > Ben-Ami Yassour wrote:
> >> From: Kay, Allen M <allen.m.kay@intel.com>
> >> 
> >> This patch includes the functions to support VT-d for passthrough
> >> devices. 
> >> 
> >> [Ben: fixed memory pinning]
> >> 
> >> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> >> Signed-off-by: Weidong Han <weidong.han@intel.com>
> >> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> >> ---
> >>  arch/x86/kvm/Makefile      |    2 +-
> >>  arch/x86/kvm/vtd.c         |  191
> >>  ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/vtd.h
> >>  |   36 ++++++++ include/asm-x86/kvm_host.h |   17 ++++
> >>  include/asm-x86/kvm_para.h |   14 +++
> >>  include/linux/kvm_host.h   |    8 ++
> >>  6 files changed, 267 insertions(+), 1 deletions(-)
> >>  create mode 100644 arch/x86/kvm/vtd.c
> >>  create mode 100644 arch/x86/kvm/vtd.h
> >> 
> >> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> >> index d220b49..39b7c32 100644 --- a/include/linux/kvm_host.h
> >> +++ b/include/linux/kvm_host.h
> >> @@ -278,6 +278,14 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
> >>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
> >>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
> >> 
> >> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
> >> +			unsigned long npages); +int
> kvm_iommu_map_guest(struct kvm *kvm,
> >> +			struct kvm_pci_passthrough_dev *pci_pt_dev);
> >> +int kvm_iommu_unmap_guest(struct kvm *kvm);
> >> +int kvm_intel_iommu_found(void);
> >> +u64 kvm_intel_iommu_gfn_to_pfn(struct dmar_domain *domain, u64 gfn);
> >> +
> > 
> > Needn't to add kvm_intel_iommu_gfn_to_pfn() declaration here. It's
> > exposed by VT-d driver. 
> 
> I think it's better to declare kvm_intel_iommu_gfn_to_pfn() in vtd.h,
> because it's only used in vtd.c.
Agree.
Another option is to declare these functions in intel-iommu.h, since it
is implemented in intel-iommu.c.

Regards,
Ben

> 
> Randy (Weidong)


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] KVM: PCIPT: VT-d support
  2008-07-06 10:52 ` [PATCH] VT-d : changes to support KVM Ben-Ami Yassour
@ 2008-07-06 10:52   ` Ben-Ami Yassour
  2008-07-09 15:49     ` Anthony Liguori
  0 siblings, 1 reply; 20+ messages in thread
From: Ben-Ami Yassour @ 2008-07-06 10:52 UTC (permalink / raw)
  To: amit.shah; +Cc: kvm, muli, benami, weidong.han, Kay, Allen M

From: Kay, Allen M <allen.m.kay@intel.com>

This patch includes the functions to support VT-d for passthrough
devices.

[Ben: fixed memory pinning]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
---
 arch/x86/kvm/Makefile      |    2 +-
 arch/x86/kvm/vtd.c         |  189 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/kvm_host.h |   17 ++++
 include/asm-x86/kvm_para.h |   14 +++
 include/linux/kvm_host.h   |    6 ++
 5 files changed, 227 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..5d9d079 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -11,7 +11,7 @@ endif
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
-	i8254.o
+	i8254.o vtd.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..5abeef1
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct page *page;
+	int i, rc;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	if (!domain)
+		return -EFAULT;
+
+	for (i = 0; i < npages; i++) {
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (pfn_valid(pfn)) {
+			rc = intel_iommu_page_mapping(domain,
+						      gfn << PAGE_SHIFT,
+						      pfn << PAGE_SHIFT,
+						      PAGE_SIZE,
+						      DMA_PTE_READ |
+						      DMA_PTE_WRITE);
+			if (rc) {
+				page = pfn_to_page(pfn);
+				put_page(page);
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			return 0;
+		}
+		gfn++;
+	}
+	return 0;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, rc;
+	for (i = 0; i < kvm->nmemslots; i++) {
+		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					 kvm->memslots[i].npages);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_pci_passthrough_dev *pci_pt_dev)
+{
+	struct pci_dev *pdev = NULL;
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       pci_pt_dev->host.busnr,
+	       PCI_SLOT(pci_pt_dev->host.devfn),
+	       PCI_FUNC(pci_pt_dev->host.devfn));
+
+	for_each_pci_dev(pdev) {
+		if ((pdev->bus->number == pci_pt_dev->host.busnr) &&
+		    (pdev->devfn == pci_pt_dev->host.devfn)) {
+			break;
+		}
+	}
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+
+	if (kvm_iommu_map_memslots(kvm)) {
+		kvm_iommu_unmap_memslots(kvm);
+		return -EFAULT;
+	}
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	if (intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					pdev)) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		return -EFAULT;
+	}
+	return 0;
+}
+
+static int kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct page *page;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	if (!domain)
+		return -EFAULT;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn << PAGE_SHIFT);
+
+		if (pfn && pfn_valid(pfn)) {
+			page = pfn_to_page(pfn);
+			put_page(page);
+		}
+		gfn++;
+	}
+	return 0;
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i, rc;
+	for (i = 0; i < kvm->nmemslots; i++) {
+		rc = kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+					 kvm->memslots[i].npages);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_pci_pt_dev_list *entry;
+	struct pci_dev *pdev = NULL;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.pci_pt_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->pt_dev.host.busnr,
+		       PCI_SLOT(entry->pt_dev.host.devfn),
+		       PCI_FUNC(entry->pt_dev.host.devfn));
+
+		for_each_pci_dev(pdev) {
+			if ((pdev->bus->number == entry->pt_dev.host.busnr) &&
+			    (pdev->devfn == entry->pt_dev.host.devfn))
+				break;
+		}
+
+		if (pdev == NULL)
+			return -ENODEV;
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain,
+				       pdev->bus->number, pdev->devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 41332f6..9391e57 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -308,6 +308,21 @@ struct kvm_mem_alias {
 	gfn_t target_gfn;
 };
 
+struct kvm_pci_passthrough_dev_kernel {
+	struct kvm_pci_pt_info guest;
+	struct kvm_pci_pt_info host;
+	struct pci_dev *dev;
+};
+
+/* This list is to store the guest bus:device:function-irq and host
+ * bus:device:function-irq mapping for assigned devices.
+ */
+struct kvm_pci_pt_dev_list {
+	struct list_head list;
+	struct kvm_pci_passthrough_dev_kernel pt_dev;
+};
+
+
 struct kvm_arch{
 	int naliases;
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -320,6 +335,8 @@ struct kvm_arch{
 	 * Hash table of struct kvm_mmu_page.
 	 */
 	struct list_head active_mmu_pages;
+	struct list_head pci_pt_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index 76f3921..88153f4 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -144,4 +144,18 @@ static inline unsigned int kvm_arch_para_features(void)
 
 #endif
 
+/* Stores information for identifying host PCI devices assigned to the
+ * guest: this is used in the host kernel and in the userspace.
+ */
+struct kvm_pci_pt_info {
+	unsigned char busnr;
+	unsigned int devfn;
+	__u32 irq;
+};
+
+/* Mapping between host and guest PCI device */
+struct kvm_pci_passthrough_dev {
+	struct kvm_pci_pt_info guest;
+	struct kvm_pci_pt_info host;
+};
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fc685c5..424534b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -278,6 +278,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_pci_passthrough_dev *pci_pt_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
-- 
1.5.5.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* RE: [PATCH] KVM: PCIPT: VT-d support
  2008-07-03  4:55     ` Han, Weidong
@ 2008-07-06 11:15       ` Ben-Ami Yassour
  0 siblings, 0 replies; 20+ messages in thread
From: Ben-Ami Yassour @ 2008-07-06 11:15 UTC (permalink / raw)
  To: Han, Weidong; +Cc: amit.shah, kvm, Muli Ben-Yehuda, Kay, Allen M

On Thu, 2008-07-03 at 12:55 +0800, Han, Weidong wrote:
> Ben-Ami Yassour wrote:
> > From: Kay, Allen M <allen.m.kay@intel.com>
> > 
> > This patch includes the functions to support VT-d for passthrough
> > devices.
> > 
> > [Ben: fixed memory pinning]
> > 
> > Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> > Signed-off-by: Weidong Han <weidong.han@intel.com>
> > Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> > ---
> >  arch/x86/kvm/Makefile      |    2 +-
> >  arch/x86/kvm/vtd.c         |  191
> >  ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/vtd.h     
> >  |   36 ++++++++ include/asm-x86/kvm_host.h |   17 ++++
> >  include/asm-x86/kvm_para.h |   14 +++
> >  include/linux/kvm_host.h   |    8 ++
> >  6 files changed, 267 insertions(+), 1 deletions(-)
> >  create mode 100644 arch/x86/kvm/vtd.c
> >  create mode 100644 arch/x86/kvm/vtd.h
> > 
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index d220b49..39b7c32 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -278,6 +278,14 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
> >  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
> >  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
> > 
> > +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
> > +			unsigned long npages);
> > +int kvm_iommu_map_guest(struct kvm *kvm,
> > +			struct kvm_pci_passthrough_dev *pci_pt_dev);
> > +int kvm_iommu_unmap_guest(struct kvm *kvm);
> > +int kvm_intel_iommu_found(void);
> > +u64 kvm_intel_iommu_gfn_to_pfn(struct dmar_domain *domain, u64 gfn);
> > +
> 
> Needn't to add kvm_intel_iommu_gfn_to_pfn() declaration here. It's
> exposed by VT-d driver.

I sent a new version of the patches in which all the prototypes of the functions that are implemented in intel-iommu.c are in intel-iommu.h

> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] KVM: PCIPT: VT-d support
  2008-07-06 10:52   ` [PATCH] KVM: PCIPT: VT-d support Ben-Ami Yassour
@ 2008-07-09 15:49     ` Anthony Liguori
  2008-07-10  9:19       ` Ben-Ami Yassour
  0 siblings, 1 reply; 20+ messages in thread
From: Anthony Liguori @ 2008-07-09 15:49 UTC (permalink / raw)
  To: Ben-Ami Yassour; +Cc: amit.shah, kvm, muli, weidong.han, Kay, Allen M

Ben-Ami Yassour wrote:
> From: Kay, Allen M <allen.m.kay@intel.com>
>
> This patch includes the functions to support VT-d for passthrough
> devices.
>
> [Ben: fixed memory pinning]
>
> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> Signed-off-by: Weidong Han <weidong.han@intel.com>
> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> ---
>  arch/x86/kvm/Makefile      |    2 +-
>  arch/x86/kvm/vtd.c         |  189 ++++++++++++++++++++++++++++++++++++++++++++
>  include/asm-x86/kvm_host.h |   17 ++++
>  include/asm-x86/kvm_para.h |   14 +++
>  include/linux/kvm_host.h   |    6 ++
>  5 files changed, 227 insertions(+), 1 deletions(-)
>  create mode 100644 arch/x86/kvm/vtd.c
>
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index d0e940b..5d9d079 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -11,7 +11,7 @@ endif
>  EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
>  
>  kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
> -	i8254.o
> +	i8254.o vtd.o
>  obj-$(CONFIG_KVM) += kvm.o
>  kvm-intel-objs = vmx.o
>  obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
> diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
> new file mode 100644
> index 0000000..5abeef1
> --- /dev/null
> +++ b/arch/x86/kvm/vtd.c
> @@ -0,0 +1,189 @@
> +/*
> + * Copyright (c) 2006, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + *
> + * Copyright (C) 2006-2008 Intel Corporation
> + * Author: Allen M. Kay <allen.m.kay@intel.com>
> + * Author: Weidong Han <weidong.han@intel.com>
> + */
> +
> +#include <linux/list.h>
> +#include <linux/kvm_host.h>
> +#include <linux/pci.h>
> +#include <linux/dmar.h>
> +#include <linux/intel-iommu.h>
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
> +
> +int kvm_iommu_map_pages(struct kvm *kvm,
> +			gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	struct page *page;
> +	int i, rc;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	if (!domain)
> +		return -EFAULT;
> +
> +	for (i = 0; i < npages; i++) {
> +		pfn = gfn_to_pfn(kvm, gfn);
> +		if (pfn_valid(pfn)) {
>   

As I've mentioned before, this is wrong.  We should add MMIO pages to 
the VT-d tables but at any rate, pfn_valid() doesn't work for checking 
if something is MMIO.

> +			rc = intel_iommu_page_mapping(domain,
> +						      gfn << PAGE_SHIFT,
> +						      pfn << PAGE_SHIFT,
> +						      PAGE_SIZE,
> +						      DMA_PTE_READ |
> +						      DMA_PTE_WRITE);
> +			if (rc) {
> +				page = pfn_to_page(pfn);
> +				put_page(page);
>   

This should be kvm_release_pfn_clean().

> +			}
> +		} else {
> +			printk(KERN_DEBUG "kvm_iommu_map_page:"
> +			       "invalid pfn=%lx\n", pfn);
> +			return 0;
> +		}
> +		gfn++;
> +	}
> +	return 0;
> +}
> +
> +static int kvm_iommu_map_memslots(struct kvm *kvm)
> +{
> +	int i, rc;
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> +					 kvm->memslots[i].npages);
> +		if (rc)
> +			return rc;
> +	}
> +	return 0;
> +}
> +
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_pci_passthrough_dev *pci_pt_dev)
> +{
> +	struct pci_dev *pdev = NULL;
> +
> +	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
> +	       pci_pt_dev->host.busnr,
> +	       PCI_SLOT(pci_pt_dev->host.devfn),
> +	       PCI_FUNC(pci_pt_dev->host.devfn));
> +
> +	for_each_pci_dev(pdev) {
> +		if ((pdev->bus->number == pci_pt_dev->host.busnr) &&
> +		    (pdev->devfn == pci_pt_dev->host.devfn)) {
> +			break;
> +		}
> +	}
> +
> +	if (pdev == NULL) {
> +		if (kvm->arch.intel_iommu_domain) {
> +			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
> +			kvm->arch.intel_iommu_domain = NULL;
> +		}
> +		return -ENODEV;
> +	}
> +
> +	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
> +
> +	if (kvm_iommu_map_memslots(kvm)) {
> +		kvm_iommu_unmap_memslots(kvm);
> +		return -EFAULT;
> +	}
> +
> +	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
> +			       pdev->bus->number, pdev->devfn);
> +
> +	if (intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
> +					pdev)) {
> +		printk(KERN_ERR "Domain context map for %s failed",
> +		       pci_name(pdev));
> +		return -EFAULT;
> +	}
> +	return 0;
> +}
> +
> +static int kvm_iommu_put_pages(struct kvm *kvm,
> +			       gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	struct page *page;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +	int i;
> +
> +	if (!domain)
> +		return -EFAULT;
> +
> +	for (i = 0; i < npages; i++) {
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn << PAGE_SHIFT);
> +
> +		if (pfn && pfn_valid(pfn)) {
> +			page = pfn_to_page(pfn);
> +			put_page(page);
> +		}
> +		gfn++;
> +	}
> +	return 0;
> +}
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm)
> +{
> +	int i, rc;
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		rc = kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
> +					 kvm->memslots[i].npages);
> +		if (rc)
> +			return rc;
> +	}
> +	return 0;
> +}
> +
> +int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +	struct kvm_pci_pt_dev_list *entry;
> +	struct pci_dev *pdev = NULL;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	if (!domain)
> +		return 0;
> +
> +	list_for_each_entry(entry, &kvm->arch.pci_pt_dev_head, list) {
> +		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
> +		       entry->pt_dev.host.busnr,
> +		       PCI_SLOT(entry->pt_dev.host.devfn),
> +		       PCI_FUNC(entry->pt_dev.host.devfn));
> +
> +		for_each_pci_dev(pdev) {
> +			if ((pdev->bus->number == entry->pt_dev.host.busnr) &&
> +			    (pdev->devfn == entry->pt_dev.host.devfn))
> +				break;
> +		}
> +
> +		if (pdev == NULL)
> +			return -ENODEV;
> +
> +		/* detach kvm dmar domain */
> +		intel_iommu_detach_dev(domain,
> +				       pdev->bus->number, pdev->devfn);
> +	}
> +	kvm_iommu_unmap_memslots(kvm);
> +	intel_iommu_domain_exit(domain);
> +	return 0;
> +}
> diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> index 41332f6..9391e57 100644
> --- a/include/asm-x86/kvm_host.h
> +++ b/include/asm-x86/kvm_host.h
> @@ -308,6 +308,21 @@ struct kvm_mem_alias {
>  	gfn_t target_gfn;
>  };
>  
> +struct kvm_pci_passthrough_dev_kernel {
> +	struct kvm_pci_pt_info guest;
> +	struct kvm_pci_pt_info host;
> +	struct pci_dev *dev;
> +};
> +
> +/* This list is to store the guest bus:device:function-irq and host
> + * bus:device:function-irq mapping for assigned devices.
> + */
> +struct kvm_pci_pt_dev_list {
> +	struct list_head list;
> +	struct kvm_pci_passthrough_dev_kernel pt_dev;
> +};
> +
> +
>  struct kvm_arch{
>  	int naliases;
>  	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
> @@ -320,6 +335,8 @@ struct kvm_arch{
>  	 * Hash table of struct kvm_mmu_page.
>  	 */
>  	struct list_head active_mmu_pages;
> +	struct list_head pci_pt_dev_head;
> +	struct dmar_domain *intel_iommu_domain;
>  	struct kvm_pic *vpic;
>  	struct kvm_ioapic *vioapic;
>  	struct kvm_pit *vpit;
> diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
> index 76f3921..88153f4 100644
> --- a/include/asm-x86/kvm_para.h
> +++ b/include/asm-x86/kvm_para.h
> @@ -144,4 +144,18 @@ static inline unsigned int kvm_arch_para_features(void)
>  
>  #endif
>  
> +/* Stores information for identifying host PCI devices assigned to the
> + * guest: this is used in the host kernel and in the userspace.
> + */
> +struct kvm_pci_pt_info {
> +	unsigned char busnr;
> +	unsigned int devfn;
> +	__u32 irq;
>   

Userspace should not have to provide the IRQ.  Just use the PCI device 
structure to find it.  We should use different structures for host/guest 
devices.

> +};
> +
> +/* Mapping between host and guest PCI device */
> +struct kvm_pci_passthrough_dev {
> +	struct kvm_pci_pt_info guest;
> +	struct kvm_pci_pt_info host;
> +};
>  #endif
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index fc685c5..424534b 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -278,6 +278,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
>  
> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
> +			unsigned long npages);
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_pci_passthrough_dev *pci_pt_dev);
> +int kvm_iommu_unmap_guest(struct kvm *kvm);
> +
>  static inline void kvm_guest_enter(void)
>  {
>  	account_system_vtime(current);
>   

I'm not seeing these functions called anywhere?  Am I missing something 
obvious or is part of your patch missing?

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 20+ messages in thread

* VT-d pci passthrough patches
@ 2008-07-10  9:14 Ben-Ami Yassour
  2008-07-10  9:14 ` [PATCH] VT-d : changes to support KVM Ben-Ami Yassour
  0 siblings, 1 reply; 20+ messages in thread
From: Ben-Ami Yassour @ 2008-07-10  9:14 UTC (permalink / raw)
  To: amit.shah; +Cc: kvm, muli, benami, weidong.han, anthony

The following two patches contains the VT-d support for pci passthrough
devices. 
The first patch contains the changes that are required to the generic
VT-d code.
The second patch contains the changes to KVM.

These patches contains fixes based on comments by Anthony,
on the pervious version.

The VT-d code is used by the pci-passthrough code to map the entire
guest memory in the IOMMU.
Once mmu-notifiers go in, we will be able to support finer grained forms
of protection.

The main differences from the previous version, sent by Weidong, are
fixes for the page pinning.
In the destruction path we need to call put_page for each page that was
mapped in the iommu and pinned during initialization..
The KVM process page tables are already released by the time the KVM vm
destroy path is executed.
Therefore we can not use gfn_to_pfn (which uses get_user_pages) to get
the pages.
The approach in the current patch is to find the pages to unpin by
looking at the IOMMU page table.

This patch applies to the Amit's PCI passthrough tree.

Any comments are welcome.

Regards,
Ben



^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] VT-d : changes to support KVM
  2008-07-10  9:14 VT-d pci passthrough patches Ben-Ami Yassour
@ 2008-07-10  9:14 ` Ben-Ami Yassour
  2008-07-10  9:14   ` [PATCH] KVM: PCIPT: VT-d support Ben-Ami Yassour
  0 siblings, 1 reply; 20+ messages in thread
From: Ben-Ami Yassour @ 2008-07-10  9:14 UTC (permalink / raw)
  To: amit.shah; +Cc: kvm, muli, benami, weidong.han, anthony, Kay, Allen M

From: Kay, Allen M <allen.m.kay@intel.com>

This patch extends the VT-d driver to support KVM

[Ben: fixed memory pinning]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
---
 drivers/pci/dmar.c                           |    4 +-
 drivers/pci/intel-iommu.c                    |  117 +++++++++++++++++++++++++-
 drivers/pci/iova.c                           |    2 +-
 {drivers/pci => include/linux}/intel-iommu.h |   11 +++
 {drivers/pci => include/linux}/iova.h        |    0 
 5 files changed, 127 insertions(+), 7 deletions(-)
 rename {drivers/pci => include/linux}/intel-iommu.h (94%)
 rename {drivers/pci => include/linux}/iova.h (100%)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index f941f60..a58a5b0 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -26,8 +26,8 @@
 
 #include <linux/pci.h>
 #include <linux/dmar.h>
-#include "iova.h"
-#include "intel-iommu.h"
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 
 #undef PREFIX
 #define PREFIX "DMAR:"
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 66c0fd2..6ad2c75 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -20,6 +20,7 @@
  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  */
 
+#undef DEBUG
 #include <linux/init.h>
 #include <linux/bitmap.h>
 #include <linux/debugfs.h>
@@ -33,8 +34,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/mempool.h>
 #include <linux/timer.h>
-#include "iova.h"
-#include "intel-iommu.h"
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 #include <asm/proto.h> /* force_iommu in this header in x86-64*/
 #include <asm/cacheflush.h>
 #include <asm/gart.h>
@@ -160,7 +161,7 @@ static inline void *alloc_domain_mem(void)
 	return iommu_kmem_cache_alloc(iommu_domain_cache);
 }
 
-static inline void free_domain_mem(void *vaddr)
+static void free_domain_mem(void *vaddr)
 {
 	kmem_cache_free(iommu_domain_cache, vaddr);
 }
@@ -1414,7 +1415,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
  * find_domain
  * Note: we use struct pci_dev->dev.archdata.iommu stores the info
  */
-struct dmar_domain *
+static struct dmar_domain *
 find_domain(struct pci_dev *pdev)
 {
 	struct device_domain_info *info;
@@ -2408,3 +2409,111 @@ int __init intel_iommu_init(void)
 	return 0;
 }
 
+void intel_iommu_domain_exit(struct dmar_domain *domain)
+{
+	u64 end;
+
+	/* Domain 0 is reserved, so dont process it */
+	if (!domain)
+		return;
+
+	end = DOMAIN_MAX_ADDR(domain->gaw);
+	end = end & (~PAGE_MASK_4K);
+
+	/* clear ptes */
+	dma_pte_clear_range(domain, 0, end);
+
+	/* free page tables */
+	dma_pte_free_pagetable(domain, 0, end);
+
+	iommu_free_domain(domain);
+	free_domain_mem(domain);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
+
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
+{
+	struct dmar_drhd_unit *drhd;
+	struct dmar_domain *domain;
+	struct intel_iommu *iommu;
+
+	drhd = dmar_find_matched_drhd_unit(pdev);
+	if (!drhd) {
+		printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
+		return NULL;
+	}
+
+	iommu = drhd->iommu;
+	if (!iommu) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: iommu == NULL\n");
+		return NULL;
+	}
+	domain = iommu_alloc_domain(iommu);
+	if (!domain) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: domain == NULL\n");
+		return NULL;
+	}
+	if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: domain_init() failed\n");
+		intel_iommu_domain_exit(domain);
+		return NULL;
+	}
+	return domain;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+
+int intel_iommu_context_mapping(
+	struct dmar_domain *domain, struct pci_dev *pdev)
+{
+	int rc;
+	rc = domain_context_mapping(domain, pdev);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+
+int intel_iommu_page_mapping(
+	struct dmar_domain *domain, dma_addr_t iova,
+	u64 hpa, size_t size, int prot)
+{
+	int rc;
+	rc = domain_page_mapping(domain, iova, hpa, size, prot);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+{
+	detach_domain_for_dev(domain, bus, devfn);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+
+struct dmar_domain *
+intel_iommu_find_domain(struct pci_dev *pdev)
+{
+	return find_domain(pdev);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+
+int intel_iommu_found(void)
+{
+	return g_num_of_iommus;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_found);
+
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
+{
+	struct dma_pte *pte;
+	u64 pfn;
+
+	pfn = 0;
+	pte = addr_to_dma_pte(domain, iova);
+
+	if (pte)
+		pfn = dma_pte_addr(*pte);
+
+	return pfn >> PAGE_SHIFT_4K;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 3ef4ac0..2287116 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -7,7 +7,7 @@
  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  */
 
-#include "iova.h"
+#include <linux/iova.h>
 
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit)
diff --git a/drivers/pci/intel-iommu.h b/include/linux/intel-iommu.h
similarity index 94%
rename from drivers/pci/intel-iommu.h
rename to include/linux/intel-iommu.h
index afc0ad9..1490fc0 100644
--- a/drivers/pci/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -341,4 +341,15 @@ static inline void iommu_prepare_gfx_mapping(void)
 }
 #endif /* !CONFIG_DMAR_GFX_WA */
 
+void intel_iommu_domain_exit(struct dmar_domain *domain);
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
+int intel_iommu_context_mapping(struct dmar_domain *domain,
+				struct pci_dev *pdev);
+int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
+			     u64 hpa, size_t size, int prot);
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
+struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
+int intel_iommu_found(void);
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
+
 #endif
diff --git a/drivers/pci/iova.h b/include/linux/iova.h
similarity index 100%
rename from drivers/pci/iova.h
rename to include/linux/iova.h
-- 
1.5.6


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH] KVM: PCIPT: VT-d support
  2008-07-10  9:14 ` [PATCH] VT-d : changes to support KVM Ben-Ami Yassour
@ 2008-07-10  9:14   ` Ben-Ami Yassour
  2008-07-10  9:30     ` Yang, Sheng
  0 siblings, 1 reply; 20+ messages in thread
From: Ben-Ami Yassour @ 2008-07-10  9:14 UTC (permalink / raw)
  To: amit.shah; +Cc: kvm, muli, benami, weidong.han, anthony, Kay, Allen M

From: Kay, Allen M <allen.m.kay@intel.com>

This patch includes the functions to support VT-d for passthrough
devices.

[Ben: fixed memory pinning]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
---
 arch/x86/kvm/Makefile      |    2 +-
 arch/x86/kvm/vtd.c         |  176 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |   10 +++
 include/asm-x86/kvm_host.h |    1 +
 include/linux/kvm_host.h   |    6 ++
 virt/kvm/kvm_main.c        |    6 ++
 6 files changed, 200 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..5d9d079 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -11,7 +11,7 @@ endif
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
-	i8254.o
+	i8254.o vtd.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..83efb8a
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, rc;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	if (!domain)
+		return -EFAULT;
+
+	for (i = 0; i < npages; i++) {
+		pfn = gfn_to_pfn(kvm, gfn);
+		rc = intel_iommu_page_mapping(domain,
+					      gfn << PAGE_SHIFT,
+						      pfn << PAGE_SHIFT,
+					      PAGE_SIZE,
+					      DMA_PTE_READ |
+					      DMA_PTE_WRITE);
+		if (rc)
+			kvm_release_pfn_clean(pfn);
+
+		gfn++;
+	}
+	return 0;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, rc;
+	for (i = 0; i < kvm->nmemslots; i++) {
+		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					 kvm->memslots[i].npages);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_pci_passthrough_dev *pci_pt_dev)
+{
+	struct pci_dev *pdev = NULL;
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       pci_pt_dev->host.busnr,
+	       PCI_SLOT(pci_pt_dev->host.devfn),
+	       PCI_FUNC(pci_pt_dev->host.devfn));
+
+	for_each_pci_dev(pdev) {
+		if ((pdev->bus->number == pci_pt_dev->host.busnr) &&
+		    (pdev->devfn == pci_pt_dev->host.devfn)) {
+			break;
+		}
+	}
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+
+	if (kvm_iommu_map_memslots(kvm)) {
+		kvm_iommu_unmap_memslots(kvm);
+		return -EFAULT;
+	}
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	if (intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					pdev)) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		return -EFAULT;
+	}
+	return 0;
+}
+
+static int kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	if (!domain)
+		return -EFAULT;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn << PAGE_SHIFT);
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+	return 0;
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i, rc;
+	for (i = 0; i < kvm->nmemslots; i++) {
+		rc = kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+					 kvm->memslots[i].npages);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_pci_pt_dev_list *entry;
+	struct pci_dev *pdev = NULL;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.pci_pt_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->pt_dev.host.busnr,
+		       PCI_SLOT(entry->pt_dev.host.devfn),
+		       PCI_FUNC(entry->pt_dev.host.devfn));
+
+		for_each_pci_dev(pdev) {
+			if ((pdev->bus->number == entry->pt_dev.host.busnr) &&
+			    (pdev->devfn == entry->pt_dev.host.devfn))
+				break;
+		}
+
+		if (pdev == NULL)
+			return -ENODEV;
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain,
+				       pdev->bus->number, pdev->devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d4d4e0c..19d2ae3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -32,6 +32,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -333,6 +334,13 @@ static int kvm_vm_ioctl_pci_pt_dev(struct kvm *kvm,
 	if (irqchip_in_kernel(kvm))
 		set_bit(dev->irq, pt_irq_handled);
 	write_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+
+	if (intel_iommu_found()) {
+		r = kvm_iommu_map_guest(kvm, pci_pt_dev);
+		if (r)
+			goto out_put;
+	}
+
 out:
 	return r;
 out_put:
@@ -4272,6 +4280,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	if (intel_iommu_found())
+		kvm_iommu_unmap_guest(kvm);
 	kvm_free_pci_passthrough(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 0c6699f..953080e 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -354,6 +354,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head pci_pt_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d220b49..a16e2e3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -278,6 +278,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_pci_passthrough_dev *pci_pt_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 531d635..dc67d90 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -422,6 +423,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	if (intel_iommu_found())
+		kvm_iommu_map_pages(kvm, base_gfn, npages);
+
 	return 0;
 
 out_free:
-- 
1.5.6


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH] KVM: PCIPT: VT-d support
  2008-07-09 15:49     ` Anthony Liguori
@ 2008-07-10  9:19       ` Ben-Ami Yassour
  0 siblings, 0 replies; 20+ messages in thread
From: Ben-Ami Yassour @ 2008-07-10  9:19 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: amit.shah, kvm, Muli Ben-Yehuda, weidong.han, Kay, Allen M

On Wed, 2008-07-09 at 10:49 -0500, Anthony Liguori wrote:
> Ben-Ami Yassour wrote:
> > From: Kay, Allen M <allen.m.kay@intel.com>
> >
> > This patch includes the functions to support VT-d for passthrough
> > devices.
> >
> > [Ben: fixed memory pinning]
> >
> > Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> > Signed-off-by: Weidong Han <weidong.han@intel.com>
> > Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> > ---
> >  arch/x86/kvm/Makefile      |    2 +-
> >  arch/x86/kvm/vtd.c         |  189 ++++++++++++++++++++++++++++++++++++++++++++
> >  include/asm-x86/kvm_host.h |   17 ++++
> >  include/asm-x86/kvm_para.h |   14 +++
> >  include/linux/kvm_host.h   |    6 ++
> >  5 files changed, 227 insertions(+), 1 deletions(-)
> >  create mode 100644 arch/x86/kvm/vtd.c
> >
> > diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> > index d0e940b..5d9d079 100644
> > --- a/arch/x86/kvm/Makefile
> > +++ b/arch/x86/kvm/Makefile
> > @@ -11,7 +11,7 @@ endif
> >  EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
> >  
> >  kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
> > -	i8254.o
> > +	i8254.o vtd.o
> >  obj-$(CONFIG_KVM) += kvm.o
> >  kvm-intel-objs = vmx.o
> >  obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
> > diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
> > new file mode 100644
> > index 0000000..5abeef1
> > --- /dev/null
> > +++ b/arch/x86/kvm/vtd.c
> > @@ -0,0 +1,189 @@
> > +/*
> > + * Copyright (c) 2006, Intel Corporation.
> > + *
> > + * This program is free software; you can redistribute it and/or modify it
> > + * under the terms and conditions of the GNU General Public License,
> > + * version 2, as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope it will be useful, but WITHOUT
> > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> > + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> > + * more details.
> > + *
> > + * You should have received a copy of the GNU General Public License along with
> > + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> > + * Place - Suite 330, Boston, MA 02111-1307 USA.
> > + *
> > + * Copyright (C) 2006-2008 Intel Corporation
> > + * Author: Allen M. Kay <allen.m.kay@intel.com>
> > + * Author: Weidong Han <weidong.han@intel.com>
> > + */
> > +
> > +#include <linux/list.h>
> > +#include <linux/kvm_host.h>
> > +#include <linux/pci.h>
> > +#include <linux/dmar.h>
> > +#include <linux/intel-iommu.h>
> > +
> > +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
> > +
> > +int kvm_iommu_map_pages(struct kvm *kvm,
> > +			gfn_t base_gfn, unsigned long npages)
> > +{
> > +	gfn_t gfn = base_gfn;
> > +	pfn_t pfn;
> > +	struct page *page;
> > +	int i, rc;
> > +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> > +
> > +	if (!domain)
> > +		return -EFAULT;
> > +
> > +	for (i = 0; i < npages; i++) {
> > +		pfn = gfn_to_pfn(kvm, gfn);
> > +		if (pfn_valid(pfn)) {
> >   
> 
> As I've mentioned before, this is wrong.  We should add MMIO pages to 
> the VT-d tables but at any rate, pfn_valid() doesn't work for checking 
> if something is MMIO.

removing the check.

> 
> > +			rc = intel_iommu_page_mapping(domain,
> > +						      gfn << PAGE_SHIFT,
> > +						      pfn << PAGE_SHIFT,
> > +						      PAGE_SIZE,
> > +						      DMA_PTE_READ |
> > +						      DMA_PTE_WRITE);
> > +			if (rc) {
> > +				page = pfn_to_page(pfn);
> > +				put_page(page);
> >   
> 
> This should be kvm_release_pfn_clean().

fixed in the new version.

> 
> > +			}
> > +		} else {
> > +			printk(KERN_DEBUG "kvm_iommu_map_page:"
> > +			       "invalid pfn=%lx\n", pfn);
> > +			return 0;
> > +		}
> > +		gfn++;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int kvm_iommu_map_memslots(struct kvm *kvm)
> > +{
> > +	int i, rc;
> > +	for (i = 0; i < kvm->nmemslots; i++) {
> > +		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> > +					 kvm->memslots[i].npages);
> > +		if (rc)
> > +			return rc;
> > +	}
> > +	return 0;
> > +}
> > +
> > +int kvm_iommu_map_guest(struct kvm *kvm,
> > +			struct kvm_pci_passthrough_dev *pci_pt_dev)
> > +{
> > +	struct pci_dev *pdev = NULL;
> > +
> > +	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
> > +	       pci_pt_dev->host.busnr,
> > +	       PCI_SLOT(pci_pt_dev->host.devfn),
> > +	       PCI_FUNC(pci_pt_dev->host.devfn));
> > +
> > +	for_each_pci_dev(pdev) {
> > +		if ((pdev->bus->number == pci_pt_dev->host.busnr) &&
> > +		    (pdev->devfn == pci_pt_dev->host.devfn)) {
> > +			break;
> > +		}
> > +	}
> > +
> > +	if (pdev == NULL) {
> > +		if (kvm->arch.intel_iommu_domain) {
> > +			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
> > +			kvm->arch.intel_iommu_domain = NULL;
> > +		}
> > +		return -ENODEV;
> > +	}
> > +
> > +	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
> > +
> > +	if (kvm_iommu_map_memslots(kvm)) {
> > +		kvm_iommu_unmap_memslots(kvm);
> > +		return -EFAULT;
> > +	}
> > +
> > +	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
> > +			       pdev->bus->number, pdev->devfn);
> > +
> > +	if (intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
> > +					pdev)) {
> > +		printk(KERN_ERR "Domain context map for %s failed",
> > +		       pci_name(pdev));
> > +		return -EFAULT;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int kvm_iommu_put_pages(struct kvm *kvm,
> > +			       gfn_t base_gfn, unsigned long npages)
> > +{
> > +	gfn_t gfn = base_gfn;
> > +	pfn_t pfn;
> > +	struct page *page;
> > +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> > +	int i;
> > +
> > +	if (!domain)
> > +		return -EFAULT;
> > +
> > +	for (i = 0; i < npages; i++) {
> > +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> > +						     gfn << PAGE_SHIFT);
> > +
> > +		if (pfn && pfn_valid(pfn)) {
> > +			page = pfn_to_page(pfn);
> > +			put_page(page);

based on the comment above, changing this to: kvm_release_pfn_clean

> > +		}
> > +		gfn++;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int kvm_iommu_unmap_memslots(struct kvm *kvm)
> > +{
> > +	int i, rc;
> > +	for (i = 0; i < kvm->nmemslots; i++) {
> > +		rc = kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
> > +					 kvm->memslots[i].npages);
> > +		if (rc)
> > +			return rc;
> > +	}
> > +	return 0;
> > +}
> > +
> > +int kvm_iommu_unmap_guest(struct kvm *kvm)
> > +{
> > +	struct kvm_pci_pt_dev_list *entry;
> > +	struct pci_dev *pdev = NULL;
> > +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> > +
> > +	if (!domain)
> > +		return 0;
> > +
> > +	list_for_each_entry(entry, &kvm->arch.pci_pt_dev_head, list) {
> > +		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
> > +		       entry->pt_dev.host.busnr,
> > +		       PCI_SLOT(entry->pt_dev.host.devfn),
> > +		       PCI_FUNC(entry->pt_dev.host.devfn));
> > +
> > +		for_each_pci_dev(pdev) {
> > +			if ((pdev->bus->number == entry->pt_dev.host.busnr) &&
> > +			    (pdev->devfn == entry->pt_dev.host.devfn))
> > +				break;
> > +		}
> > +
> > +		if (pdev == NULL)
> > +			return -ENODEV;
> > +
> > +		/* detach kvm dmar domain */
> > +		intel_iommu_detach_dev(domain,
> > +				       pdev->bus->number, pdev->devfn);
> > +	}
> > +	kvm_iommu_unmap_memslots(kvm);
> > +	intel_iommu_domain_exit(domain);
> > +	return 0;
> > +}
> > diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> > index 41332f6..9391e57 100644
> > --- a/include/asm-x86/kvm_host.h
> > +++ b/include/asm-x86/kvm_host.h
> > @@ -308,6 +308,21 @@ struct kvm_mem_alias {
> >  	gfn_t target_gfn;
> >  };
> >  
> > +struct kvm_pci_passthrough_dev_kernel {
> > +	struct kvm_pci_pt_info guest;
> > +	struct kvm_pci_pt_info host;
> > +	struct pci_dev *dev;
> > +};
> > +
> > +/* This list is to store the guest bus:device:function-irq and host
> > + * bus:device:function-irq mapping for assigned devices.
> > + */
> > +struct kvm_pci_pt_dev_list {
> > +	struct list_head list;
> > +	struct kvm_pci_passthrough_dev_kernel pt_dev;
> > +};
> > +
> > +
> >  struct kvm_arch{
> >  	int naliases;
> >  	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
> > @@ -320,6 +335,8 @@ struct kvm_arch{
> >  	 * Hash table of struct kvm_mmu_page.
> >  	 */
> >  	struct list_head active_mmu_pages;
> > +	struct list_head pci_pt_dev_head;
> > +	struct dmar_domain *intel_iommu_domain;
> >  	struct kvm_pic *vpic;
> >  	struct kvm_ioapic *vioapic;
> >  	struct kvm_pit *vpit;
> > diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
> > index 76f3921..88153f4 100644
> > --- a/include/asm-x86/kvm_para.h
> > +++ b/include/asm-x86/kvm_para.h
> > @@ -144,4 +144,18 @@ static inline unsigned int kvm_arch_para_features(void)
> >  
> >  #endif
> >  
> > +/* Stores information for identifying host PCI devices assigned to the
> > + * guest: this is used in the host kernel and in the userspace.
> > + */
> > +struct kvm_pci_pt_info {
> > +	unsigned char busnr;
> > +	unsigned int devfn;
> > +	__u32 irq;
> >   
> 
> Userspace should not have to provide the IRQ.  Just use the PCI device 
> structure to find it.  We should use different structures for host/guest 
> devices.

This is part of the PCIPT code (not the VT-d code).
I agree that it needs to change, but its not related to the VT-d code.
This struct was included in the patch just so the VT-d code can be
compiled we on the main KVM tree without the PCI passthrough code.
The new patch that I sent is rebased on Amit's tree.

> 
> > +};
> > +
> > +/* Mapping between host and guest PCI device */
> > +struct kvm_pci_passthrough_dev {
> > +	struct kvm_pci_pt_info guest;
> > +	struct kvm_pci_pt_info host;
> > +};
> >  #endif
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index fc685c5..424534b 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -278,6 +278,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
> >  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
> >  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
> >  
> > +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
> > +			unsigned long npages);
> > +int kvm_iommu_map_guest(struct kvm *kvm,
> > +			struct kvm_pci_passthrough_dev *pci_pt_dev);
> > +int kvm_iommu_unmap_guest(struct kvm *kvm);
> > +
> >  static inline void kvm_guest_enter(void)
> >  {
> >  	account_system_vtime(current);
> >   
> 
> I'm not seeing these functions called anywhere?  Am I missing something 
> obvious or is part of your patch missing?

The 3 APIs above are the interface between the PCI passthrough code and
the VT-d code. I am resending the patch rebased on Amit's tree so you
can see the way that the PCI passthrough code is calling this interface.

Thanks,
Ben

> 
> Regards,
> 
> Anthony Liguori


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] KVM: PCIPT: VT-d support
  2008-07-10  9:14   ` [PATCH] KVM: PCIPT: VT-d support Ben-Ami Yassour
@ 2008-07-10  9:30     ` Yang, Sheng
  2008-07-10  9:51       ` Ben-Ami Yassour
  0 siblings, 1 reply; 20+ messages in thread
From: Yang, Sheng @ 2008-07-10  9:30 UTC (permalink / raw)
  To: kvm; +Cc: Ben-Ami Yassour, amit.shah, muli, weidong.han, anthony,
	Kay, Allen M

On Thursday 10 July 2008 17:14:42 Ben-Ami Yassour wrote:
> From: Kay, Allen M <allen.m.kay@intel.com>
>
> This patch includes the functions to support VT-d for passthrough
> devices.
>
> [Ben: fixed memory pinning]
>
> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> Signed-off-by: Weidong Han <weidong.han@intel.com>
> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> ---
>  arch/x86/kvm/Makefile      |    2 +-
>  arch/x86/kvm/vtd.c         |  176
> ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c    
>     |   10 +++
>  include/asm-x86/kvm_host.h |    1 +
>  include/linux/kvm_host.h   |    6 ++
>  virt/kvm/kvm_main.c        |    6 ++
>  6 files changed, 200 insertions(+), 1 deletions(-)
>  create mode 100644 arch/x86/kvm/vtd.c
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 531d635..dc67d90 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -41,6 +41,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/mman.h>
>  #include <linux/swap.h>
> +#include <linux/intel-iommu.h>
>
>  #include <asm/processor.h>
>  #include <asm/io.h>
> @@ -422,6 +423,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
>  	}
>
>  	kvm_free_physmem_slot(&old, &new);
> +
> +	/* map the pages in iommu page table */
> +	if (intel_iommu_found())
> +		kvm_iommu_map_pages(kvm, base_gfn, npages);
> +

I don't understand why we need this along with  
kvm_iommu_map_memslots(). This works during the memory setup, and in 
kvm_iommu_map_guest() we do it again with the overlapped memory 
region?

I think even if we need pin all pages, we still just need do it 
once...

--
Thanks
Yang, Sheng

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] KVM: PCIPT: VT-d support
  2008-07-10  9:30     ` Yang, Sheng
@ 2008-07-10  9:51       ` Ben-Ami Yassour
  2008-07-10 10:07         ` Yang, Sheng
  0 siblings, 1 reply; 20+ messages in thread
From: Ben-Ami Yassour @ 2008-07-10  9:51 UTC (permalink / raw)
  To: Yang, Sheng
  Cc: kvm, amit.shah, Muli Ben-Yehuda, weidong.han, anthony,
	Kay, Allen M

On Thu, 2008-07-10 at 17:30 +0800, Yang, Sheng wrote:
> On Thursday 10 July 2008 17:14:42 Ben-Ami Yassour wrote:
> > From: Kay, Allen M <allen.m.kay@intel.com>
> >
> > This patch includes the functions to support VT-d for passthrough
> > devices.
> >
> > [Ben: fixed memory pinning]
> >
> > Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> > Signed-off-by: Weidong Han <weidong.han@intel.com>
> > Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> > ---
> >  arch/x86/kvm/Makefile      |    2 +-
> >  arch/x86/kvm/vtd.c         |  176
> > ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c    
> >     |   10 +++
> >  include/asm-x86/kvm_host.h |    1 +
> >  include/linux/kvm_host.h   |    6 ++
> >  virt/kvm/kvm_main.c        |    6 ++
> >  6 files changed, 200 insertions(+), 1 deletions(-)
> >  create mode 100644 arch/x86/kvm/vtd.c
> >
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 531d635..dc67d90 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -41,6 +41,7 @@
> >  #include <linux/pagemap.h>
> >  #include <linux/mman.h>
> >  #include <linux/swap.h>
> > +#include <linux/intel-iommu.h>
> >
> >  #include <asm/processor.h>
> >  #include <asm/io.h>
> > @@ -422,6 +423,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
> >  	}
> >
> >  	kvm_free_physmem_slot(&old, &new);
> > +
> > +	/* map the pages in iommu page table */
> > +	if (intel_iommu_found())
> > +		kvm_iommu_map_pages(kvm, base_gfn, npages);
> > +
> 
> I don't understand why we need this along with  
> kvm_iommu_map_memslots(). This works during the memory setup, and in 
> kvm_iommu_map_guest() we do it again with the overlapped memory 
> region?
> 
> I think even if we need pin all pages, we still just need do it 
> once...

We map the entire guest memory on initialization by going over all the
existing memory slots.
If a new slot is created later then we need to map it as well, this is
the call that you see here.

Regards,
Ben



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] KVM: PCIPT: VT-d support
  2008-07-10  9:51       ` Ben-Ami Yassour
@ 2008-07-10 10:07         ` Yang, Sheng
  2008-07-10 14:41           ` Muli Ben-Yehuda
  0 siblings, 1 reply; 20+ messages in thread
From: Yang, Sheng @ 2008-07-10 10:07 UTC (permalink / raw)
  To: Ben-Ami Yassour
  Cc: kvm, amit.shah, Muli Ben-Yehuda, weidong.han, anthony,
	Kay, Allen M

On Thursday 10 July 2008 17:51:53 Ben-Ami Yassour wrote:
> On Thu, 2008-07-10 at 17:30 +0800, Yang, Sheng wrote:
> > On Thursday 10 July 2008 17:14:42 Ben-Ami Yassour wrote:
> > > From: Kay, Allen M <allen.m.kay@intel.com>
> > >
> > > This patch includes the functions to support VT-d for
> > > passthrough devices.
> > >
> > > [Ben: fixed memory pinning]
> > >
> > > Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> > > Signed-off-by: Weidong Han <weidong.han@intel.com>
> > > Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> > > ---
> > >  arch/x86/kvm/Makefile      |    2 +-
> > >  arch/x86/kvm/vtd.c         |  176
> > > ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c
> > >
> > >     |   10 +++
> > >
> > >  include/asm-x86/kvm_host.h |    1 +
> > >  include/linux/kvm_host.h   |    6 ++
> > >  virt/kvm/kvm_main.c        |    6 ++
> > >  6 files changed, 200 insertions(+), 1 deletions(-)
> > >  create mode 100644 arch/x86/kvm/vtd.c
> > >
> > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > index 531d635..dc67d90 100644
> > > --- a/virt/kvm/kvm_main.c
> > > +++ b/virt/kvm/kvm_main.c
> > > @@ -41,6 +41,7 @@
> > >  #include <linux/pagemap.h>
> > >  #include <linux/mman.h>
> > >  #include <linux/swap.h>
> > > +#include <linux/intel-iommu.h>
> > >
> > >  #include <asm/processor.h>
> > >  #include <asm/io.h>
> > > @@ -422,6 +423,11 @@ int __kvm_set_memory_region(struct kvm
> > > *kvm, }
> > >
> > >  	kvm_free_physmem_slot(&old, &new);
> > > +
> > > +	/* map the pages in iommu page table */
> > > +	if (intel_iommu_found())
> > > +		kvm_iommu_map_pages(kvm, base_gfn, npages);
> > > +
> >
> > I don't understand why we need this along with
> > kvm_iommu_map_memslots(). This works during the memory setup, and
> > in kvm_iommu_map_guest() we do it again with the overlapped
> > memory region?
> >
> > I think even if we need pin all pages, we still just need do it
> > once...
>
> We map the entire guest memory on initialization by going over all
> the existing memory slots.
> If a new slot is created later then we need to map it as well, this
> is the call that you see here.

I think it's may be unnecessary to map pages when device assigned. The 
table can be set up along with set_memory_region(), it covered all 
memory slots already, or I miss something here?

-- 
Thanks
Yang, Sheng

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] KVM: PCIPT: VT-d support
  2008-07-10 10:07         ` Yang, Sheng
@ 2008-07-10 14:41           ` Muli Ben-Yehuda
  2008-07-10 14:57             ` Avi Kivity
  0 siblings, 1 reply; 20+ messages in thread
From: Muli Ben-Yehuda @ 2008-07-10 14:41 UTC (permalink / raw)
  To: Yang, Sheng
  Cc: Ben-Ami Yassour1, kvm, amit.shah, weidong.han, anthony,
	Kay, Allen M

On Thu, Jul 10, 2008 at 06:07:27PM +0800, Yang, Sheng wrote:

> I think it's may be unnecessary to map pages when device
> assigned. The table can be set up along with set_memory_region(), it
> covered all memory slots already, or I miss something here?

VT-d is only initialized after the slots are originally created, so
when VT-d is initialized is map all of the existing slots separately,
and then for each new slot that may be added we'll catch it via
set_memory_region().

Cheers,
Muli

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] KVM: PCIPT: VT-d support
  2008-07-10 14:41           ` Muli Ben-Yehuda
@ 2008-07-10 14:57             ` Avi Kivity
  2008-07-10 16:00               ` Muli Ben-Yehuda
  0 siblings, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2008-07-10 14:57 UTC (permalink / raw)
  To: Muli Ben-Yehuda
  Cc: Yang, Sheng, Ben-Ami Yassour1, kvm, amit.shah, weidong.han,
	anthony, Kay, Allen M

Muli Ben-Yehuda wrote:
> On Thu, Jul 10, 2008 at 06:07:27PM +0800, Yang, Sheng wrote:
>
>   
>> I think it's may be unnecessary to map pages when device
>> assigned. The table can be set up along with set_memory_region(), it
>> covered all memory slots already, or I miss something here?
>>     
>
> VT-d is only initialized after the slots are originally created, so
> when VT-d is initialized is map all of the existing slots separately,
> and then for each new slot that may be added we'll catch it via
> set_memory_region().
>   

It makes sense to initialize VT-d before.  Since memslots can be created 
and destroyed dynamically, with the current implementation we can see

  create slot
  create slot
  init VT-d
  create slot

which means we need to support both slot-creation-after-VT-d and 
init-VT-d-after-slot-creation.  If we initialize VT-d up front, we only 
need to support (and test) one scenario.

On the other hand, this means that you will not be able to assign 
devices unless you specified this when creating the VM; but I think this 
is fair.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] KVM: PCIPT: VT-d support
  2008-07-10 14:57             ` Avi Kivity
@ 2008-07-10 16:00               ` Muli Ben-Yehuda
  2008-07-13  7:34                 ` Avi Kivity
  0 siblings, 1 reply; 20+ messages in thread
From: Muli Ben-Yehuda @ 2008-07-10 16:00 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Yang, Sheng, Ben-Ami Yassour1, kvm, amit.shah, weidong.han,
	anthony, Kay, Allen M

On Thu, Jul 10, 2008 at 05:57:56PM +0300, Avi Kivity wrote:

> It makes sense to initialize VT-d before.  Since memslots can be
> created and destroyed dynamically, with the current implementation
> we can see
>
>  create slot
>  create slot
>  init VT-d
>  create slot
>
> which means we need to support both slot-creation-after-VT-d and
> init-VT-d-after-slot-creation.  If we initialize VT-d up front, we
> only need to support (and test) one scenario.

Fair enough---we'll take a look at whether initializing VT-d first
introduces any complications other than hot-plug (see below).

> On the other hand, this means that you will not be able to assign
> devices unless you specified this when creating the VM; but I think
> this is fair.

It will be nice to support hot-plugged pass-through devices some time
in the future.

Cheers,
Muli

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] KVM: PCIPT: VT-d support
  2008-07-10 16:00               ` Muli Ben-Yehuda
@ 2008-07-13  7:34                 ` Avi Kivity
  2008-07-14  7:49                   ` Ben-Ami Yassour
  0 siblings, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2008-07-13  7:34 UTC (permalink / raw)
  To: Muli Ben-Yehuda
  Cc: Yang, Sheng, Ben-Ami Yassour1, kvm, amit.shah, weidong.han,
	anthony, Kay, Allen M

Muli Ben-Yehuda wrote:
>
>> On the other hand, this means that you will not be able to assign
>> devices unless you specified this when creating the VM; but I think
>> this is fair.
>>     
>
> It will be nice to support hot-plugged pass-through devices some time
> in the future.
>
>   

And it should be easy to do too.  The question is whether we need to 
allocate the iommu domain up front (which would mean that a command line 
parameter would be needed, meaning roughly 'support pci device 
assignment on this guest'.  Kind of like -usb, which means 'support usb 
device assignment on this guest'.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] KVM: PCIPT: VT-d support
  2008-07-13  7:34                 ` Avi Kivity
@ 2008-07-14  7:49                   ` Ben-Ami Yassour
  2008-07-14  8:48                     ` Avi Kivity
  0 siblings, 1 reply; 20+ messages in thread
From: Ben-Ami Yassour @ 2008-07-14  7:49 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Muli Ben-Yehuda, Yang, Sheng, kvm, amit.shah, weidong.han,
	anthony, Kay, Allen M

On Sun, 2008-07-13 at 10:34 +0300, Avi Kivity wrote:
> Muli Ben-Yehuda wrote:
> >
> >> On the other hand, this means that you will not be able to assign
> >> devices unless you specified this when creating the VM; but I think
> >> this is fair.
> >>     
> >
> > It will be nice to support hot-plugged pass-through devices some time
> > in the future.
> >
> >   
> 
> And it should be easy to do too.  The question is whether we need to 
> allocate the iommu domain up front (which would mean that a command line 
> parameter would be needed, meaning roughly 'support pci device 
> assignment on this guest'.  Kind of like -usb, which means 'support usb 
> device assignment on this guest'.
> 

I don't think that its that simple...
On a single machine you can have multiple iommu units, each of which controls certain PCI slots.
To configure the iommu for a specific device you need to configure the iommu unit that controls that device.
This means that we need to know the bus/dev/func of the device before we create the VT-d domain.
For hot-plug devices it means that by the time we can create the domain we already have memory slots that we need to add to the VT-d mapping.

In addition, some times it is desirable to be able to map the memory as late as possible.
For example if the guest supports PVDMA then there is no need to map the entire guest memory, 
however the host can not tell if the guest supports PVDMA before the memory slots are created.
In this case we would like to postpone the mapping of the entire guest memory until we know we have to.

Regards,
Ben



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] KVM: PCIPT: VT-d support
  2008-07-14  7:49                   ` Ben-Ami Yassour
@ 2008-07-14  8:48                     ` Avi Kivity
  0 siblings, 0 replies; 20+ messages in thread
From: Avi Kivity @ 2008-07-14  8:48 UTC (permalink / raw)
  To: Ben-Ami Yassour
  Cc: Muli Ben-Yehuda, Yang, Sheng, kvm, amit.shah, weidong.han,
	anthony, Kay, Allen M

Ben-Ami Yassour wrote:
> On Sun, 2008-07-13 at 10:34 +0300, Avi Kivity wrote:
>   
>> Muli Ben-Yehuda wrote:
>>     
>>>> On the other hand, this means that you will not be able to assign
>>>> devices unless you specified this when creating the VM; but I think
>>>> this is fair.
>>>>     
>>>>         
>>> It will be nice to support hot-plugged pass-through devices some time
>>> in the future.
>>>
>>>   
>>>       
>> And it should be easy to do too.  The question is whether we need to 
>> allocate the iommu domain up front (which would mean that a command line 
>> parameter would be needed, meaning roughly 'support pci device 
>> assignment on this guest'.  Kind of like -usb, which means 'support usb 
>> device assignment on this guest'.
>>
>>     
>
> I don't think that its that simple...
> On a single machine you can have multiple iommu units, each of which controls certain PCI slots.
> To configure the iommu for a specific device you need to configure the iommu unit that controls that device.
> This means that we need to know the bus/dev/func of the device before we create the VT-d domain.
> For hot-plug devices it means that by the time we can create the domain we already have memory slots that we need to add to the VT-d mapping.
>
>   

Thanks for the info.  Looks like we have to support both 
memslot-before-iommu and memslot-after-iommu then.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2008-07-14  8:48 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-10  9:14 VT-d pci passthrough patches Ben-Ami Yassour
2008-07-10  9:14 ` [PATCH] VT-d : changes to support KVM Ben-Ami Yassour
2008-07-10  9:14   ` [PATCH] KVM: PCIPT: VT-d support Ben-Ami Yassour
2008-07-10  9:30     ` Yang, Sheng
2008-07-10  9:51       ` Ben-Ami Yassour
2008-07-10 10:07         ` Yang, Sheng
2008-07-10 14:41           ` Muli Ben-Yehuda
2008-07-10 14:57             ` Avi Kivity
2008-07-10 16:00               ` Muli Ben-Yehuda
2008-07-13  7:34                 ` Avi Kivity
2008-07-14  7:49                   ` Ben-Ami Yassour
2008-07-14  8:48                     ` Avi Kivity
  -- strict thread matches above, loose matches on Subject: below --
2008-07-06 10:52 VT-d pci passthrough patches Ben-Ami Yassour
2008-07-06 10:52 ` [PATCH] VT-d : changes to support KVM Ben-Ami Yassour
2008-07-06 10:52   ` [PATCH] KVM: PCIPT: VT-d support Ben-Ami Yassour
2008-07-09 15:49     ` Anthony Liguori
2008-07-10  9:19       ` Ben-Ami Yassour
2008-07-02 15:33 VT-d pci passthrough patches Ben-Ami Yassour
2008-07-02 15:33 ` [PATCH] VT-d : changes to support KVM Ben-Ami Yassour
2008-07-02 15:33   ` [PATCH] KVM: PCIPT: VT-d support Ben-Ami Yassour
2008-07-03  4:55     ` Han, Weidong
2008-07-06 11:15       ` Ben-Ami Yassour
     [not found]     ` <08DF4D958216244799FC84F3514D70F00181AB10@pdsmsx415.ccr.corp.intel.com>
2008-07-03  6:16       ` Han, Weidong
2008-07-03 11:33         ` Ben-Ami Yassour

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox