* [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
@ 2008-10-06 6:38 Han, Weidong
2008-10-07 10:04 ` Zhang, Xiantao
2008-10-07 13:29 ` Avi Kivity
0 siblings, 2 replies; 20+ messages in thread
From: Han, Weidong @ 2008-10-06 6:38 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami, muli
[-- Attachment #1: Type: text/plain, Size: 14461 bytes --]
[Rebased the patch because my MMIO patch (commit 0d679782) was checked in]
From 9e68fc762358cc44cfec3968ac5ec65324ce04d7 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 6 Oct 2008 14:02:18 +0800
Subject: [PATCH] Support multiple device assignment to one guest
Current VT-d patches in kvm support assigning only one device to one
guest, because dmar_domain is per device.
In order to support multiple device assignment, this patch wraps
dmar_domain with a reference count (kvm_vtd_domain), and also adds a
pointer in kvm_assigned_dev_kernel to link to a kvm_vtd_domain.
Each dmar_domain owns one VT-d page table. In order to reduce the number
of page tables and improve IOTLB utilization, the devices assigned to the
same guest and under the same IOMMU share the same kvm_vtd_domain.
Signed-off-by: Weidong Han <weidong.han@intel.com>
---
arch/x86/kvm/vtd.c | 198 +++++++++++++++++++++++++++----------------
arch/x86/kvm/x86.c | 22 +++--
drivers/pci/intel-iommu.c | 16 ++++
include/asm-x86/kvm_host.h | 1 -
include/linux/intel-iommu.h | 1 +
include/linux/kvm_host.h | 21 +++++
6 files changed, 177 insertions(+), 82 deletions(-)
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
index a770874..7552f92 100644
--- a/arch/x86/kvm/vtd.c
+++ b/arch/x86/kvm/vtd.c
@@ -27,19 +27,40 @@
#include <linux/dmar.h>
#include <linux/intel-iommu.h>
-static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_domain_pages(struct dmar_domain *domain,
+ gfn_t base_gfn, unsigned long npages)
+{
+ gfn_t gfn = base_gfn;
+ pfn_t pfn;
+ int i;
+
+ for (i = 0; i < npages; i++) {
+ pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+ gfn_to_gpa(gfn));
+ kvm_release_pfn_clean(pfn);
+ gfn++;
+ }
+}
+
static void kvm_iommu_put_pages(struct kvm *kvm,
- gfn_t base_gfn, unsigned long npages);
+ gfn_t base_gfn, unsigned long npages)
+{
+ struct kvm_assigned_dev_kernel *assigned_dev;
-int kvm_iommu_map_pages(struct kvm *kvm,
- gfn_t base_gfn, unsigned long npages)
+ list_for_each_entry(assigned_dev, &kvm->arch.assigned_dev_head, list) {
+ kvm_iommu_put_domain_pages(assigned_dev->vtd_domain->domain,
+ base_gfn, npages);
+ }
+}
+
+static int kvm_iommu_map_domain_pages(struct kvm *kvm,
+ struct dmar_domain *domain,
+ gfn_t base_gfn, unsigned long npages)
{
gfn_t gfn = base_gfn;
pfn_t pfn;
int i, r = 0;
- struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
- /* check if iommu exists and in use */
if (!domain)
return 0;
@@ -58,7 +79,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
DMA_PTE_READ |
DMA_PTE_WRITE);
if (r) {
- printk(KERN_ERR "kvm_iommu_map_pages:"
+ printk(KERN_ERR "kvm_iommu_map_domain_pages:"
"iommu failed to map pfn=%lx\n", pfn);
goto unmap_pages;
}
@@ -67,18 +88,40 @@ int kvm_iommu_map_pages(struct kvm *kvm,
return 0;
unmap_pages:
- kvm_iommu_put_pages(kvm, base_gfn, i);
+ kvm_iommu_put_domain_pages(domain, base_gfn, i);
return r;
}
-static int kvm_iommu_map_memslots(struct kvm *kvm)
+int kvm_iommu_map_pages(struct kvm *kvm,
+ gfn_t base_gfn, unsigned long npages)
+{
+ int r = 0;
+ struct kvm_assigned_dev_kernel *assigned_dev;
+
+ list_for_each_entry(assigned_dev, &kvm->arch.assigned_dev_head, list) {
+ r = kvm_iommu_map_domain_pages(kvm,
+ assigned_dev->vtd_domain->domain,
+ base_gfn, npages);
+ if (r)
+ goto unmap_pages;
+ }
+
+ return 0;
+
+unmap_pages:
+ kvm_iommu_put_pages(kvm, base_gfn, npages);
+ return r;
+}
+
+static int kvm_iommu_map_domain_memslots(struct kvm *kvm,
+ struct dmar_domain *domain)
{
int i, r;
down_read(&kvm->slots_lock);
for (i = 0; i < kvm->nmemslots; i++) {
- r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
- kvm->memslots[i].npages);
+ r = kvm_iommu_map_domain_pages(kvm, domain,
+ kvm->memslots[i].base_gfn, kvm->memslots[i].npages);
if (r)
break;
}
@@ -86,10 +129,23 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
return r;
}
+static void kvm_iommu_unmap_domain_memslots(struct kvm *kvm,
+ struct dmar_domain *domain)
+{
+ int i;
+ down_read(&kvm->slots_lock);
+ for (i = 0; i < kvm->nmemslots; i++) {
+ kvm_iommu_put_domain_pages(domain,
+ kvm->memslots[i].base_gfn, kvm->memslots[i].npages);
+ }
+ up_read(&kvm->slots_lock);
+}
+
int kvm_iommu_map_guest(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
struct pci_dev *pdev = NULL;
+ struct kvm_vtd_domain *vtd_dom = NULL;
int r;
if (!intel_iommu_found()) {
@@ -103,89 +159,83 @@ int kvm_iommu_map_guest(struct kvm *kvm,
PCI_FUNC(assigned_dev->host_devfn));
pdev = assigned_dev->dev;
+ vtd_dom = assigned_dev->vtd_domain;
- if (pdev == NULL) {
- if (kvm->arch.intel_iommu_domain) {
- intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
- kvm->arch.intel_iommu_domain = NULL;
- }
- return -ENODEV;
- }
-
- kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
- if (!kvm->arch.intel_iommu_domain)
- return -ENODEV;
-
- r = kvm_iommu_map_memslots(kvm);
- if (r)
- goto out_unmap;
-
- intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+ intel_iommu_detach_dev(vtd_dom->domain,
pdev->bus->number, pdev->devfn);
- r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
- pdev);
+ r = intel_iommu_context_mapping(vtd_dom->domain, pdev);
if (r) {
printk(KERN_ERR "Domain context map for %s failed",
pci_name(pdev));
- goto out_unmap;
+ return r;
}
- return 0;
-out_unmap:
- kvm_iommu_unmap_memslots(kvm);
- return r;
+ return 0;
}
-static void kvm_iommu_put_pages(struct kvm *kvm,
- gfn_t base_gfn, unsigned long npages)
+int kvm_iommu_unmap_guest(struct kvm *kvm)
{
- gfn_t gfn = base_gfn;
- pfn_t pfn;
- struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
- int i;
+ struct kvm_assigned_dev_kernel *entry;
- for (i = 0; i < npages; i++) {
- pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
- gfn_to_gpa(gfn));
- kvm_release_pfn_clean(pfn);
- gfn++;
- }
+ list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+ printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+ entry->host_busnr,
+ PCI_SLOT(entry->host_devfn),
+ PCI_FUNC(entry->host_devfn));
+
+ /* detach kvm dmar domain */
+ intel_iommu_detach_dev(entry->vtd_domain->domain,
+ entry->host_busnr,
+ entry->host_devfn);
+ }
+
+ return 0;
}
-static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+struct kvm_vtd_domain *get_kvm_vtd_domain(struct kvm *kvm,
+ struct pci_dev *pdev)
{
- int i;
- down_read(&kvm->slots_lock);
- for (i = 0; i < kvm->nmemslots; i++) {
- kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
- kvm->memslots[i].npages);
+ struct kvm_vtd_domain *vtd_dom = NULL;
+ struct intel_iommu *iommu = NULL;
+ struct kvm_assigned_dev_kernel *assigned_dev;
+
+ iommu = intel_iommu_device_get_iommu(pdev);
+
+ list_for_each_entry(assigned_dev, &kvm->arch.assigned_dev_head, list) {
+ if (iommu == assigned_dev->vtd_domain->domain->iommu)
+ return assigned_dev->vtd_domain;
}
- up_read(&kvm->slots_lock);
- return 0;
-}
+ vtd_dom = kzalloc(sizeof(struct kvm_vtd_domain), GFP_KERNEL);
+ if (!vtd_dom)
+ return NULL;
-int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
- struct kvm_assigned_dev_kernel *entry;
- struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+ vtd_dom->domain = intel_iommu_domain_alloc(pdev);
+ if (!vtd_dom->domain) {
+ kfree(vtd_dom);
+ return NULL;
+ }
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
+ if (kvm_iommu_map_domain_memslots(kvm, vtd_dom->domain)) {
+ kvm_iommu_unmap_domain_memslots(kvm, vtd_dom->domain);
+ intel_iommu_domain_exit(vtd_dom->domain);
+ kfree(vtd_dom);
+ return NULL;
+ }
- list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
- printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
- entry->host_busnr,
- PCI_SLOT(entry->host_devfn),
- PCI_FUNC(entry->host_devfn));
+ return vtd_dom;
+}
- /* detach kvm dmar domain */
- intel_iommu_detach_dev(domain, entry->host_busnr,
- entry->host_devfn);
+void release_kvm_vtd_domain(struct kvm *kvm, struct kvm_vtd_domain *vtd_dom)
+{
+ if (vtd_dom == NULL)
+ return;
+
+ if (--vtd_dom->dev_count == 0) {
+ kvm_iommu_unmap_domain_memslots(kvm, vtd_dom->domain);
+ intel_iommu_domain_exit(vtd_dom->domain);
+ kfree(vtd_dom);
+ vtd_dom = NULL;
}
- kvm_iommu_unmap_memslots(kvm);
- intel_iommu_domain_exit(domain);
- return 0;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 675fcc1..a1d6ede 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -188,6 +188,8 @@ static void kvm_free_assigned_device(struct kvm *kvm,
pci_disable_device(assigned_dev->dev);
pci_dev_put(assigned_dev->dev);
+ release_kvm_vtd_domain(kvm, assigned_dev->vtd_domain);
+
list_del(&assigned_dev->list);
kfree(assigned_dev);
}
@@ -280,7 +282,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
assigned_dev->assigned_dev_id);
if (match) {
- /* device already assigned */
+ printk(KERN_ERR "%s: device (%x:%x.%x) is already
assigned\n",
+ __func__, match->host_busnr,
+ PCI_SLOT(match->host_devfn),
+ PCI_FUNC(match->host_devfn));
r = -EINVAL;
goto out;
}
@@ -317,21 +322,24 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
match->kvm = kvm;
- list_add(&match->list, &kvm->arch.assigned_dev_head);
-
if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+ match->vtd_domain = get_kvm_vtd_domain(kvm, dev);
+ if (match->vtd_domain == NULL)
+ goto out_disable;
+ match->vtd_domain->dev_count++;
+
r = kvm_iommu_map_guest(kvm, match);
if (r)
- goto out_list_del;
+ goto out_disable;
}
+
+ list_add(&match->list, &kvm->arch.assigned_dev_head);
out:
mutex_unlock(&kvm->lock);
return r;
-out_list_del:
- list_del(&match->list);
- pci_release_regions(dev);
out_disable:
+ pci_release_regions(dev);
pci_disable_device(dev);
out_put:
pci_dev_put(dev);
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 089ba3f..ccb2fd3 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2561,3 +2561,19 @@ u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
return pfn >> PAGE_SHIFT_4K;
}
EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
+
+struct intel_iommu *intel_iommu_device_get_iommu(struct pci_dev *pdev)
+{
+ struct dmar_drhd_unit *drhd;
+
+ drhd = dmar_find_matched_drhd_unit(pdev);
+ if (!drhd) {
+ printk(KERN_ERR "%s: cannot find drhd for %x:%x.%x\n",
+ __func__, pdev->bus->number,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ return NULL;
+ }
+
+ return drhd->iommu;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_device_get_iommu);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 53995a8..a48dcfe 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -351,7 +351,6 @@ struct kvm_arch{
*/
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
- struct dmar_domain *intel_iommu_domain;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 5fa9d26..7801aa4 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -350,6 +350,7 @@ int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
+struct intel_iommu *intel_iommu_device_get_iommu(struct pci_dev *pdev);
#ifdef CONFIG_DMAR
int intel_iommu_found(void);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 73b7c52..7a3e1b6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -293,6 +293,11 @@ struct kvm_irq_ack_notifier {
void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
};
+struct kvm_vtd_domain {
+ int dev_count; /* number of assigned devices */
+ struct dmar_domain *domain;
+};
+
struct kvm_assigned_dev_kernel {
struct kvm_irq_ack_notifier ack_notifier;
struct work_struct interrupt_work;
@@ -305,6 +310,7 @@ struct kvm_assigned_dev_kernel {
int irq_requested;
struct pci_dev *dev;
struct kvm *kvm;
+ struct kvm_vtd_domain *vtd_domain;
};
#ifdef CONFIG_DMAR
@@ -313,6 +319,9 @@ int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
int kvm_iommu_map_guest(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev);
int kvm_iommu_unmap_guest(struct kvm *kvm);
+struct kvm_vtd_domain *get_kvm_vtd_domain(struct kvm *kvm,
+ struct pci_dev *pdev);
+void release_kvm_vtd_domain(struct kvm *kvm, struct kvm_vtd_domain *vtd_dom);
#else /* CONFIG_DMAR */
static inline int kvm_iommu_map_pages(struct kvm *kvm,
gfn_t base_gfn,
@@ -332,6 +341,18 @@ static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
{
return 0;
}
+
+static inline struct kvm_vtd_domain *get_kvm_vtd_domain(struct kvm *kvm,
+ struct pci_dev *pdev)
+{
+ return NULL;
+}
+
+static inline void release_kvm_vtd_domain(struct kvm *kvm,
+ struct kvm_vtd_domain *vtd_dom)
+{
+ return;
+}
#endif /* CONFIG_DMAR */
static inline void kvm_guest_enter(void)
--
1.5.1
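For anyone trying the patch, assignment is still driven through the
existing KVM_ASSIGN_PCI_DEVICE ioctl; the only behavioral change is that
it may now be issued more than once per VM. A minimal userspace sketch,
assuming the kvm.h ioctl and struct layout of this tree (the bus/devfn
values and dev_id encoding below are placeholders):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Sketch: assign one host PCI device with VT-d enabled. */
	static int assign_one(int vm_fd, unsigned int bus, unsigned int devfn)
	{
		struct kvm_assigned_pci_dev adev = {
			.assigned_dev_id = (bus << 8) | devfn,
			.busnr           = bus,
			.devfn           = devfn,
			.flags           = KVM_DEV_ASSIGN_ENABLE_IOMMU,
		};

		/* Lands in kvm_vm_ioctl_assign_device(); with this patch,
		 * devices behind the same IOMMU share one kvm_vtd_domain. */
		return ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &adev);
	}

Calling assign_one() twice for two different devices is exactly the case
this patch enables.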
^ permalink raw reply related [flat|nested] 20+ messages in thread
* RE: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-06 6:38 [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest Han, Weidong
@ 2008-10-07 10:04 ` Zhang, Xiantao
2008-10-07 13:59 ` Avi Kivity
2008-10-07 13:29 ` Avi Kivity
1 sibling, 1 reply; 20+ messages in thread
From: Zhang, Xiantao @ 2008-10-07 10:04 UTC (permalink / raw)
To: Han, Weidong, Avi Kivity
Cc: kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami, muli
Han, Weidong wrote:
> #ifdef CONFIG_DMAR
> int intel_iommu_found(void);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 73b7c52..7a3e1b6 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -293,6 +293,11 @@ struct kvm_irq_ack_notifier {
> void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
> };
>
> +struct kvm_vtd_domain {
> + int dev_count; /* number of assigned devices */
Are atomic operations needed for this field?
> + struct dmar_domain *domain;
> +};
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-06 6:38 [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest Han, Weidong
2008-10-07 10:04 ` Zhang, Xiantao
@ 2008-10-07 13:29 ` Avi Kivity
2008-10-08 5:40 ` Han, Weidong
2008-10-29 10:25 ` Joerg Roedel
1 sibling, 2 replies; 20+ messages in thread
From: Avi Kivity @ 2008-10-07 13:29 UTC (permalink / raw)
To: Han, Weidong, Joerg Roedel
Cc: kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami, muli
Han, Weidong wrote:
> [Rebased the patch due to my mmio's patch (commit: 0d679782) was checked
> in]
>
> From 9e68fc762358cc44cfec3968ac5ec65324ce04d7 Mon Sep 17 00:00:00 2001
> From: Weidong Han <weidong.han@intel.com>
> Date: Mon, 6 Oct 2008 14:02:18 +0800
> Subject: [PATCH] Support multiple device assignment to one guest
>
> Current VT-d patches in kvm only support one device assignment to one
> guest due to dmar_domain is per device.
>
> In order to support multiple device assignment, this patch wraps
> dmar_domain with a reference count (kvm_vtd_domain), and also adds a
> pointer in kvm_assigned_dev_kernel to link to a kvm_vtd_domain.
>
> Each dmar_domain owns one VT-d page table, in order to reduce page
> tables and improve IOTLB utility, the devices assigned to the same guest
> and under the same IOMMU share the same kvm_vtd_domain.
>
>
I don't understand this. If we have one dmar domain per guest, why do
we need reference counting at all?
We can create the dmar domain when we assign the first device, and
destroy it when we deassign the last device, but otherwise I don't see a
need for changes. Particularly I don't understand this:
> @@ -351,7 +351,6 @@ struct kvm_arch{
> */
> struct list_head active_mmu_pages;
> struct list_head assigned_dev_head;
> - struct dmar_domain *intel_iommu_domain;
> struct kvm_pic *vpic;
> struct kvm_ioapic *vioapic;
> struct kvm_pit *vpit;
> @@ -305,6 +310,7 @@ struct kvm_assigned_dev_kernel {
> int irq_requested;
> struct pci_dev *dev;
> struct kvm *kvm;
> + struct kvm_vtd_domain *vtd_domain;
> };
Oh, I see it now. Different devices may need to go under different iommus.
This really feels like it should be handled by the iommu API. Users
shouldn't need to bother with it.
Joerg, can your dma api handle this?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-07 10:04 ` Zhang, Xiantao
@ 2008-10-07 13:59 ` Avi Kivity
2008-10-08 1:58 ` Zhang, Xiantao
0 siblings, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2008-10-07 13:59 UTC (permalink / raw)
To: Zhang, Xiantao
Cc: Han, Weidong, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli
Zhang, Xiantao wrote:
> Han, Weidong wrote:
>
>> #ifdef CONFIG_DMAR
>> int intel_iommu_found(void);
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 73b7c52..7a3e1b6 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -293,6 +293,11 @@ struct kvm_irq_ack_notifier {
>> void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
>> };
>>
>> +struct kvm_vtd_domain {
>> + int dev_count; /* number of assigned devices */
>>
>
> Are atomic operations needed for this field?
>
Probably not, since it is protected by the kvm lock.
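Concretely, the only writer in the patch runs with the vm mutex held; the
pattern in kvm_vm_ioctl_assign_device() is roughly (abridged sketch of the
posted code, not anything new):

	mutex_lock(&kvm->lock);
	...
	match->vtd_domain = get_kvm_vtd_domain(kvm, dev);
	...
	match->vtd_domain->dev_count++;	/* serialized by kvm->lock */
	...
	mutex_unlock(&kvm->lock);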
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 20+ messages in thread
* RE: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-07 13:59 ` Avi Kivity
@ 2008-10-08 1:58 ` Zhang, Xiantao
0 siblings, 0 replies; 20+ messages in thread
From: Zhang, Xiantao @ 2008-10-08 1:58 UTC (permalink / raw)
To: Avi Kivity
Cc: Han, Weidong, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli
Avi Kivity wrote:
> Zhang, Xiantao wrote:
>> Han, Weidong wrote:
>>
>>> #ifdef CONFIG_DMAR
>>> int intel_iommu_found(void);
>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>> index 73b7c52..7a3e1b6 100644 --- a/include/linux/kvm_host.h
>>> +++ b/include/linux/kvm_host.h
>>> @@ -293,6 +293,11 @@ struct kvm_irq_ack_notifier {
>>> void (*irq_acked)(struct kvm_irq_ack_notifier *kian); };
>>>
>>> +struct kvm_vtd_domain {
>>> + int dev_count; /* number of assigned devices */
>>>
>>
>> Are atomic operations needed for this field?
>>
>
> Probably not, since it is protected by the kvm lock.
Okay, it should be no problem as long as the kvm lock is acquired before
operating on this field :)
Xiantao
^ permalink raw reply [flat|nested] 20+ messages in thread
* RE: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-07 13:29 ` Avi Kivity
@ 2008-10-08 5:40 ` Han, Weidong
2008-10-08 10:32 ` Avi Kivity
2008-10-29 10:25 ` Joerg Roedel
1 sibling, 1 reply; 20+ messages in thread
From: Han, Weidong @ 2008-10-08 5:40 UTC (permalink / raw)
To: Avi Kivity, Joerg Roedel
Cc: kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami, muli
Avi Kivity wrote:
> Han, Weidong wrote:
>> [Rebased the patch due to my mmio's patch (commit: 0d679782) was
>> checked in]
>>
>> From 9e68fc762358cc44cfec3968ac5ec65324ce04d7 Mon Sep 17 00:00:00
>> 2001 From: Weidong Han <weidong.han@intel.com>
>> Date: Mon, 6 Oct 2008 14:02:18 +0800
>> Subject: [PATCH] Support multiple device assignment to one guest
>>
>> Current VT-d patches in kvm only support one device assignment to one
>> guest due to dmar_domain is per device.
>>
>> In order to support multiple device assignment, this patch wraps
>> dmar_domain with a reference count (kvm_vtd_domain), and also adds a
>> pointer in kvm_assigned_dev_kernel to link to a kvm_vtd_domain.
>>
>> Each dmar_domain owns one VT-d page table, in order to reduce page
>> tables and improve IOTLB utility, the devices assigned to the same
>> guest and under the same IOMMU share the same kvm_vtd_domain.
>>
>>
>
> I don't understand this. If we have one dmar domain per guest, why
> do we need reference counting at all?
>
> We can create the dmar domain when we assign the first device, and
> destroy it when we deassign the last device, but otherwise I don't
> see a need for changes. Particularly I don't understand this:
>
>> @@ -351,7 +351,6 @@ struct kvm_arch{
>> */
>> struct list_head active_mmu_pages;
>> struct list_head assigned_dev_head;
>> - struct dmar_domain *intel_iommu_domain;
>> struct kvm_pic *vpic;
>> struct kvm_ioapic *vioapic;
>> struct kvm_pit *vpit;
>> @@ -305,6 +310,7 @@ struct kvm_assigned_dev_kernel { int
>> irq_requested; struct pci_dev *dev;
>> struct kvm *kvm;
>> + struct kvm_vtd_domain *vtd_domain;
>> };
>
> Oh, I see it now. Different devices may need to go under different
> iommus.
>
> This really feels like it should be handled by the iommu API. Users
> shouldn't need to bother with it.
Why do you say it should be handled by the iommu API? The direct way to
support multiple device assignment is to keep a dmar_domain list for each
guest, with each device corresponding to one dmar_domain. But this will
cost more memory because each dmar_domain has its own VT-d page table. Our
method lets the devices which are under the same iommu and assigned to the
same guest share the same VT-d page table.
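(For scale: with 4 GB of guest memory there are 2^20 4 KB pages, so the
leaf level of a single VT-d page table already takes about 8 MB of 8-byte
entries; one table per assigned device multiplies that cost.)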
Regards,
Weidong
>
> Joerg, can your dma api handle this?
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-08 5:40 ` Han, Weidong
@ 2008-10-08 10:32 ` Avi Kivity
2008-10-08 15:06 ` Han, Weidong
0 siblings, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2008-10-08 10:32 UTC (permalink / raw)
To: Han, Weidong
Cc: Joerg Roedel, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli
Han, Weidong wrote:
>> Oh, I see it now. Different devices may need to go under different
>> iommus.
>>
>> This really feels like it should be handled by the iommu API. Users
>> shouldn't need to bother with it.
>>
>
> Why do you say it should be handled by iommu API?
Because the logic of which iommu controls which device is only
understood by iommu developers. Also, because this logic would be
duplicated by anyone attempting to do the same thing.
So it seems reasonable that it should be implemented by the iommu API, not
its users.
> The direct way to
> support multiple device assignment is keep a dmar_domain list for each
> guest, each device corresponds to one dmar_domain. But this will cost
> more memory because each dmar_domain has its own VT-d page table. Our
> method lets the devices which are under the same iommu and assigned to
> the same guest share the same VT-d page table.
>
If we devolve this to the iommu API, the same io page table can be
shared by all iommus, so long as they all use the same page table format.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
^ permalink raw reply [flat|nested] 20+ messages in thread
* RE: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-08 10:32 ` Avi Kivity
@ 2008-10-08 15:06 ` Han, Weidong
2008-10-08 19:49 ` Avi Kivity
0 siblings, 1 reply; 20+ messages in thread
From: Han, Weidong @ 2008-10-08 15:06 UTC (permalink / raw)
To: Avi Kivity
Cc: Joerg Roedel, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli
Avi Kivity wrote:
> Han, Weidong wrote:
>>> Oh, I see it now. Different devices may need to go under different
>>> iommus.
>>>
>>> This really feels like it should be handled by the iommu API. Users
>>> shouldn't need to bother with it.
>>>
>>
>> Why do you say it should be handled by iommu API?
>
> Because the logic of which iommu controls which device is only
> understood by iommu developers. Also, because this logic would be
> duplicated by anyone attempting to do the same thing.
>
> So it seems reasonable it should be implemented by the iommu API, not
> its users.
>
>> The direct way to
>> support multiple device assignment is keep a dmar_domain list for
>> each guest, each device corresponds to one dmar_domain. But this
>> will cost more memory because each dmar_domain has its own VT-d page
>> table. Our method lets the devices which are under the same iommu and
>> assigned to the same guest share the same VT-d page table.
>>
>
> If we devolve this to the iommu API, the same io page table can be
> shared by all iommus, so long as they all use the same page table
> format.
I don't understand how to handle this with the iommu API. Let me explain my
thoughts more clearly:
VT-d spec says:
Context-entries programmed with the same domain identifier must
always reference the same address translation structure (through the ASR
field). Similarly, context-entries referencing the same address
translation structure must be programmed with the same domain id.
In the native VT-d driver, dmar_domain is per device and has its own VT-d
page table, which is set up dynamically before each DMA. So it is
impossible for the same VT-d page table to be shared by all iommus.
Moreover, different iommus in a system may have different page table
levels. I think it's enough that the iommu API tells us the iommu of a
device.
Whereas on the KVM side, the same VT-d page table can be shared by the
devices which are under the same iommu and assigned to the same guest,
because all of the guest's memory is statically mapped in the VT-d page
table. But this needs a wrapper around dmar_domain; the patch wraps it
with a reference count so that multiple devices can relate to the same
dmar_domain.
This patch already adds an API (intel_iommu_device_get_iommu()) in
intel-iommu.c, which returns the iommu of a device.
Regards,
Weidong
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-08 15:06 ` Han, Weidong
@ 2008-10-08 19:49 ` Avi Kivity
2008-10-09 6:11 ` Han, Weidong
0 siblings, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2008-10-08 19:49 UTC (permalink / raw)
To: Han, Weidong
Cc: Joerg Roedel, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli
Han, Weidong wrote:
>> If we devolve this to the iommu API, the same io page table can be
>> shared by all iommus, so long as they all use the same page table
>> format.
>>
>
> I don't understand how to handle this by iommu API. Let me explain my
> thoughts more clearly:
>
> VT-d spec says:
> Context-entries programmed with the same domain identifier must
> always reference the same address translation structure (through the ASR
> field). Similarly, context-entries referencing the same address
> translation structure must be programmed with the same domain id.
>
> In native VT-d driver, dmar_domain is per device, and has its own VT-d
> page table, which is dynamically setup before each DMA. So it is
> impossible that the same VT-d page table is shared by all iommus.
>> Moreover different iommus in system may have different page table
> levels.
Right. This use case is in essence to prevent unintended sharing. It
is also likely to have low page table height, since dma sizes are
relatively small.
> I think it's enough that iommu API tells us its iommu of a
> device.
>
While this is tangential to our conversation, why? Even for the device
driver use case, this only makes the API more complex. If the API hides
the existence of multiple iommus, it's easier to use and harder to make
a mistake.
> Whereas in KVM side, the same VT-d page table can be shared by the
> devices which are under the same iommu and assigned to the same guest,
> because all of the guest's memory are statically mapped in VT-d page
> table. But it needs to wrap dmar_domain, this patch wraps it with a
> reference count for multiple devices relate to same dmar_domain.
>
> This patch already adds an API (intel_iommu_device_get_iommu()) in
> intel-iommu.c, which returns its iommu of a device.
There is a missed optimization here. Suppose we have two devices each
under a different iommu. With the patch, each will be in a different
dmar_domain and so will have a different page table. The amount of
memory used is doubled.
Suppose the iommu API hides the existence of multiple iommus. You
allocate a translation and add devices to it. When you add a device,
the iommu API checks which iommu is needed and programs it accordingly,
but only one io page table is used.
The other benefit is that iommu developers understand these issues while
kvm developers don't, so it's best managed by the iommu API. This way,
if things change (as usual, becoming more complicated), the iommu
developers can make the changes in their code and hide the complexity
from kvm or other users.
I'm probably (badly) duplicating Joerg's iommu API here, but this is how
it could go:
iommu_translation_create() - creates an iommu translation object; this
allocates the page tables but doesn't do anything with them
iommu_translation_map() - adds pages to the translation
iommu_translation_attach() - attach a device to the translation; this
locates the iommu and programs it
_detach(), _unmap(), and _free() undo these operations.
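In rough C prototypes, that sketch might read as follows (all names and
signatures purely illustrative; nothing like this exists in the tree yet):

	struct iommu_translation *iommu_translation_create(void);
	int iommu_translation_map(struct iommu_translation *t,
				  unsigned long iova, unsigned long pfn,
				  unsigned long npages, int prot);
	int iommu_translation_attach(struct iommu_translation *t,
				     struct pci_dev *pdev);
	void iommu_translation_detach(struct iommu_translation *t,
				      struct pci_dev *pdev);
	void iommu_translation_unmap(struct iommu_translation *t,
				     unsigned long iova, unsigned long npages);
	void iommu_translation_free(struct iommu_translation *t);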
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 20+ messages in thread
* RE: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-08 19:49 ` Avi Kivity
@ 2008-10-09 6:11 ` Han, Weidong
2008-10-09 8:31 ` Avi Kivity
0 siblings, 1 reply; 20+ messages in thread
From: Han, Weidong @ 2008-10-09 6:11 UTC (permalink / raw)
To: Avi Kivity
Cc: Joerg Roedel, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli, Woodhouse, David, Barnes, Jesse
Avi Kivity wrote:
> Han, Weidong wrote:
>>> If we devolve this to the iommu API, the same io page table can be
>>> shared by all iommus, so long as they all use the same page table
>>> format.
>>>
>>
>> I don't understand how to handle this by iommu API. Let me explain
>> my thoughts more clearly:
>>
>> VT-d spec says:
>> Context-entries programmed with the same domain identifier must
>> always reference the same address translation structure (through the
>> ASR field). Similarly, context-entries referencing the same address
>> translation structure must be programmed with the same domain id.
>>
>> In native VT-d driver, dmar_domain is per device, and has its own
>> VT-d page table, which is dynamically setup before each DMA. So it is
>> impossible that the same VT-d page table is shared by all iommus.
>> Moreover different iommus in system may have different page table
>> levels.
>
> Right. This use case is in essence to prevent unintended sharing. It
> is also likely to have low page table height, since dma sizes are
> relatively small.
>
>> I think it's enough that iommu API tells us its iommu of a
>> device.
>>
>
> While this is tangential to our conversation, why? Even for the
> device driver use case, this only makes the API more complex. If the
> API hides the existence of multiple iommus, it's easier to use and
> harder to make a mistake.
>
>> Whereas in KVM side, the same VT-d page table can be shared by the
>> devices which are under the same iommu and assigned to the same guest,
>> because all of the guest's memory are statically mapped in VT-d page
>> table. But it needs to wrap dmar_domain, this patch wraps it with a
>> reference count for multiple devices relate to same dmar_domain.
>>
>> This patch already adds an API (intel_iommu_device_get_iommu()) in
>> intel-iommu.c, which returns its iommu of a device.
>
> There is a missed optimization here. Suppose we have two devices each
> under a different iommu. With the patch, each will be in a different
> dmar_domain and so will have a different page table. The amount of
> memory used is doubled.
You cannot let two devices each under a different iommu share one
dmar_domain, because dmar_domain has a pointer to its iommu.
>
> Suppose the iommu API hides the existence of multiple iommus. You
> allocate a translation and add devices to it. When you add a device,
> the iommu API checks which iommu is needed and programs it
> accordingly, but only one io page table is used.
>
> The other benefit is that iommu developers understand these issues
> while kvm developers don't, so it's best managed by the iommu API.
> This way if things change (as usual, becoming more complicated), the
> iommu can make the changes in their code and hide the complexity from
> kvm or other users.
>
> I'm probably (badly) duplicating Joerg's iommu API here, but this is
> how it could go:
>
> iommu_translation_create() - creates an iommu translation object; this
> allocates the page tables but doesn't do anything with them
> iommu_translation_map() - adds pages to the translation
> iommu_translation_attach() - attach a device to the translation; this
> locates the iommu and programs it
> _detach(), _unmap(), and _free() undo these operations.
In fact, the exported APIs added for KVM VT-d also provide
create/map/attach/detach/free functions, though these proposed iommu APIs
are more readable.
Because kvm's VT-d usage is different from native usage, it's inevitable
to extend the native VT-d code to support KVM VT-d (such as wrapping
dmar_domain). Devices under different iommus cannot share the same
dmar_domain, thus they cannot share a VT-d page table. If we want to
handle this through iommu APIs, I suspect we need to change a lot of the
native VT-d driver code.
David/Jesse, what's your opinion?
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-09 6:11 ` Han, Weidong
@ 2008-10-09 8:31 ` Avi Kivity
2008-10-09 9:25 ` Han, Weidong
0 siblings, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2008-10-09 8:31 UTC (permalink / raw)
To: Han, Weidong
Cc: Joerg Roedel, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli, Woodhouse, David, Barnes, Jesse
Han, Weidong wrote:
>>
>> There is a missed optimization here. Suppose we have two devices each
>> under a different iommu. With the patch, each will be in a different
>> dmar_domain and so will have a different page table. The amount of
>> memory used is doubled.
>>
>
> You cannot let two devices each under a different iommu share one
> dmar_domain, because dmar_domain has a pointer to iommu.
>
>
I don't want them to share dmar_domains (these are implementation
details anyway), just io page tables.
kvm ---> something (owns io page table) ---> dmar_domain (uses shared io
page table) ---> device
Even if we don't implement io page table sharing right away,
implementing the 'something' in the iommu api means we can later
implement sharing without changing the iommu/kvm interface.
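One way to picture that layering in code (all names hypothetical):

	struct iommu_translation {		/* the 'something' above */
		void *io_pgtable;		/* one io page table, shareable */
		struct list_head domains;	/* per-iommu dmar_domains using it */
	};

kvm would hold only the iommu_translation; which dmar_domains hang off it
is the iommu layer's business.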
> In fact, the exported APIs added for KVM VT-d also do
> create/map/attach/detach/free functions. Whereas these iommu APIs are
> more readable.
>
>
No; the existing iommu API talks about dmar domains and exposes the
existence of multiple iommus, so it is more complex.
> Because kvm VT-d usage is different with native usage, it's inevitable
> extend native VT-d code to support KVM VT-d (such as wrap dmar_domain).
> For devices under different iommus, they cannot share the same
> dmar_domain, thus they cannot share VT-d page table. If we want to
> handle this by iommu APIs, I suspect we need to change lots of native
> VT-d driver code.
>
As mentioned above, we can start with implementing the API without
actual sharing (basically, your patch, but as an addition to the API
rather than a change to kvm); we can add io pagetable sharing later.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 20+ messages in thread
* RE: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-09 8:31 ` Avi Kivity
@ 2008-10-09 9:25 ` Han, Weidong
2008-10-09 12:50 ` Avi Kivity
0 siblings, 1 reply; 20+ messages in thread
From: Han, Weidong @ 2008-10-09 9:25 UTC (permalink / raw)
To: Avi Kivity
Cc: Joerg Roedel, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli, Woodhouse, David, Barnes, Jesse
Avi Kivity wrote:
> Han, Weidong wrote:
>>>
>>> There is a missed optimization here. Suppose we have two devices
>>> each under a different iommu. With the patch, each will be in a
>>> different dmar_domain and so will have a different page table. The
>>> amount of memory used is doubled.
>>>
>>
>> You cannot let two devices each under a different iommu share one
>> dmar_domain, because dmar_domain has a pointer to iommu.
>>
>>
>
> I don't want then to share dmar_domains (these are implementation
> details anyway), just io page tables.
>
>
> kvm ---> something (owns io page table) ---> dmar_domain (uses shared
> io page table) ---> device
>
Letting dmar_domains share an io page table is not allowed. The VT-d spec
requires that one domain corresponds to one page table, and vice versa. If
we want "something" that owns the io page table, shared by all devices
assigned to one guest, we need to redefine dmar_domain so that it covers
all devices assigned to a guest. Then we need to rewrite most of the
native VT-d code for kvm. Xen doesn't use dmar_domain; instead it
implements "something" as a domain structure (with a domain id) that owns
the page table. One guest has only one "something" instance, and thus only
one page table. It looks like: xen ---> something (owns io page table)
---> device. But on the KVM side, I think we can reuse the native VT-d
code and needn't duplicate another copy of it.
Regards,
Weidong
> Even if we don't implement io page table sharing right away,
> implementing the 'something' in the iommu api means we can later
> implement sharing without changing the iommu/kvm interface.
>
>> In fact, the exported APIs added for KVM VT-d also do
>> create/map/attach/detach/free functions. Whereas these iommu APIs
>> are more readable.
>>
>>
>
>
> No; the existing iommu API talks about dmar domains and exposes the
> existence of multiple iommus, so it is more complex.
>
>> Because kvm VT-d usage is different with native usage, it's
>> inevitable extend native VT-d code to support KVM VT-d (such as wrap
>> dmar_domain). For devices under different iommus, they cannot share
>> the same dmar_domain, thus they cannot share VT-d page table. If we
>> want to handle this by iommu APIs, I suspect we need to change lots
>> of native VT-d driver code.
>>
>
> As mentioned above, we can start with implementing the API without
> actual sharing (basically, your patch, but as an addition to the API
> rather than a change to kvm); we can add io pagetable sharing later.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-09 9:25 ` Han, Weidong
@ 2008-10-09 12:50 ` Avi Kivity
2008-10-09 14:31 ` Han, Weidong
[not found] ` <0122C7C995D32147B66BF4F440D3016301CB08EF@pdsmsx415.ccr.corp.intel.com>
0 siblings, 2 replies; 20+ messages in thread
From: Avi Kivity @ 2008-10-09 12:50 UTC (permalink / raw)
To: Han, Weidong
Cc: Joerg Roedel, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli, Woodhouse, David, Barnes, Jesse
Han, Weidong wrote:
>>
>> I don't want them to share dmar_domains (these are implementation
>> details anyway), just io page tables.
>>
>>
>> kvm ---> something (owns io page table) ---> dmar_domain (uses shared
>> io page table) ---> device
>>
>>
>
> Let dmar_domains share io page table is not allowed. VT-d spec allows
> one domain corresponds to one page table, vice versa.
Since the io pagetables are read only for the iommu (right?), I don't
see what prevents several iommus from accessing the same pagetable.
It's just a bunch of memory.
> If we want
> "something" owns the io page table, which shared by all assigned devices
> to one guest, we need to redefine dmar_domain which covers all devices
> assigned to a guest. Then we need to rewrite most of native VT-d code
> for kvm. Xen doesn't use dmar_domain, instead it implements "something"
> as a domain structure (with domain id) to own page table.
I imagine Xen shares the io pagetables with the EPT pagetables as
well. So io pagetable sharing is allowed.
> One guest has
> only one "something" instance, thus has only one page table. It looks
> like: xen ---> something (owns io page table) ---> device. But, in KVM
> side, I think we can reuse native VT-d code, needn't to duplicate
> another VT-d code.
>
I agree that at this stage we don't want to do optimization; we need
something working first. But let's at least ensure the API allows the
optimization later on (and also, that iommu implementation details are
hidden from kvm).
What I'm proposing is moving the list of kvm_vtd_domains inside the
iommu API. The only missing piece is populating a new dmar_domain when
a new device is added. We already have intel_iommu_iova_to_pfn(); we
need to add a way to read the protection bits and the highest mapped
iova (oh, and intel_iommu_iova_to_pfn() has a bug: it shifts right
instead of left).
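Something along these lines would do, mirroring the existing helper (both
prototypes hypothetical):

	/* analogous to intel_iommu_iova_to_pfn() */
	u64 intel_iommu_iova_to_prot(struct dmar_domain *domain, u64 iova);
	u64 intel_iommu_max_mapped_iova(struct dmar_domain *domain);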
Later we can make the "something" (that already contains the list) also
own the io page table; and non-kvm users can still use the same code
(the list will always be of length 1 for these users).
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 20+ messages in thread
* RE: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-09 12:50 ` Avi Kivity
@ 2008-10-09 14:31 ` Han, Weidong
[not found] ` <0122C7C995D32147B66BF4F440D3016301CB08EF@pdsmsx415.ccr.corp.intel.com>
1 sibling, 0 replies; 20+ messages in thread
From: Han, Weidong @ 2008-10-09 14:31 UTC (permalink / raw)
To: Avi Kivity
Cc: Joerg Roedel, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli, Woodhouse, David, Barnes, Jesse
Avi Kivity wrote:
> Han, Weidong wrote:
>>>
>>> I don't want them to share dmar_domains (these are implementation
>>> details anyway), just io page tables.
>>>
>>>
>>> kvm ---> something (owns io page table) ---> dmar_domain (uses
>>> shared io page table) ---> device
>>>
>>>
>>
>> Let dmar_domains share io page table is not allowed. VT-d spec allows
>> one domain corresponds to one page table, vice versa.
>
> Since the io pagetables are read only for the iommu (right?), I don't
> see what prevents several iommus from accessing the same pagetable.
> It's just a bunch of memory.
I think the reason is that hardware may use the domain identifier to tag
its internal caches.
>
>> If we want
>> "something" owns the io page table, which shared by all assigned
>> devices to one guest, we need to redefine dmar_domain which covers
>> all devices assigned to a guest. Then we need to rewrite most of
>> native VT-d code for kvm. Xen doesn't use dmar_domain, instead it
>> implements "something" as a domain sturcture (with domain id) to own
>> page table.
>
> I imagine, Xen shares the io pagetables with the EPT pagetables as
> well. So io pagetable sharing is allowed.
In Xen, the VT-d page table is not shared with the EPT page table or the
P2M page table. But they could share one if the format were the same.
>
>> One guest has
>> only one "something" instance, thus has only one page table. It looks
>> like: xen ---> something (owns io page table) ---> device. But, in
>> KVM side, I think we can reuse native VT-d code, needn't to
>> duplicate another VT-d code.
>>
>
> I agree that at this stage, we don't want to do optimization, we need
> something working first. But let's at least ensure the API allows the
> optimization later on (and also, that iommu implementation details are
> hidden from kvm).
>
> What I'm proposing is moving the list of kvm_vtd_domains inside the
> iommu API. The only missing piece is populating a new dmar_domain
> when a new device is added. We already have
I will move kvm_vtd_domain inside the iommu API, and also hide
get_kvm_vtd_domain() and release_kvm_vtd_domain() implementation details
from kvm.
> intel_iommu_iova_to_pfn(), we need to add a way to read the
> protection bits and the highest mapped iova (oh, and
> intel_iommu_iova_to_pfn() has a bug: it shifts right instead of left).
>
Why do we need the protection bits and the highest mapped iova?
Shifting right instead of left in intel_iommu_iova_to_pfn() is not a
bug, because it returns a pfn, not an address.
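That is, with 4K pages the two directions are simply (a minimal
sketch; PAGE_SHIFT stands in for whatever page-size constant the code
actually uses):

    unsigned long pfn = addr >> PAGE_SHIFT; /* a frame number, which is what it returns */
    u64 back = (u64)pfn << PAGE_SHIFT;      /* shifting left would give an address */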
Regards,
Weidong
> Later we can make the "something" (that already contains the list)
> also own the io page table; and non-kvm users can still use the same
> code (the list will always be of length 1 for these users).
* RE: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
[not found] ` <0122C7C995D32147B66BF4F440D3016301CB08EF@pdsmsx415.ccr.corp.intel.com>
@ 2008-10-10 5:50 ` Han, Weidong
2008-10-10 6:40 ` Avi Kivity
0 siblings, 1 reply; 20+ messages in thread
From: Han, Weidong @ 2008-10-10 5:50 UTC (permalink / raw)
To: Avi Kivity
Cc: Joerg Roedel, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli, Woodhouse, David, Barnes, Jesse
Han, Weidong wrote:
> Avi Kivity wrote:
>> Han, Weidong wrote:
>>>>
>>>> I don't want them to share dmar_domains (these are implementation
>>>> details anyway), just io page tables.
>>>>
>>>>
>>>> kvm ---> something (owns io page table) ---> dmar_domain (uses
>>>> shared io page table) ---> device
>>>>
>>>>
>>>
>>> Letting dmar_domains share an io page table is not allowed. The
>>> VT-d spec allows one domain to correspond to one page table, and
>>> vice versa.
>>
>> Since the io pagetables are read only for the iommu (right?), I don't
>> see what prevents several iommus from accessing the same pagetable.
>> It's just a bunch of memory.
>
> I think the reason is that hardware may use the domain identifier to
> tag its internal caches.
>
>>
>>> If we want
>>> "something" to own the io page table, shared by all devices assigned
>>> to one guest, we need to redefine dmar_domain so that it covers
>>> all devices assigned to a guest. Then we need to rewrite most of the
>>> native VT-d code for kvm. Xen doesn't use dmar_domain; instead it
>>> implements "something" as a domain structure (with a domain id)
>>> that owns the page table.
>>
>> I imagine Xen shares the io pagetables with the EPT pagetables as
>> well. So io pagetable sharing is allowed.
>
> In Xen, the VT-d page table is not shared with the EPT and P2M page
> tables. But they can be shared if the format is the same.
>
>>
>>> One guest has
>>> only one "something" instance, and thus only one page table. It
>>> looks like: xen ---> something (owns io page table) ---> device.
>>> But on the KVM side, I think we can reuse the native VT-d code;
>>> there's no need to duplicate another copy of it.
>>>
>>
>> I agree that at this stage, we don't want to do optimization; we need
>> something working first. But let's at least ensure the API allows
>> the optimization later on (and also, that iommu implementation
>> details are hidden from kvm).
>>
>> What I'm proposing is moving the list of kvm_vtd_domains inside the
>> iommu API. The only missing piece is populating a new dmar_domain
>> when a new device is added. We already have
>
> I will move kvm_vtd_domain inside the iommu API, and also hide
> get_kvm_vtd_domain() and release_kvm_vtd_domain() implementation
> details from kvm.
It's hard to move kvm_vtd_domain inside the current iommu API. It's
kvm specific, and it's not elegant to include kvm_vtd_domain stuff in
the native VT-d code. I think leaving it on the kvm side is cleaner at
this point. Moreover, it's very simple. I read Joerg's iommu API foils
just now, and I think it's good. The native AMD iommu code will be in
2.6.28, so it would be suitable to implement a generic iommu API for
kvm, based on both the Intel and AMD iommus, after 2.6.28. What's your
opinion?
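In outline, I'd expect such a generic API to be hardware-agnostic
operations on an opaque domain, something like this (only a sketch of
the kind of interface the foils describe, not a settled one):

    struct iommu_domain;    /* opaque; backed by VT-d or the AMD iommu */

    struct iommu_domain *iommu_domain_alloc(void);
    void iommu_domain_free(struct iommu_domain *domain);
    int  iommu_attach_device(struct iommu_domain *domain,
                             struct device *dev);
    void iommu_detach_device(struct iommu_domain *domain,
                             struct device *dev);
    int  iommu_map(struct iommu_domain *domain, unsigned long iova,
                   phys_addr_t paddr, size_t size, int prot);
    void iommu_unmap(struct iommu_domain *domain, unsigned long iova,
                     size_t size);

kvm would then hold one such domain per guest and let the driver
underneath decide how many hardware domains that requires.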
Regards,
Weidong
>
>> intel_iommu_iova_to_pfn(), we need to add a way to read the
>> protection bits and the highest mapped iova (oh, and
>> intel_iommu_iova_to_pfn() has a bug: it shifts right instead of
>> left).
>>
>
> Why do we need the protection bits and the highest mapped iova?
>
> Shifting right instead of left in intel_iommu_iova_to_pfn() is not a
> bug, because it returns a pfn, not an address.
>
> Regards,
> Weidong
>
>> Later we can make the "something" (that already contains the list)
>> also own the io page table; and non-kvm users can still use the same
>> code (the list will always be of length 1 for these users).
* Re: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-10 5:50 ` Han, Weidong
@ 2008-10-10 6:40 ` Avi Kivity
2008-10-10 7:22 ` Han, Weidong
0 siblings, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2008-10-10 6:40 UTC (permalink / raw)
To: Han, Weidong
Cc: Joerg Roedel, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli, Woodhouse, David, Barnes, Jesse
Han, Weidong wrote:
> It's hard to move kvm_vtd_domain inside the current iommu API. It's
> kvm specific, and it's not elegant to include kvm_vtd_domain stuff in
> the native VT-d code.
It's cleaner than adding knowledge of how the iommu works to kvm.
> I think leaving it on the kvm side is cleaner at this point.
> Moreover, it's very simple. I read Joerg's iommu API foils just now,
> and I think it's good. The native AMD iommu code will be in 2.6.28,
> so it would be suitable to implement a generic iommu API for kvm,
> based on both the Intel and AMD iommus, after 2.6.28. What's your
> opinion?
>
2.6.27 is out, so anything we do will be for 2.6.29.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
* RE: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-10 6:40 ` Avi Kivity
@ 2008-10-10 7:22 ` Han, Weidong
2008-10-10 7:32 ` Avi Kivity
0 siblings, 1 reply; 20+ messages in thread
From: Han, Weidong @ 2008-10-10 7:22 UTC (permalink / raw)
To: Avi Kivity
Cc: Joerg Roedel, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli, Woodhouse, David, Barnes, Jesse
Avi Kivity wrote:
> Han, Weidong wrote:
>> It's hard to move kvm_vtd_domain inside the current iommu API. It's
>> kvm specific, and it's not elegant to include kvm_vtd_domain stuff
>> in the native VT-d code.
>
> It's cleaner than adding knowledge of how the iommu works to kvm.
I will try to move kvm_vtd_domain inside the iommu API. I suspect it
will need many changes to the VT-d code.
>
>> I think leaving it on the kvm side is cleaner at this point.
>> Moreover, it's very simple. I read Joerg's iommu API foils just now,
>> and I think it's good. The native AMD iommu code will be in 2.6.28,
>> so it would be suitable to implement a generic iommu API for kvm,
>> based on both the Intel and AMD iommus, after 2.6.28. What's your
>> opinion?
>>
>
> 2.6.27 is out, so anything we do will be for 2.6.29.
Do you mean the VT-d patches which haven't been checked in won't be
pushed into 2.6.28?
Regards,
Weidong
* Re: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-10 7:22 ` Han, Weidong
@ 2008-10-10 7:32 ` Avi Kivity
2008-10-10 7:50 ` Han, Weidong
0 siblings, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2008-10-10 7:32 UTC (permalink / raw)
To: Han, Weidong
Cc: Joerg Roedel, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli, Woodhouse, David, Barnes, Jesse
Han, Weidong wrote:
>>
>> 2.6.27 is out, so anything we do will be for 2.6.29.
>>
>
> Do you mean the VT-d patches which haven't been checked in won't be
> pushed into 2.6.28?
>
Yes. The only exception is the guest interrupt sharing patch, which is
awaiting testing.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
* RE: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-10 7:32 ` Avi Kivity
@ 2008-10-10 7:50 ` Han, Weidong
0 siblings, 0 replies; 20+ messages in thread
From: Han, Weidong @ 2008-10-10 7:50 UTC (permalink / raw)
To: Avi Kivity
Cc: Joerg Roedel, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli, Woodhouse, David, Barnes, Jesse
Avi Kivity wrote:
> Han, Weidong wrote:
>>>
>>> 2.6.27 is out, so anything we do will be for 2.6.29.
>>>
>>
>> Do you mean the VT-d patches which haven't been checked in won't be
>> pushed into 2.6.28?
>>
>
> Yes. The only exception is the guest interrupt sharing patch, which
> is awaiting testing.
>
Ok, I see. So big changes to the VT-d code are not a problem. I will
redesign the multi-device assignment patch.
Regards,
Weidong
> --
> I have a truly marvellous patch that fixes the bug which this
> signature is too narrow to contain.
* Re: [PATCH] [RESEND] VT-d: Support multiple device assignment to one guest
2008-10-07 13:29 ` Avi Kivity
2008-10-08 5:40 ` Han, Weidong
@ 2008-10-29 10:25 ` Joerg Roedel
1 sibling, 0 replies; 20+ messages in thread
From: Joerg Roedel @ 2008-10-29 10:25 UTC (permalink / raw)
To: Avi Kivity
Cc: Han, Weidong, kvm, Amit Shah, Kay, Allen M, Yang, Sheng, benami,
muli
(sorry for joining this late, I returned from a 3-week vacation this
Monday)
On Tue, Oct 07, 2008 at 03:29:21PM +0200, Avi Kivity wrote:
> Oh, I see it now. Different devices may need to go under different iommus.
>
> This really feels like it should be handled by the iommu API. Users
> shouldn't need to bother with it.
>
> Joerg, can your dma api handle this?
Yes. The API works in terms of devices and domains. The fact that a
domain may contain devices behind different IOMMUs is hidden from the
user of that API.
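From the caller's side it is no more than this (a sketch of the
intended usage, not final names):

    /* one domain per guest; the two devices may sit behind
     * different IOMMUs, the API handles that internally */
    struct iommu_domain *domain = iommu_domain_alloc();
    iommu_attach_device(domain, &pdev_a->dev);
    iommu_attach_device(domain, &pdev_b->dev);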
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy