* [PATCH 4/4][VTD] vt-d specific files in KVM
@ 2008-06-10 0:43 Kay, Allen M
2008-06-10 10:27 ` Muli Ben-Yehuda
2008-06-20 18:24 ` Avi Kivity
0 siblings, 2 replies; 9+ messages in thread
From: Kay, Allen M @ 2008-06-10 0:43 UTC (permalink / raw)
To: kvm
Cc: Amit Shah, Muli Ben-Yehuda, Ben-Ami Yassour, Avi Kivity,
Anthony Liguori, Chris Wright, Han, Weidong
[-- Attachment #1: Type: text/plain, Size: 155 bytes --]
vt-d specific files in KVM for constructing vt-d page tables and
programming vt-d context entries.
Signed-off-by: Allen M. Kay <allen.m.kay@intel.com>
[-- Attachment #2: kvm_vtd.patch --]
[-- Type: application/octet-stream, Size: 7083 bytes --]
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..634802c
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+#include "vtd.h"
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+ gfn_t base_gfn, unsigned long npages)
+{
+ gfn_t gfn = base_gfn;
+ pfn_t pfn;
+ struct page *page;
+ int i, rc;
+
+ if (!kvm->arch.domain)
+ return -EFAULT;
+
+ printk(KERN_DEBUG "kvm_iommu_map_page: gpa = %lx\n",
+ gfn << PAGE_SHIFT);
+ printk(KERN_DEBUG "kvm_iommu_map_page: hpa = %lx\n",
+ gfn_to_pfn(kvm, base_gfn) << PAGE_SHIFT);
+ printk(KERN_DEBUG "kvm_iommu_map_page: size = %lx\n",
+ npages*PAGE_SIZE);
+
+ for (i = 0; i < npages; i++) {
+ pfn = gfn_to_pfn(kvm, gfn);
+ if (pfn_valid(pfn)) {
+ rc = kvm_intel_iommu_page_mapping(kvm->arch.domain,
+ gfn << PAGE_SHIFT, pfn << PAGE_SHIFT,
+ PAGE_SIZE, DMA_PTE_READ | DMA_PTE_WRITE);
+ if (rc) {
+ page = gfn_to_page(kvm, gfn);
+ put_page(page);
+ }
+ } else {
+ printk(KERN_DEBUG "kvm_iommu_map_page:"
+ "invalid pfn=%lx\n", pfn);
+ return 0;
+ }
+ gfn++;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_iommu_map_pages);
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+ int i, rc;
+ for (i = 0; i < kvm->nmemslots; i++) {
+ rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+ kvm->memslots[i].npages);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+int kvm_iommu_map_guest(struct kvm *kvm,
+ struct kvm_pci_passthrough_dev *pci_pt_dev)
+{
+ struct pci_dev *pdev = NULL;
+
+ printk(KERN_DEBUG "kvm_iommu_map_guest: host bdf = %x:%x:%x\n",
+ pci_pt_dev->host.busnr,
+ PCI_SLOT(pci_pt_dev->host.devfn),
+ PCI_FUNC(pci_pt_dev->host.devfn));
+
+ for_each_pci_dev(pdev) {
+ if ((pdev->bus->number == pci_pt_dev->host.busnr) &&
+ (pdev->devfn == pci_pt_dev->host.devfn))
+ goto found;
+ }
+ if (kvm->arch.domain) {
+ kvm_intel_iommu_domain_exit(kvm->arch.domain);
+ kvm->arch.domain = NULL;
+ }
+ return -ENODEV;
+found:
+ kvm->arch.domain = kvm_intel_iommu_domain_alloc(pdev);
+ if (kvm->arch.domain == NULL)
+ printk(KERN_WARN "kvm_iommu_map_guest: domain == NULL\n");
+ else
+ printk(KERN_INFO "kvm_iommu_map_guest: domain = %p\n",
+ kvm->arch.domain);
+ if (kvm_iommu_map_memslots(kvm)) {
+ kvm_iommu_unmap_memslots(kvm);
+ return -EFAULT;
+ }
+ kvm_intel_iommu_context_mapping(kvm->arch.domain, pdev);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_iommu_map_guest);
+
+static int kvm_iommu_put_pages(struct kvm *kvm,
+ gfn_t base_gfn, unsigned long npages)
+{
+ gfn_t gfn = base_gfn;
+ struct page *page;
+ int i;
+
+ if (!kvm->arch.domain)
+ return -EFAULT;
+
+ printk(KERN_DEBUG "kvm_iommu_put_pages: gpa = %lx\n",
+ gfn << PAGE_SHIFT);
+ printk(KERN_DEBUG "kvm_iommu_put_pages: hpa = %lx\n",
+ gfn_to_pfn(kvm, gfn) << PAGE_SHIFT);
+ printk(KERN_DEBUG "kvm_iommu_put_pages: size = %lx\n",
+ npages*PAGE_SIZE);
+
+ for (i = 0; i < npages; i++) {
+ page = gfn_to_page(kvm, gfn);
+ put_page(page);
+ gfn++;
+ }
+ return 0;
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+ int i, rc;
+ for (i = 0; i < kvm->nmemslots; i++) {
+ rc = kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+ kvm->memslots[i].npages);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+ struct dmar_domain *domain;
+ struct kvm_pci_pt_dev_list *entry;
+ struct pci_dev *pdev = NULL;
+
+ if (kvm->arch.domain)
+ return 0;
+
+ list_for_each_entry(entry, &kvm->arch.pci_pt_dev_head, list) {
+ printk(KERN_DEBUG "kvm_iommu_unmap_guest: %x:%x:%x\n",
+ entry->pt_dev.host.busnr,
+ PCI_SLOT(entry->pt_dev.host.devfn),
+ PCI_FUNC(entry->pt_dev.host.devfn));
+
+ for_each_pci_dev(pdev) {
+ if ((pdev->bus->number == entry->pt_dev.host.busnr) &&
+ (pdev->devfn == entry->pt_dev.host.devfn))
+ goto found;
+ }
+ return -ENODEV;
+found:
+ if (pdev == NULL) {
+ printk(KERN_ERR "kvm_iommu_unmap_guest:pdev == NULL\n");
+ return -EFAULT;
+ }
+
+ /* detach kvm dmar domain */
+ kvm_intel_iommu_detach_dev(kvm->arch.domain,
+ pdev->bus->number, pdev->devfn);
+
+ /* now restore back linux iommu domain */
+ domain = kvm_intel_iommu_find_domain(pdev);
+ if (domain)
+ kvm_intel_iommu_context_mapping(domain, pdev);
+ else
+ printk(KERN_DEBUG
+ "kvm_iommu_unmap_guest: domain == NULL\n");
+ }
+ kvm_iommu_unmap_memslots(kvm);
+ kvm_intel_iommu_domain_exit(kvm->arch.domain);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_iommu_unmap_guest);
diff --git a/arch/x86/kvm/vtd.h b/arch/x86/kvm/vtd.h
new file mode 100644
index 0000000..d03c15c
--- /dev/null
+++ b/arch/x86/kvm/vtd.h
@@ -0,0 +1,36 @@
+#ifndef __VTD_H
+#define __VTD_H
+
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ */
+
+#define DEBUG
+
+int kvm_intel_iommu_context_mapping(struct dmar_domain *d,
+ struct pci_dev *pdev);
+int kvm_intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
+ u64 hpa, size_t size, int prot);
+void kvm_intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
+struct dmar_domain *kvm_intel_iommu_domain_alloc(struct pci_dev *pdev);
+void kvm_intel_iommu_domain_exit(struct dmar_domain *domain);
+struct dmar_domain *kvm_intel_iommu_find_domain(struct pci_dev *pdev);
+
+#endif
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4][VTD] vt-d specific files in KVM
2008-06-10 0:43 [PATCH 4/4][VTD] vt-d specific files in KVM Kay, Allen M
@ 2008-06-10 10:27 ` Muli Ben-Yehuda
2008-06-10 14:26 ` Anthony Liguori
2008-06-20 18:24 ` Avi Kivity
1 sibling, 1 reply; 9+ messages in thread
From: Muli Ben-Yehuda @ 2008-06-10 10:27 UTC (permalink / raw)
To: Kay, Allen M
Cc: kvm, Amit Shah, Ben-Ami Yassour1, Avi Kivity, Anthony Liguori,
Chris Wright, Han, Weidong
On Mon, Jun 09, 2008 at 05:43:15PM -0700, Kay, Allen M wrote:
> vt-d specific files in KVM for contructing vt-d page tables and
> programming vt-d context entries.
Hi Allen,
Some comments below, patches will follow up.
> Signed-off-by: Allen M. Kay <allen.m.kay@intel.com>
>
> diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
> new file mode 100644
> index 0000000..634802c
> --- /dev/null
> +++ b/arch/x86/kvm/vtd.c
> @@ -0,0 +1,197 @@
> +/*
> + * Copyright (c) 2006, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + *
> + * Copyright (C) 2006-2008 Intel Corporation
> + * Author: Allen M. Kay <allen.m.kay@intel.com>
> + * Author: Weidong Han <weidong.han@intel.com>
> + */
> +
> +#include <linux/list.h>
> +#include <linux/kvm_host.h>
> +#include <linux/pci.h>
> +#include <linux/dmar.h>
> +#include <linux/intel-iommu.h>
> +#include "vtd.h"
> +
> +int kvm_iommu_map_pages(struct kvm *kvm,
> + gfn_t base_gfn, unsigned long npages)
> +{
> + gfn_t gfn = base_gfn;
> + pfn_t pfn;
> + struct page *page;
> + int i, rc;
> +
> + if (!kvm->arch.domain)
> + return -EFAULT;
> +
> + printk(KERN_DEBUG "kvm_iommu_map_page: gpa = %lx\n",
> + gfn << PAGE_SHIFT);
> + printk(KERN_DEBUG "kvm_iommu_map_page: hpa = %lx\n",
> + gfn_to_pfn(kvm, base_gfn) << PAGE_SHIFT);
> + printk(KERN_DEBUG "kvm_iommu_map_page: size = %lx\n",
> + npages*PAGE_SIZE);
> +
> + for (i = 0; i < npages; i++) {
> + pfn = gfn_to_pfn(kvm, gfn);
> + if (pfn_valid(pfn)) {
Checking against pfn_valid() isn't enough to differentiate between RAM
and MMIO areas. I think the consensus was that we also need to check
PageReserved(), i.e.,
if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) ...
> + rc = kvm_intel_iommu_page_mapping(kvm->arch.domain,
> + gfn << PAGE_SHIFT, pfn << PAGE_SHIFT,
> + PAGE_SIZE, DMA_PTE_READ | DMA_PTE_WRITE);
> + if (rc) {
> + page = gfn_to_page(kvm, gfn);
> + put_page(page);
If we fail to map some of the domain's memory, shouldn't we bail out
of giving it pass-through access at all?
> + }
> + } else {
> + printk(KERN_DEBUG "kvm_iommu_map_page:"
> + "invalid pfn=%lx\n", pfn);
> + return 0;
I think we should BUG_ON() (or at least WARN_ON()) if we hit a slot
that has both RAM and an MMIO region.
> + }
> + gfn++;
> + }
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_iommu_map_pages);
> +
> +static int kvm_iommu_map_memslots(struct kvm *kvm)
> +{
> + int i, rc;
> + for (i = 0; i < kvm->nmemslots; i++) {
> + rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> + kvm->memslots[i].npages);
> + if (rc)
> + return rc;
> + }
> + return 0;
> +}
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
> +int kvm_iommu_map_guest(struct kvm *kvm,
> + struct kvm_pci_passthrough_dev *pci_pt_dev)
> +{
> + struct pci_dev *pdev = NULL;
> +
> + printk(KERN_DEBUG "kvm_iommu_map_guest: host bdf = %x:%x:%x\n",
> + pci_pt_dev->host.busnr,
> + PCI_SLOT(pci_pt_dev->host.devfn),
> + PCI_FUNC(pci_pt_dev->host.devfn));
> +
> + for_each_pci_dev(pdev) {
> + if ((pdev->bus->number == pci_pt_dev->host.busnr) &&
> + (pdev->devfn == pci_pt_dev->host.devfn))
> + goto found;
We can stick the `found' stanza in a separate function and call it
here, which gets rid of one goto.
> + }
> + if (kvm->arch.domain) {
> + kvm_intel_iommu_domain_exit(kvm->arch.domain);
> + kvm->arch.domain = NULL;
> + }
> + return -ENODEV;
> +found:
> + kvm->arch.domain = kvm_intel_iommu_domain_alloc(pdev);
> + if (kvm->arch.domain == NULL)
> + printk(KERN_WARN "kvm_iommu_map_guest: domain == NULL\n");
> + else
> + printk(KERN_INFO "kvm_iommu_map_guest: domain = %p\n",
> + kvm->arch.domain);
> + if (kvm_iommu_map_memslots(kvm)) {
We shouldn't call map_memslots if domain == NULL.
> + kvm_iommu_unmap_memslots(kvm);
> + return -EFAULT;
> + }
> + kvm_intel_iommu_context_mapping(kvm->arch.domain, pdev);
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_iommu_map_guest);
> +
> +static int kvm_iommu_put_pages(struct kvm *kvm,
> + gfn_t base_gfn, unsigned long npages)
> +{
> + gfn_t gfn = base_gfn;
> + struct page *page;
> + int i;
> +
> + if (!kvm->arch.domain)
> + return -EFAULT;
> +
> + printk(KERN_DEBUG "kvm_iommu_put_pages: gpa = %lx\n",
> + gfn << PAGE_SHIFT);
> + printk(KERN_DEBUG "kvm_iommu_put_pages: hpa = %lx\n",
> + gfn_to_pfn(kvm, gfn) << PAGE_SHIFT);
> + printk(KERN_DEBUG "kvm_iommu_put_pages: size = %lx\n",
> + npages*PAGE_SIZE);
> +
> + for (i = 0; i < npages; i++) {
> + page = gfn_to_page(kvm, gfn);
> + put_page(page);
> + gfn++;
> + }
> + return 0;
> +}
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm)
> +{
> + int i, rc;
> + for (i = 0; i < kvm->nmemslots; i++) {
> + rc = kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
> + kvm->memslots[i].npages);
> + if (rc)
> + return rc;
> + }
> + return 0;
> +}
> +
> +int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> + struct dmar_domain *domain;
> + struct kvm_pci_pt_dev_list *entry;
> + struct pci_dev *pdev = NULL;
> +
> + if (kvm->arch.domain)
> + return 0;
> +
> + list_for_each_entry(entry, &kvm->arch.pci_pt_dev_head, list) {
> + printk(KERN_DEBUG "kvm_iommu_unmap_guest: %x:%x:%x\n",
> + entry->pt_dev.host.busnr,
> + PCI_SLOT(entry->pt_dev.host.devfn),
> + PCI_FUNC(entry->pt_dev.host.devfn));
> +
> + for_each_pci_dev(pdev) {
> + if ((pdev->bus->number == entry->pt_dev.host.busnr) &&
> + (pdev->devfn == entry->pt_dev.host.devfn))
> + goto found;
> + }
> + return -ENODEV;
> +found:
Same comment as above.
Cheers,
Muli
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4][VTD] vt-d specific files in KVM
2008-06-10 10:27 ` Muli Ben-Yehuda
@ 2008-06-10 14:26 ` Anthony Liguori
2008-06-10 14:56 ` Muli Ben-Yehuda
0 siblings, 1 reply; 9+ messages in thread
From: Anthony Liguori @ 2008-06-10 14:26 UTC (permalink / raw)
To: Muli Ben-Yehuda
Cc: Kay, Allen M, kvm, Amit Shah, Ben-Ami Yassour1, Avi Kivity,
Chris Wright, Han, Weidong
Muli Ben-Yehuda wrote:
> On Mon, Jun 09, 2008 at 05:43:15PM -0700, Kay, Allen M wrote:
>
>> vt-d specific files in KVM for contructing vt-d page tables and
>> programming vt-d context entries.
>>
>
> Hi Allen,
>
> Some comments below, patches will follow up.
>
>
>> Signed-off-by: Allen M. Kay <allen.m.kay@intel.com>
>>
>
>
>> diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
>> new file mode 100644
>> index 0000000..634802c
>> --- /dev/null
>> +++ b/arch/x86/kvm/vtd.c
>> @@ -0,0 +1,197 @@
>> +/*
>> + * Copyright (c) 2006, Intel Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
>> + * more details.
>> + *
>> + * You should have received a copy of the GNU General Public License along with
>> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
>> + * Place - Suite 330, Boston, MA 02111-1307 USA.
>> + *
>> + * Copyright (C) 2006-2008 Intel Corporation
>> + * Author: Allen M. Kay <allen.m.kay@intel.com>
>> + * Author: Weidong Han <weidong.han@intel.com>
>> + */
>> +
>> +#include <linux/list.h>
>> +#include <linux/kvm_host.h>
>> +#include <linux/pci.h>
>> +#include <linux/dmar.h>
>> +#include <linux/intel-iommu.h>
>> +#include "vtd.h"
>> +
>> +int kvm_iommu_map_pages(struct kvm *kvm,
>> + gfn_t base_gfn, unsigned long npages)
>> +{
>> + gfn_t gfn = base_gfn;
>> + pfn_t pfn;
>> + struct page *page;
>> + int i, rc;
>> +
>> + if (!kvm->arch.domain)
>> + return -EFAULT;
>> +
>> + printk(KERN_DEBUG "kvm_iommu_map_page: gpa = %lx\n",
>> + gfn << PAGE_SHIFT);
>> + printk(KERN_DEBUG "kvm_iommu_map_page: hpa = %lx\n",
>> + gfn_to_pfn(kvm, base_gfn) << PAGE_SHIFT);
>> + printk(KERN_DEBUG "kvm_iommu_map_page: size = %lx\n",
>> + npages*PAGE_SIZE);
>> +
>> + for (i = 0; i < npages; i++) {
>> + pfn = gfn_to_pfn(kvm, gfn);
>> + if (pfn_valid(pfn)) {
>>
>
> Checking against pfn_valid() isn't enough to differentiate between RAM
> and MMIO areas. I think the consensus was that we also need to check
> PageReserved(), i.e.,
>
> if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) ...
>
When checking the error return of gfn_to_pfn(), you should use
is_error_pfn(). There's no need to differentiate mmio/ram pages in the
code, the goal is just error checking.
>> + rc = kvm_intel_iommu_page_mapping(kvm->arch.domain,
>> + gfn << PAGE_SHIFT, pfn << PAGE_SHIFT,
>> + PAGE_SIZE, DMA_PTE_READ | DMA_PTE_WRITE);
>> + if (rc) {
>> + page = gfn_to_page(kvm, gfn);
>> + put_page(page);
>>
kvm_release_pfn_clean() should be used here.
> If we fail to map some of the domain's memory, shouldn't we bail out
> of giving it pass-through access at all?
>
>
>> + }
>> + } else {
>> + printk(KERN_DEBUG "kvm_iommu_map_page:"
>> + "invalid pfn=%lx\n", pfn);
>> + return 0;
>>
>
> I think we should BUG_ON() (or at least WARN_ON()) if we hit a slot
> that has both RAM and an MMIO region.
>
>
>> + }
>> + gfn++;
>> + }
>> + return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_iommu_map_pages);
>> +
>> +static int kvm_iommu_map_memslots(struct kvm *kvm)
>> +{
>> + int i, rc;
>> + for (i = 0; i < kvm->nmemslots; i++) {
>> + rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
>> + kvm->memslots[i].npages);
>> + if (rc)
>> + return rc;
>> + }
>> + return 0;
>> +}
>> +
>> +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
>> +int kvm_iommu_map_guest(struct kvm *kvm,
>> + struct kvm_pci_passthrough_dev *pci_pt_dev)
>> +{
>> + struct pci_dev *pdev = NULL;
>> +
>> + printk(KERN_DEBUG "kvm_iommu_map_guest: host bdf = %x:%x:%x\n",
>> + pci_pt_dev->host.busnr,
>> + PCI_SLOT(pci_pt_dev->host.devfn),
>> + PCI_FUNC(pci_pt_dev->host.devfn));
>> +
>> + for_each_pci_dev(pdev) {
>> + if ((pdev->bus->number == pci_pt_dev->host.busnr) &&
>> + (pdev->devfn == pci_pt_dev->host.devfn))
>> + goto found;
>>
>
> We can stick the `found' stanza in a seperate function and call it
> here, which gets rid of one goto.
>
>
>> + }
>> + if (kvm->arch.domain) {
>> + kvm_intel_iommu_domain_exit(kvm->arch.domain);
>> + kvm->arch.domain = NULL;
>> + }
>> + return -ENODEV;
>> +found:
>> + kvm->arch.domain = kvm_intel_iommu_domain_alloc(pdev);
>> + if (kvm->arch.domain == NULL)
>> + printk(KERN_WARN "kvm_iommu_map_guest: domain == NULL\n");
>> + else
>> + printk(KERN_INFO "kvm_iommu_map_guest: domain = %p\n",
>> + kvm->arch.domain);
>> + if (kvm_iommu_map_memslots(kvm)) {
>>
>
> We shouldn't call map_memslots if domain == NULL.
>
>
>> + kvm_iommu_unmap_memslots(kvm);
>> + return -EFAULT;
>> + }
>> + kvm_intel_iommu_context_mapping(kvm->arch.domain, pdev);
>> + return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_iommu_map_guest);
>> +
>> +static int kvm_iommu_put_pages(struct kvm *kvm,
>> + gfn_t base_gfn, unsigned long npages)
>> +{
>> + gfn_t gfn = base_gfn;
>> + struct page *page;
>> + int i;
>> +
>> + if (!kvm->arch.domain)
>> + return -EFAULT;
>> +
>> + printk(KERN_DEBUG "kvm_iommu_put_pages: gpa = %lx\n",
>> + gfn << PAGE_SHIFT);
>> + printk(KERN_DEBUG "kvm_iommu_put_pages: hpa = %lx\n",
>> + gfn_to_pfn(kvm, gfn) << PAGE_SHIFT);
>> + printk(KERN_DEBUG "kvm_iommu_put_pages: size = %lx\n",
>> + npages*PAGE_SIZE);
>> +
>> + for (i = 0; i < npages; i++) {
>> + page = gfn_to_page(kvm, gfn);
>> + put_page(page);
>> + gfn++;
>>
Likewise, you should use kvm_release_pfn_dirty() here.
Note, this patch series isn't bisect friendly. In the third patch, you
introduce a makefile change for vtd.o but don't introduce the file until
the fourth patch. This will break bisection.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4][VTD] vt-d specific files in KVM
2008-06-10 14:26 ` Anthony Liguori
@ 2008-06-10 14:56 ` Muli Ben-Yehuda
2008-06-10 15:02 ` Anthony Liguori
0 siblings, 1 reply; 9+ messages in thread
From: Muli Ben-Yehuda @ 2008-06-10 14:56 UTC (permalink / raw)
To: Anthony Liguori
Cc: Kay, Allen M, kvm, Amit Shah, Ben-Ami Yassour1, Avi Kivity,
Chris Wright, Han, Weidong
On Tue, Jun 10, 2008 at 09:26:04AM -0500, Anthony Liguori wrote:
>> Checking against pfn_valid() isn't enough to differentiate between
>> RAM and MMIO areas. I think the consensus was that we also need to
>> check PageReserved(), i.e.,
>>
>> if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) ...
>>
>
> When checking the error return of gfn_to_pfn(), you should use
> is_error_pfn(). There's no need to differentiate mmio/ram pages in
> the code, the goal is just error checking.
I'd have to check the exact semantics of is_error_pfn() to see if it
fits, since strictly speaking what we are doing is not checking
pfn_to_page() for errors. We need to differentiate between gfns which
represent RAM (which needs to be mapped into the VT-d page tables) and
gfns which don't (e.g, slots which represent an MMIO region), which
should not be mapped in the VT-d page tables.
Cheers,
Muli
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4][VTD] vt-d specific files in KVM
2008-06-10 14:56 ` Muli Ben-Yehuda
@ 2008-06-10 15:02 ` Anthony Liguori
2008-06-10 15:15 ` Muli Ben-Yehuda
0 siblings, 1 reply; 9+ messages in thread
From: Anthony Liguori @ 2008-06-10 15:02 UTC (permalink / raw)
To: Muli Ben-Yehuda
Cc: Kay, Allen M, kvm, Amit Shah, Ben-Ami Yassour1, Avi Kivity,
Chris Wright, Han, Weidong
Muli Ben-Yehuda wrote:
> On Tue, Jun 10, 2008 at 09:26:04AM -0500, Anthony Liguori wrote:
>
>
>>> Checking against pfn_valid() isn't enough to differentiate between
>>> RAM and MMIO areas. I think the consensus was that we also need to
>>> check PageReserved(), i.e.,
>>>
>>> if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) ...
>>>
>>>
>> When checking the error return of gfn_to_pfn(), you should use
>> is_error_pfn(). There's no need to differentiate mmio/ram pages in
>> the code, the goal is just error checking.
>>
>
> I'd have to check the exact semantics of is_error_pfn() to see if it
> fits, since strictly speaking what we are doing is not checking
> pfn_to_page() for errors. We need to differentiate between gfns which
> represent RAM (which needs to be mapped into the VT-d page tables) and
> gfns which don't (e.g, slots which represent an MMIO region), which
> should not be mapped in the VT-d page tables.
>
Why? Wouldn't MMIO pages have to be mapped in the VT-d page table in
order to support pass-through? It certainly can't hurt, can it?
At any rate, looking at the code again, the else clause is:
> + printk(KERN_DEBUG "kvm_iommu_map_page:"
> + "invalid pfn=%lx\n", pfn);
> + return 0;
Which looks like error handling to me. I don't think it's at all safe
to assume that a slot is either entirely MMIO or entirely RAM. You
could very easily construct a slot that's a mix of both so if this is an
attempt to skip MMIO slots, it's broken.
Regards,
Anthony Liguori
> Cheers,
> Muli
>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4][VTD] vt-d specific files in KVM
2008-06-10 15:02 ` Anthony Liguori
@ 2008-06-10 15:15 ` Muli Ben-Yehuda
2008-06-10 15:24 ` Anthony Liguori
0 siblings, 1 reply; 9+ messages in thread
From: Muli Ben-Yehuda @ 2008-06-10 15:15 UTC (permalink / raw)
To: Anthony Liguori
Cc: Kay, Allen M, kvm, Amit Shah, Ben-Ami Yassour1, Avi Kivity,
Chris Wright, Han, Weidong
On Tue, Jun 10, 2008 at 10:02:45AM -0500, Anthony Liguori wrote:
> Why? Wouldn't MMIO pages have to be mapped in the VT-d page table
> in order to support pass-through? It certainly can't hurt, can it?
By MMIO pages we refer to pages which are mapped (or can mapped) to
device MMIO regions. In other words, they are only relevant for host
memory accesses. VT-d mappings are used for *device* memory
accesses. I can't think of a good reason for a device to try to DMA to
such a page (where would the DMA end? There is no backing RAM), hence
the principle of least surprise says that we shouldn't map such pages
in the IOMMU page tables so that *if* the device tries to DMA to them
we will take an IOMMU fault rather than fail silently or machine check
(I've seen both happen with DMAs to MMIO regions).
> At any rate, looking at the code again, the else clause is:
>
>> + printk(KERN_DEBUG "kvm_iommu_map_page:"
>> + "invalid pfn=%lx\n", pfn);
>> + return 0;
>
>
> Which looks like error handling to me.
You are right, that snippet should be fixed to just skip non-RAM
gfns.
> I don't think it's at all safe to assume that a slot is either
> entirely MMIO or entirely RAM. You could very easily construct a
> slot that's a mix of both so if this is an attempt to skip MMIO
> slots, it's broken.
How would such a slot be constructed with the current code base? (Note
that you need Ben's direct MMIO patches to create an MMIO slot).
Cheers,
Muli
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4][VTD] vt-d specific files in KVM
2008-06-10 15:15 ` Muli Ben-Yehuda
@ 2008-06-10 15:24 ` Anthony Liguori
2008-06-10 16:07 ` Muli Ben-Yehuda
0 siblings, 1 reply; 9+ messages in thread
From: Anthony Liguori @ 2008-06-10 15:24 UTC (permalink / raw)
To: Muli Ben-Yehuda
Cc: Kay, Allen M, kvm, Amit Shah, Ben-Ami Yassour1, Avi Kivity,
Chris Wright, Han, Weidong
Muli Ben-Yehuda wrote:
> On Tue, Jun 10, 2008 at 10:02:45AM -0500, Anthony Liguori wrote:
>
>
>> Why? Wouldn't MMIO pages have to be mapped in the VT-d page table
>> in order to support pass-through? It certainly can't hurt, can it?
>>
>
> By MMIO pages we refer to pages which are mapped (or can mapped) to
> device MMIO regions. In other words, they are only relevant for host
> memory accesses. VT-d mappings are used for *device* memory
> accesses. I can't think of a good reason for a device to try to DMA to
> such a page (where would the DMA end? There is no backing RAM), hence
> the principal of least surprise says that we shouldn't map such pages
> in the IOMMU page tables so that *if* the device tries to DMA to them
> we will take an IOMMU fault rather than fail silently or machine check
> (I've seen both happen with DMAs to MMIO regions).
>
If you add the MMIO page to the IOMMU table, then the behavior is going
to be identical to what occurs on bare metal which IMHO is a good
thing. Why jump through hoops to change what may or may not be an error
condition instead of letting the natural error behavior happen? There
may be some weird piece of hardware that relies on this behavior out there.
>> I don't think it's at all safe to assume that a slot is either
>> entirely MMIO or entirely RAM. You could very easily construct a
>> slot that's a mix of both so if this is an attempt to skip MMIO
>> slots, it's broken.
>>
>
> How would suc a slot be constructed with the current code base? (Note
> that you need Ben's direct MMIO patches to create an MMIO slot).
>
There is no such thing as an MMIO slot. All you would need to do is
mmap(phys_ram_base + GPA, "/sys/bus/pci/.../region/0", MAP_SHARED |
MAP_FIXED) and you'd have a mixed slot assuming GPA was within an
existing RAM slot. Without MMU-notifiers, you'd have to do this before
the guest started. With MMU-notifiers, you can do this during execution
of the guest.
Regards,
Anthony Liguori
> Cheers,
> Muli
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4][VTD] vt-d specific files in KVM
2008-06-10 15:24 ` Anthony Liguori
@ 2008-06-10 16:07 ` Muli Ben-Yehuda
0 siblings, 0 replies; 9+ messages in thread
From: Muli Ben-Yehuda @ 2008-06-10 16:07 UTC (permalink / raw)
To: Anthony Liguori
Cc: Kay, Allen M, kvm, Amit Shah, Ben-Ami Yassour1, Avi Kivity,
Chris Wright, Han, Weidong
On Tue, Jun 10, 2008 at 10:24:06AM -0500, Anthony Liguori wrote:
> If you add the MMIO page to the IOMMU table, then the behavior is
> going to be identical to what occurs on bare metal which IMHO is a
> good thing. Why jump through hoops to change what may or may not be
> an error condition instead of letting the natural error behavior
> happen? There may be some weird piece of hardware that relies on
> this behavior out there.
One potential outcome of this behaviour on bare metal---which has been
observed!---is a machine check. Letting the guest machine check the
host is not a good thing.
> There is no such thing as an MMIO slot. All you would need to do is
> mmap(phys_ram_base + GPA, "/sys/bus/pci/.../region/0", MAP_SHARED |
> MAP_FIXED) and you'd have a mixed slot assuming GPA was within an
> existing RAM slot.
That doesn't happen with the current code base, but color me
convinced: we'll just continue checking gfns one by one when mapping
them into the IOMMU page tables, skipping any non-ram gfns.
Cheers,
Muli
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4][VTD] vt-d specific files in KVM
2008-06-10 0:43 [PATCH 4/4][VTD] vt-d specific files in KVM Kay, Allen M
2008-06-10 10:27 ` Muli Ben-Yehuda
@ 2008-06-20 18:24 ` Avi Kivity
1 sibling, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2008-06-20 18:24 UTC (permalink / raw)
To: Kay, Allen M
Cc: kvm, Amit Shah, Muli Ben-Yehuda, Ben-Ami Yassour, Anthony Liguori,
Chris Wright, Han, Weidong
Kay, Allen M wrote:
> vt-d specific files in KVM for contructing vt-d page tables and
> programming vt-d context entries.
>
> Signed-off-by: Allen M. Kay <allen.m.kay@intel.com>
>
> diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
> new file mode 100644
> index 0000000..634802c
> --- /dev/null
> +++ b/arch/x86/kvm/vtd.c
> @@ -0,0 +1,197 @@
> +/*
> + * Copyright (c) 2006, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
> License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License
> along with
> + * this program; if not, write to the Free Software Foundation, Inc.,
> 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + *
> + * Copyright (C) 2006-2008 Intel Corporation
> + * Author: Allen M. Kay <allen.m.kay@intel.com>
> + * Author: Weidong Han <weidong.han@intel.com>
> + */
> +
> +#include <linux/list.h>
> +#include <linux/kvm_host.h>
> +#include <linux/pci.h>
> +#include <linux/dmar.h>
> +#include <linux/intel-iommu.h>
> +#include "vtd.h"
> +
> +int kvm_iommu_map_pages(struct kvm *kvm,
> + gfn_t base_gfn, unsigned long npages)
> +{
> + gfn_t gfn = base_gfn;
> + pfn_t pfn;
> + struct page *page;
> + int i, rc;
> +
> + if (!kvm->arch.domain)
> + return -EFAULT;
> +
> + printk(KERN_DEBUG "kvm_iommu_map_page: gpa = %lx\n",
> + gfn << PAGE_SHIFT);
> + printk(KERN_DEBUG "kvm_iommu_map_page: hpa = %lx\n",
> + gfn_to_pfn(kvm, base_gfn) << PAGE_SHIFT);
> + printk(KERN_DEBUG "kvm_iommu_map_page: size = %lx\n",
> + npages*PAGE_SIZE);
> +
> + for (i = 0; i < npages; i++) {
> + pfn = gfn_to_pfn(kvm, gfn);
> + if (pfn_valid(pfn)) {
> + rc = kvm_intel_iommu_page_mapping(kvm->arch.domain,
> + gfn << PAGE_SHIFT, pfn << PAGE_SHIFT,
> + PAGE_SIZE, DMA_PTE_READ | DMA_PTE_WRITE);
> + if (rc) {
> + page = gfn_to_page(kvm, gfn);
> + put_page(page);
This is racy. gfn_to_page() can return a different page each time it is
called. Instead iommu_map_page() should drop the refcount if it fails.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2008-06-20 18:24 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-06-10 0:43 [PATCH 4/4][VTD] vt-d specific files in KVM Kay, Allen M
2008-06-10 10:27 ` Muli Ben-Yehuda
2008-06-10 14:26 ` Anthony Liguori
2008-06-10 14:56 ` Muli Ben-Yehuda
2008-06-10 15:02 ` Anthony Liguori
2008-06-10 15:15 ` Muli Ben-Yehuda
2008-06-10 15:24 ` Anthony Liguori
2008-06-10 16:07 ` Muli Ben-Yehuda
2008-06-20 18:24 ` Avi Kivity
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox