public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* VT-d support for device assignment
@ 2008-08-26  8:55 Amit Shah
  2008-08-26  8:55 ` [PATCH 1/2] VT-d: changes to support KVM Amit Shah
  2008-08-26  9:10 ` VT-d support for device assignment Avi Kivity
  0 siblings, 2 replies; 22+ messages in thread
From: Amit Shah @ 2008-08-26  8:55 UTC (permalink / raw)
  To: avi
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay


The following two patches contain VT-d support for device assignment to KVM guests.

The first patch contains the changes that are required to the generic VT-d code.

The second patch contains the changes to KVM.

Since the last send, I've updated the 2nd patch to expose KVM_CAP_IOMMU and check for the existence of an IOMMU before using one. I've also preserved the 'From' field for both the patches so that the original authors are credited for the patches.

The command line should currently be invoked with the following parameter:

-pcidevice dev=00:13.0,dma=iommu

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/2] VT-d: changes to support KVM
  2008-08-26  8:55 VT-d support for device assignment Amit Shah
@ 2008-08-26  8:55 ` Amit Shah
  2008-08-26  8:55   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
  2008-08-26  9:10 ` VT-d support for device assignment Avi Kivity
  1 sibling, 1 reply; 22+ messages in thread
From: Amit Shah @ 2008-08-26  8:55 UTC (permalink / raw)
  To: avi
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay, Amit Shah

From: Kay, Allen M <allen.m.kay@intel.com>

This patch extends the VT-d driver to support KVM

[Ben: fixed memory pinning]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>
---
 drivers/pci/dmar.c          |    4 +-
 drivers/pci/intel-iommu.c   |  117 ++++++++++++++-
 drivers/pci/intel-iommu.h   |  344 -----------------------------------------
 drivers/pci/iova.c          |    2 +-
 drivers/pci/iova.h          |   52 -------
 include/linux/intel-iommu.h |  355 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/iova.h        |   52 +++++++
 7 files changed, 523 insertions(+), 403 deletions(-)
 delete mode 100644 drivers/pci/intel-iommu.h
 delete mode 100644 drivers/pci/iova.h
 create mode 100644 include/linux/intel-iommu.h
 create mode 100644 include/linux/iova.h

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 8bf86ae..1df28ea 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -26,8 +26,8 @@
 
 #include <linux/pci.h>
 #include <linux/dmar.h>
-#include "iova.h"
-#include "intel-iommu.h"
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 
 #undef PREFIX
 #define PREFIX "DMAR:"
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8d0e60a..1eefc60 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -20,6 +20,7 @@
  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  */
 
+#undef DEBUG
 #include <linux/init.h>
 #include <linux/bitmap.h>
 #include <linux/debugfs.h>
@@ -33,8 +34,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/mempool.h>
 #include <linux/timer.h>
-#include "iova.h"
-#include "intel-iommu.h"
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 #include <asm/proto.h> /* force_iommu in this header in x86-64*/
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
@@ -160,7 +161,7 @@ static inline void *alloc_domain_mem(void)
 	return iommu_kmem_cache_alloc(iommu_domain_cache);
 }
 
-static inline void free_domain_mem(void *vaddr)
+static void free_domain_mem(void *vaddr)
 {
 	kmem_cache_free(iommu_domain_cache, vaddr);
 }
@@ -1414,7 +1415,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
  * find_domain
  * Note: we use struct pci_dev->dev.archdata.iommu stores the info
  */
-struct dmar_domain *
+static struct dmar_domain *
 find_domain(struct pci_dev *pdev)
 {
 	struct device_domain_info *info;
@@ -2430,3 +2431,111 @@ int __init intel_iommu_init(void)
 	return 0;
 }
 
+void intel_iommu_domain_exit(struct dmar_domain *domain)
+{
+	u64 end;
+
+	/* Domain 0 is reserved, so dont process it */
+	if (!domain)
+		return;
+
+	end = DOMAIN_MAX_ADDR(domain->gaw);
+	end = end & (~PAGE_MASK_4K);
+
+	/* clear ptes */
+	dma_pte_clear_range(domain, 0, end);
+
+	/* free page tables */
+	dma_pte_free_pagetable(domain, 0, end);
+
+	iommu_free_domain(domain);
+	free_domain_mem(domain);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
+
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
+{
+	struct dmar_drhd_unit *drhd;
+	struct dmar_domain *domain;
+	struct intel_iommu *iommu;
+
+	drhd = dmar_find_matched_drhd_unit(pdev);
+	if (!drhd) {
+		printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
+		return NULL;
+	}
+
+	iommu = drhd->iommu;
+	if (!iommu) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: iommu == NULL\n");
+		return NULL;
+	}
+	domain = iommu_alloc_domain(iommu);
+	if (!domain) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: domain == NULL\n");
+		return NULL;
+	}
+	if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: domain_init() failed\n");
+		intel_iommu_domain_exit(domain);
+		return NULL;
+	}
+	return domain;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+
+int intel_iommu_context_mapping(
+	struct dmar_domain *domain, struct pci_dev *pdev)
+{
+	int rc;
+	rc = domain_context_mapping(domain, pdev);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+
+int intel_iommu_page_mapping(
+	struct dmar_domain *domain, dma_addr_t iova,
+	u64 hpa, size_t size, int prot)
+{
+	int rc;
+	rc = domain_page_mapping(domain, iova, hpa, size, prot);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+{
+	detach_domain_for_dev(domain, bus, devfn);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+
+struct dmar_domain *
+intel_iommu_find_domain(struct pci_dev *pdev)
+{
+	return find_domain(pdev);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+
+int intel_iommu_found(void)
+{
+	return g_num_of_iommus;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_found);
+
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
+{
+	struct dma_pte *pte;
+	u64 pfn;
+
+	pfn = 0;
+	pte = addr_to_dma_pte(domain, iova);
+
+	if (pte)
+		pfn = dma_pte_addr(*pte);
+
+	return pfn >> PAGE_SHIFT_4K;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
deleted file mode 100644
index afc0ad9..0000000
--- a/drivers/pci/intel-iommu.h
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Author: Ashok Raj <ashok.raj@intel.com>
- * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
- */
-
-#ifndef _INTEL_IOMMU_H_
-#define _INTEL_IOMMU_H_
-
-#include <linux/types.h>
-#include <linux/msi.h>
-#include <linux/sysdev.h>
-#include "iova.h"
-#include <linux/io.h>
-
-/*
- * We need a fixed PAGE_SIZE of 4K irrespective of
- * arch PAGE_SIZE for IOMMU page tables.
- */
-#define PAGE_SHIFT_4K		(12)
-#define PAGE_SIZE_4K		(1UL << PAGE_SHIFT_4K)
-#define PAGE_MASK_4K		(((u64)-1) << PAGE_SHIFT_4K)
-#define PAGE_ALIGN_4K(addr)	(((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
-
-#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT_4K)
-#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
-#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
-
-/*
- * Intel IOMMU register specification per version 1.0 public spec.
- */
-
-#define	DMAR_VER_REG	0x0	/* Arch version supported by this IOMMU */
-#define	DMAR_CAP_REG	0x8	/* Hardware supported capabilities */
-#define	DMAR_ECAP_REG	0x10	/* Extended capabilities supported */
-#define	DMAR_GCMD_REG	0x18	/* Global command register */
-#define	DMAR_GSTS_REG	0x1c	/* Global status register */
-#define	DMAR_RTADDR_REG	0x20	/* Root entry table */
-#define	DMAR_CCMD_REG	0x28	/* Context command reg */
-#define	DMAR_FSTS_REG	0x34	/* Fault Status register */
-#define	DMAR_FECTL_REG	0x38	/* Fault control register */
-#define	DMAR_FEDATA_REG	0x3c	/* Fault event interrupt data register */
-#define	DMAR_FEADDR_REG	0x40	/* Fault event interrupt addr register */
-#define	DMAR_FEUADDR_REG 0x44	/* Upper address register */
-#define	DMAR_AFLOG_REG	0x58	/* Advanced Fault control */
-#define	DMAR_PMEN_REG	0x64	/* Enable Protected Memory Region */
-#define	DMAR_PLMBASE_REG 0x68	/* PMRR Low addr */
-#define	DMAR_PLMLIMIT_REG 0x6c	/* PMRR low limit */
-#define	DMAR_PHMBASE_REG 0x70	/* pmrr high base addr */
-#define	DMAR_PHMLIMIT_REG 0x78	/* pmrr high limit */
-
-#define OFFSET_STRIDE		(9)
-/*
-#define dmar_readl(dmar, reg) readl(dmar + reg)
-#define dmar_readq(dmar, reg) ({ \
-		u32 lo, hi; \
-		lo = readl(dmar + reg); \
-		hi = readl(dmar + reg + 4); \
-		(((u64) hi) << 32) + lo; })
-*/
-static inline u64 dmar_readq(void __iomem *addr)
-{
-	u32 lo, hi;
-	lo = readl(addr);
-	hi = readl(addr + 4);
-	return (((u64) hi) << 32) + lo;
-}
-
-static inline void dmar_writeq(void __iomem *addr, u64 val)
-{
-	writel((u32)val, addr);
-	writel((u32)(val >> 32), addr + 4);
-}
-
-#define DMAR_VER_MAJOR(v)		(((v) & 0xf0) >> 4)
-#define DMAR_VER_MINOR(v)		((v) & 0x0f)
-
-/*
- * Decoding Capability Register
- */
-#define cap_read_drain(c)	(((c) >> 55) & 1)
-#define cap_write_drain(c)	(((c) >> 54) & 1)
-#define cap_max_amask_val(c)	(((c) >> 48) & 0x3f)
-#define cap_num_fault_regs(c)	((((c) >> 40) & 0xff) + 1)
-#define cap_pgsel_inv(c)	(((c) >> 39) & 1)
-
-#define cap_super_page_val(c)	(((c) >> 34) & 0xf)
-#define cap_super_offset(c)	(((find_first_bit(&cap_super_page_val(c), 4)) \
-					* OFFSET_STRIDE) + 21)
-
-#define cap_fault_reg_offset(c)	((((c) >> 24) & 0x3ff) * 16)
-#define cap_max_fault_reg_offset(c) \
-	(cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)
-
-#define cap_zlr(c)		(((c) >> 22) & 1)
-#define cap_isoch(c)		(((c) >> 23) & 1)
-#define cap_mgaw(c)		((((c) >> 16) & 0x3f) + 1)
-#define cap_sagaw(c)		(((c) >> 8) & 0x1f)
-#define cap_caching_mode(c)	(((c) >> 7) & 1)
-#define cap_phmr(c)		(((c) >> 6) & 1)
-#define cap_plmr(c)		(((c) >> 5) & 1)
-#define cap_rwbf(c)		(((c) >> 4) & 1)
-#define cap_afl(c)		(((c) >> 3) & 1)
-#define cap_ndoms(c)		(((unsigned long)1) << (4 + 2 * ((c) & 0x7)))
-/*
- * Extended Capability Register
- */
-
-#define ecap_niotlb_iunits(e)	((((e) >> 24) & 0xff) + 1)
-#define ecap_iotlb_offset(e) 	((((e) >> 8) & 0x3ff) * 16)
-#define ecap_max_iotlb_offset(e) \
-	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
-#define ecap_coherent(e)	((e) & 0x1)
-
-
-/* IOTLB_REG */
-#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
-#define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
-#define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
-#define DMA_TLB_IIRG(type) ((type >> 60) & 7)
-#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
-#define DMA_TLB_READ_DRAIN (((u64)1) << 49)
-#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
-#define DMA_TLB_DID(id)	(((u64)((id) & 0xffff)) << 32)
-#define DMA_TLB_IVT (((u64)1) << 63)
-#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
-#define DMA_TLB_MAX_SIZE (0x3f)
-
-/* PMEN_REG */
-#define DMA_PMEN_EPM (((u32)1)<<31)
-#define DMA_PMEN_PRS (((u32)1)<<0)
-
-/* GCMD_REG */
-#define DMA_GCMD_TE (((u32)1) << 31)
-#define DMA_GCMD_SRTP (((u32)1) << 30)
-#define DMA_GCMD_SFL (((u32)1) << 29)
-#define DMA_GCMD_EAFL (((u32)1) << 28)
-#define DMA_GCMD_WBF (((u32)1) << 27)
-
-/* GSTS_REG */
-#define DMA_GSTS_TES (((u32)1) << 31)
-#define DMA_GSTS_RTPS (((u32)1) << 30)
-#define DMA_GSTS_FLS (((u32)1) << 29)
-#define DMA_GSTS_AFLS (((u32)1) << 28)
-#define DMA_GSTS_WBFS (((u32)1) << 27)
-
-/* CCMD_REG */
-#define DMA_CCMD_ICC (((u64)1) << 63)
-#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
-#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
-#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
-#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
-#define DMA_CCMD_MASK_NOBIT 0
-#define DMA_CCMD_MASK_1BIT 1
-#define DMA_CCMD_MASK_2BIT 2
-#define DMA_CCMD_MASK_3BIT 3
-#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
-#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
-
-/* FECTL_REG */
-#define DMA_FECTL_IM (((u32)1) << 31)
-
-/* FSTS_REG */
-#define DMA_FSTS_PPF ((u32)2)
-#define DMA_FSTS_PFO ((u32)1)
-#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
-
-/* FRCD_REG, 32 bits access */
-#define DMA_FRCD_F (((u32)1) << 31)
-#define dma_frcd_type(d) ((d >> 30) & 1)
-#define dma_frcd_fault_reason(c) (c & 0xff)
-#define dma_frcd_source_id(c) (c & 0xffff)
-#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
-
-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
-	u64	val;
-	u64	rsvd1;
-};
-#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
-static inline bool root_present(struct root_entry *root)
-{
-	return (root->val & 1);
-}
-static inline void set_root_present(struct root_entry *root)
-{
-	root->val |= 1;
-}
-static inline void set_root_value(struct root_entry *root, unsigned long value)
-{
-	root->val |= value & PAGE_MASK_4K;
-}
-
-struct context_entry;
-static inline struct context_entry *
-get_context_addr_from_root(struct root_entry *root)
-{
-	return (struct context_entry *)
-		(root_present(root)?phys_to_virt(
-		root->val & PAGE_MASK_4K):
-		NULL);
-}
-
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
-	u64 lo;
-	u64 hi;
-};
-#define context_present(c) ((c).lo & 1)
-#define context_fault_disable(c) (((c).lo >> 1) & 1)
-#define context_translation_type(c) (((c).lo >> 2) & 3)
-#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
-#define context_address_width(c) ((c).hi &  7)
-#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
-
-#define context_set_present(c) do {(c).lo |= 1;} while (0)
-#define context_set_fault_enable(c) \
-	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
-#define context_set_translation_type(c, val) \
-	do { \
-		(c).lo &= (((u64)-1) << 4) | 3; \
-		(c).lo |= ((val) & 3) << 2; \
-	} while (0)
-#define CONTEXT_TT_MULTI_LEVEL 0
-#define context_set_address_root(c, val) \
-	do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
-#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
-#define context_set_domain_id(c, val) \
-	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
-#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
-
-/*
- * 0: readable
- * 1: writable
- * 2-6: reserved
- * 7: super page
- * 8-11: available
- * 12-63: Host physcial address
- */
-struct dma_pte {
-	u64 val;
-};
-#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
-
-#define DMA_PTE_READ (1)
-#define DMA_PTE_WRITE (2)
-
-#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
-#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
-#define dma_set_pte_prot(p, prot) \
-		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
-#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
-#define dma_set_pte_addr(p, addr) do {\
-		(p).val |= ((addr) & PAGE_MASK_4K); } while (0)
-#define dma_pte_present(p) (((p).val & 3) != 0)
-
-struct intel_iommu;
-
-struct dmar_domain {
-	int	id;			/* domain id */
-	struct intel_iommu *iommu;	/* back pointer to owning iommu */
-
-	struct list_head devices; 	/* all devices' list */
-	struct iova_domain iovad;	/* iova's that belong to this domain */
-
-	struct dma_pte	*pgd;		/* virtual address */
-	spinlock_t	mapping_lock;	/* page table lock */
-	int		gaw;		/* max guest address width */
-
-	/* adjusted guest address width, 0 is level 2 30-bit */
-	int		agaw;
-
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
-	int		flags;
-};
-
-/* PCI domain-device relationship */
-struct device_domain_info {
-	struct list_head link;	/* link to domain siblings */
-	struct list_head global; /* link to global list */
-	u8 bus;			/* PCI bus numer */
-	u8 devfn;		/* PCI devfn number */
-	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
-	struct dmar_domain *domain; /* pointer to domain */
-};
-
-extern int init_dmars(void);
-
-struct intel_iommu {
-	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
-	u64		cap;
-	u64		ecap;
-	unsigned long 	*domain_ids; /* bitmap of domains */
-	struct dmar_domain **domains; /* ptr to domains */
-	int		seg;
-	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
-	spinlock_t	lock; /* protect context, domain ids */
-	spinlock_t	register_lock; /* protect register handling */
-	struct root_entry *root_entry; /* virtual address */
-
-	unsigned int irq;
-	unsigned char name[7];    /* Device Name */
-	struct msi_msg saved_msg;
-	struct sys_device sysdev;
-};
-
-#ifndef CONFIG_DMAR_GFX_WA
-static inline void iommu_prepare_gfx_mapping(void)
-{
-	return;
-}
-#endif /* !CONFIG_DMAR_GFX_WA */
-
-#endif
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 3ef4ac0..2287116 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -7,7 +7,7 @@
  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  */
 
-#include "iova.h"
+#include <linux/iova.h>
 
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit)
diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h
deleted file mode 100644
index 228f6c9..0000000
--- a/drivers/pci/iova.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This file is released under the GPLv2.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
- *
- */
-
-#ifndef _IOVA_H_
-#define _IOVA_H_
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/rbtree.h>
-#include <linux/dma-mapping.h>
-
-/* IO virtual address start page frame number */
-#define IOVA_START_PFN		(1)
-
-/* iova structure */
-struct iova {
-	struct rb_node	node;
-	unsigned long	pfn_hi; /* IOMMU dish out addr hi */
-	unsigned long	pfn_lo; /* IOMMU dish out addr lo */
-};
-
-/* holds all the iova translations for a domain */
-struct iova_domain {
-	spinlock_t	iova_alloc_lock;/* Lock to protect iova  allocation */
-	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
-	struct rb_root	rbroot;		/* iova domain rbtree root */
-	struct rb_node	*cached32_node; /* Save last alloced node */
-	unsigned long	dma_32bit_pfn;
-};
-
-struct iova *alloc_iova_mem(void);
-void free_iova_mem(struct iova *iova);
-void free_iova(struct iova_domain *iovad, unsigned long pfn);
-void __free_iova(struct iova_domain *iovad, struct iova *iova);
-struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
-	unsigned long limit_pfn,
-	bool size_aligned);
-struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
-	unsigned long pfn_hi);
-void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
-void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit);
-struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
-void put_iova_domain(struct iova_domain *iovad);
-
-#endif
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
new file mode 100644
index 0000000..1490fc0
--- /dev/null
+++ b/include/linux/intel-iommu.h
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Ashok Raj <ashok.raj@intel.com>
+ * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ */
+
+#ifndef _INTEL_IOMMU_H_
+#define _INTEL_IOMMU_H_
+
+#include <linux/types.h>
+#include <linux/msi.h>
+#include <linux/sysdev.h>
+#include "iova.h"
+#include <linux/io.h>
+
+/*
+ * We need a fixed PAGE_SIZE of 4K irrespective of
+ * arch PAGE_SIZE for IOMMU page tables.
+ */
+#define PAGE_SHIFT_4K		(12)
+#define PAGE_SIZE_4K		(1UL << PAGE_SHIFT_4K)
+#define PAGE_MASK_4K		(((u64)-1) << PAGE_SHIFT_4K)
+#define PAGE_ALIGN_4K(addr)	(((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
+
+#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT_4K)
+#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
+#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
+
+/*
+ * Intel IOMMU register specification per version 1.0 public spec.
+ */
+
+#define	DMAR_VER_REG	0x0	/* Arch version supported by this IOMMU */
+#define	DMAR_CAP_REG	0x8	/* Hardware supported capabilities */
+#define	DMAR_ECAP_REG	0x10	/* Extended capabilities supported */
+#define	DMAR_GCMD_REG	0x18	/* Global command register */
+#define	DMAR_GSTS_REG	0x1c	/* Global status register */
+#define	DMAR_RTADDR_REG	0x20	/* Root entry table */
+#define	DMAR_CCMD_REG	0x28	/* Context command reg */
+#define	DMAR_FSTS_REG	0x34	/* Fault Status register */
+#define	DMAR_FECTL_REG	0x38	/* Fault control register */
+#define	DMAR_FEDATA_REG	0x3c	/* Fault event interrupt data register */
+#define	DMAR_FEADDR_REG	0x40	/* Fault event interrupt addr register */
+#define	DMAR_FEUADDR_REG 0x44	/* Upper address register */
+#define	DMAR_AFLOG_REG	0x58	/* Advanced Fault control */
+#define	DMAR_PMEN_REG	0x64	/* Enable Protected Memory Region */
+#define	DMAR_PLMBASE_REG 0x68	/* PMRR Low addr */
+#define	DMAR_PLMLIMIT_REG 0x6c	/* PMRR low limit */
+#define	DMAR_PHMBASE_REG 0x70	/* pmrr high base addr */
+#define	DMAR_PHMLIMIT_REG 0x78	/* pmrr high limit */
+
+#define OFFSET_STRIDE		(9)
+/*
+#define dmar_readl(dmar, reg) readl(dmar + reg)
+#define dmar_readq(dmar, reg) ({ \
+		u32 lo, hi; \
+		lo = readl(dmar + reg); \
+		hi = readl(dmar + reg + 4); \
+		(((u64) hi) << 32) + lo; })
+*/
+static inline u64 dmar_readq(void __iomem *addr)
+{
+	u32 lo, hi;
+	lo = readl(addr);
+	hi = readl(addr + 4);
+	return (((u64) hi) << 32) + lo;
+}
+
+static inline void dmar_writeq(void __iomem *addr, u64 val)
+{
+	writel((u32)val, addr);
+	writel((u32)(val >> 32), addr + 4);
+}
+
+#define DMAR_VER_MAJOR(v)		(((v) & 0xf0) >> 4)
+#define DMAR_VER_MINOR(v)		((v) & 0x0f)
+
+/*
+ * Decoding Capability Register
+ */
+#define cap_read_drain(c)	(((c) >> 55) & 1)
+#define cap_write_drain(c)	(((c) >> 54) & 1)
+#define cap_max_amask_val(c)	(((c) >> 48) & 0x3f)
+#define cap_num_fault_regs(c)	((((c) >> 40) & 0xff) + 1)
+#define cap_pgsel_inv(c)	(((c) >> 39) & 1)
+
+#define cap_super_page_val(c)	(((c) >> 34) & 0xf)
+#define cap_super_offset(c)	(((find_first_bit(&cap_super_page_val(c), 4)) \
+					* OFFSET_STRIDE) + 21)
+
+#define cap_fault_reg_offset(c)	((((c) >> 24) & 0x3ff) * 16)
+#define cap_max_fault_reg_offset(c) \
+	(cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)
+
+#define cap_zlr(c)		(((c) >> 22) & 1)
+#define cap_isoch(c)		(((c) >> 23) & 1)
+#define cap_mgaw(c)		((((c) >> 16) & 0x3f) + 1)
+#define cap_sagaw(c)		(((c) >> 8) & 0x1f)
+#define cap_caching_mode(c)	(((c) >> 7) & 1)
+#define cap_phmr(c)		(((c) >> 6) & 1)
+#define cap_plmr(c)		(((c) >> 5) & 1)
+#define cap_rwbf(c)		(((c) >> 4) & 1)
+#define cap_afl(c)		(((c) >> 3) & 1)
+#define cap_ndoms(c)		(((unsigned long)1) << (4 + 2 * ((c) & 0x7)))
+/*
+ * Extended Capability Register
+ */
+
+#define ecap_niotlb_iunits(e)	((((e) >> 24) & 0xff) + 1)
+#define ecap_iotlb_offset(e) 	((((e) >> 8) & 0x3ff) * 16)
+#define ecap_max_iotlb_offset(e) \
+	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
+#define ecap_coherent(e)	((e) & 0x1)
+
+
+/* IOTLB_REG */
+#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
+#define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
+#define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
+#define DMA_TLB_IIRG(type) ((type >> 60) & 7)
+#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
+#define DMA_TLB_READ_DRAIN (((u64)1) << 49)
+#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
+#define DMA_TLB_DID(id)	(((u64)((id) & 0xffff)) << 32)
+#define DMA_TLB_IVT (((u64)1) << 63)
+#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
+#define DMA_TLB_MAX_SIZE (0x3f)
+
+/* PMEN_REG */
+#define DMA_PMEN_EPM (((u32)1)<<31)
+#define DMA_PMEN_PRS (((u32)1)<<0)
+
+/* GCMD_REG */
+#define DMA_GCMD_TE (((u32)1) << 31)
+#define DMA_GCMD_SRTP (((u32)1) << 30)
+#define DMA_GCMD_SFL (((u32)1) << 29)
+#define DMA_GCMD_EAFL (((u32)1) << 28)
+#define DMA_GCMD_WBF (((u32)1) << 27)
+
+/* GSTS_REG */
+#define DMA_GSTS_TES (((u32)1) << 31)
+#define DMA_GSTS_RTPS (((u32)1) << 30)
+#define DMA_GSTS_FLS (((u32)1) << 29)
+#define DMA_GSTS_AFLS (((u32)1) << 28)
+#define DMA_GSTS_WBFS (((u32)1) << 27)
+
+/* CCMD_REG */
+#define DMA_CCMD_ICC (((u64)1) << 63)
+#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
+#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
+#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
+#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
+#define DMA_CCMD_MASK_NOBIT 0
+#define DMA_CCMD_MASK_1BIT 1
+#define DMA_CCMD_MASK_2BIT 2
+#define DMA_CCMD_MASK_3BIT 3
+#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
+#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
+
+/* FECTL_REG */
+#define DMA_FECTL_IM (((u32)1) << 31)
+
+/* FSTS_REG */
+#define DMA_FSTS_PPF ((u32)2)
+#define DMA_FSTS_PFO ((u32)1)
+#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
+
+/* FRCD_REG, 32 bits access */
+#define DMA_FRCD_F (((u32)1) << 31)
+#define dma_frcd_type(d) ((d >> 30) & 1)
+#define dma_frcd_fault_reason(c) (c & 0xff)
+#define dma_frcd_source_id(c) (c & 0xffff)
+#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
+
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+	u64	val;
+	u64	rsvd1;
+};
+#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
+static inline bool root_present(struct root_entry *root)
+{
+	return (root->val & 1);
+}
+static inline void set_root_present(struct root_entry *root)
+{
+	root->val |= 1;
+}
+static inline void set_root_value(struct root_entry *root, unsigned long value)
+{
+	root->val |= value & PAGE_MASK_4K;
+}
+
+struct context_entry;
+static inline struct context_entry *
+get_context_addr_from_root(struct root_entry *root)
+{
+	return (struct context_entry *)
+		(root_present(root)?phys_to_virt(
+		root->val & PAGE_MASK_4K):
+		NULL);
+}
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+	u64 lo;
+	u64 hi;
+};
+#define context_present(c) ((c).lo & 1)
+#define context_fault_disable(c) (((c).lo >> 1) & 1)
+#define context_translation_type(c) (((c).lo >> 2) & 3)
+#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
+#define context_address_width(c) ((c).hi &  7)
+#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
+
+#define context_set_present(c) do {(c).lo |= 1;} while (0)
+#define context_set_fault_enable(c) \
+	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
+#define context_set_translation_type(c, val) \
+	do { \
+		(c).lo &= (((u64)-1) << 4) | 3; \
+		(c).lo |= ((val) & 3) << 2; \
+	} while (0)
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define context_set_address_root(c, val) \
+	do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
+#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
+#define context_set_domain_id(c, val) \
+	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
+#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-11: available
+ * 12-63: Host physcial address
+ */
+struct dma_pte {
+	u64 val;
+};
+#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
+
+#define DMA_PTE_READ (1)
+#define DMA_PTE_WRITE (2)
+
+#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
+#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
+#define dma_set_pte_prot(p, prot) \
+		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
+#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
+#define dma_set_pte_addr(p, addr) do {\
+		(p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+#define dma_pte_present(p) (((p).val & 3) != 0)
+
+struct intel_iommu;
+
+struct dmar_domain {
+	int	id;			/* domain id */
+	struct intel_iommu *iommu;	/* back pointer to owning iommu */
+
+	struct list_head devices; 	/* all devices' list */
+	struct iova_domain iovad;	/* iova's that belong to this domain */
+
+	struct dma_pte	*pgd;		/* virtual address */
+	spinlock_t	mapping_lock;	/* page table lock */
+	int		gaw;		/* max guest address width */
+
+	/* adjusted guest address width, 0 is level 2 30-bit */
+	int		agaw;
+
+#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
+	int		flags;
+};
+
+/* PCI domain-device relationship */
+struct device_domain_info {
+	struct list_head link;	/* link to domain siblings */
+	struct list_head global; /* link to global list */
+	u8 bus;			/* PCI bus numer */
+	u8 devfn;		/* PCI devfn number */
+	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+	struct dmar_domain *domain; /* pointer to domain */
+};
+
+extern int init_dmars(void);
+
+struct intel_iommu {
+	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
+	u64		cap;
+	u64		ecap;
+	unsigned long 	*domain_ids; /* bitmap of domains */
+	struct dmar_domain **domains; /* ptr to domains */
+	int		seg;
+	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
+	spinlock_t	lock; /* protect context, domain ids */
+	spinlock_t	register_lock; /* protect register handling */
+	struct root_entry *root_entry; /* virtual address */
+
+	unsigned int irq;
+	unsigned char name[7];    /* Device Name */
+	struct msi_msg saved_msg;
+	struct sys_device sysdev;
+};
+
+#ifndef CONFIG_DMAR_GFX_WA
+static inline void iommu_prepare_gfx_mapping(void)
+{
+	return;
+}
+#endif /* !CONFIG_DMAR_GFX_WA */
+
+void intel_iommu_domain_exit(struct dmar_domain *domain);
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
+int intel_iommu_context_mapping(struct dmar_domain *domain,
+				struct pci_dev *pdev);
+int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
+			     u64 hpa, size_t size, int prot);
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
+struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
+int intel_iommu_found(void);
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
+
+#endif
diff --git a/include/linux/iova.h b/include/linux/iova.h
new file mode 100644
index 0000000..228f6c9
--- /dev/null
+++ b/include/linux/iova.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This file is released under the GPLv2.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ *
+ */
+
+#ifndef _IOVA_H_
+#define _IOVA_H_
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/dma-mapping.h>
+
+/* IO virtual address start page frame number */
+#define IOVA_START_PFN		(1)
+
+/* iova structure */
+struct iova {
+	struct rb_node	node;
+	unsigned long	pfn_hi; /* IOMMU dish out addr hi */
+	unsigned long	pfn_lo; /* IOMMU dish out addr lo */
+};
+
+/* holds all the iova translations for a domain */
+struct iova_domain {
+	spinlock_t	iova_alloc_lock;/* Lock to protect iova  allocation */
+	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
+	struct rb_root	rbroot;		/* iova domain rbtree root */
+	struct rb_node	*cached32_node; /* Save last alloced node */
+	unsigned long	dma_32bit_pfn;
+};
+
+struct iova *alloc_iova_mem(void);
+void free_iova_mem(struct iova *iova);
+void free_iova(struct iova_domain *iovad, unsigned long pfn);
+void __free_iova(struct iova_domain *iovad, struct iova *iova);
+struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
+	unsigned long limit_pfn,
+	bool size_aligned);
+struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
+	unsigned long pfn_hi);
+void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
+void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit);
+struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
+void put_iova_domain(struct iova_domain *iovad);
+
+#endif
-- 
1.5.4.3


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26  8:55 ` [PATCH 1/2] VT-d: changes to support KVM Amit Shah
@ 2008-08-26  8:55   ` Amit Shah
  2008-08-26 10:28     ` Zhang, Xiantao
  2008-09-03 16:52     ` Amit Shah
  0 siblings, 2 replies; 22+ messages in thread
From: Amit Shah @ 2008-08-26  8:55 UTC (permalink / raw)
  To: avi
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay, Amit Shah

From: Ben-Ami Yassour <benami@il.ibm.com>

Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>
---
 arch/x86/kvm/Makefile      |    3 +
 arch/x86/kvm/vtd.c         |  203 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |   14 +++
 include/asm-x86/kvm_host.h |    3 +
 include/linux/kvm.h        |    3 +
 include/linux/kvm_host.h   |   32 +++++++
 virt/kvm/kvm_main.c        |    9 ++-
 7 files changed, 266 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..3072b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
 	i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..4336769
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			  gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, rc;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn && !is_mmio_pfn(pfn))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (!is_mmio_pfn(pfn)) {
+			rc = intel_iommu_page_mapping(domain,
+						      gfn_to_gpa(gfn),
+						      pfn_to_hpa(pfn),
+						      PAGE_SIZE,
+						      DMA_PTE_READ |
+						      DMA_PTE_WRITE);
+			if (rc) {
+				kvm_release_pfn_clean(pfn);
+				printk(KERN_DEBUG "kvm_iommu_map_pages:"
+				       "iommu failed to map pfn=%lx\n", pfn);
+				return rc;
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			return 0;
+		}
+
+		gfn++;
+	}
+	return 0;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, rc;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					 kvm->memslots[i].npages);
+		if (rc) {
+			up_read(&kvm->slots_lock);
+			return rc;
+		}
+	}
+	up_read(&kvm->slots_lock);
+	return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int rc;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "intel iommu not found\n");
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+
+	rc = kvm_iommu_map_memslots(kvm);
+	if (rc)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					 pdev);
+	if (rc) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return rc;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct pci_dev *pdev = NULL;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		for_each_pci_dev(pdev) {
+			if ((pdev->bus->number == entry->host_busnr) &&
+			    (pdev->devfn == entry->host_devfn))
+				break;
+		}
+
+		if (pdev == NULL)
+			return -ENODEV;
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain,
+				       pdev->bus->number, pdev->devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bfc7c33..38ab48b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -276,9 +277,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
 out_disable:
 	pci_disable_device(dev);
 out_put:
@@ -1145,6 +1155,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -4264,6 +4277,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 982b6b2..fcc8088 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -364,6 +364,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -513,6 +514,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
 
+int is_mmio_pfn(pfn_t pfn);
+
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef4bc6f..4269be1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -384,6 +384,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#define KVM_CAP_IOMMU 18
 
 /*
  * ioctls for VM fds
@@ -495,4 +496,6 @@ struct kvm_assigned_irq {
 	__u32 flags;
 };
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad..b703890 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_DMAR
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+#else /* CONFIG_DMAR */
+static inline int kvm_iommu_map_pages(struct kvm *kvm,
+				      gfn_t base_gfn,
+				      unsigned long npages)
+{
+	return 0;
+}
+
+static inline int kvm_iommu_map_guest(struct kvm *kvm,
+				      struct kvm_assigned_dev_kernel
+				      *assigned_dev)
+{
+	return -ENODEV;
+}
+
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
@@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
+static inline hpa_t pfn_to_hpa(pfn_t pfn)
+{
+	return (hpa_t)pfn << PAGE_SHIFT;
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0309571..191bfe1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out_free;
+
 	return 0;
 
 out_free:
-- 
1.5.4.3


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: VT-d support for device assignment
  2008-08-26  8:55 VT-d support for device assignment Amit Shah
  2008-08-26  8:55 ` [PATCH 1/2] VT-d: changes to support KVM Amit Shah
@ 2008-08-26  9:10 ` Avi Kivity
  2008-08-26 14:11   ` Amit Shah
  1 sibling, 1 reply; 22+ messages in thread
From: Avi Kivity @ 2008-08-26  9:10 UTC (permalink / raw)
  To: Amit Shah
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay

Amit Shah wrote:
> The following two patches contain VT-d support for device assignment for KVM guests.
>
> The first patch contains the changes that are required to the generic VT-d code.
>
> The second patch contains the changes to KVM.
>
> Since the last send, I've updated the 2nd patch to expose KVM_CAP_IOMMU and check for the existence of an IOMMU before using one. I've also preserved the 'From' field for both the patches so that the original authors are credited for the patches.
>
> The command line currently should be invoked with the parameter:
>
> -pcidevice dev=00:13.0,dma=iommu
>   

Why not default to iommu if one is available?

btw, -usbdevice prefixes host devices with host:, no?  maybe adopt the 
convention for -pcidevice?

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 22+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26  8:55   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
@ 2008-08-26 10:28     ` Zhang, Xiantao
  2008-08-26 10:35       ` Amit Shah
  2008-09-03 16:52     ` Amit Shah
  1 sibling, 1 reply; 22+ messages in thread
From: Zhang, Xiantao @ 2008-08-26 10:28 UTC (permalink / raw)
  To: Amit Shah, avi
  Cc: kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Han, Weidong, Kay, Allen M

Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to avoid
future code move.  
Thanks
Xiantao

Amit Shah wrote:
> From: Ben-Ami Yassour <benami@il.ibm.com>
> 
> Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>
> 
> This patch enables PCI device assignment based on VT-d support.
> When a device is assigned to the guest, the guest memory is pinned and
> the mapping is updated in the VT-d IOMMU.
> 
> [Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
> and also control enable/disable from userspace]
> 
> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> Signed-off-by: Weidong Han <weidong.han@intel.com>
> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> Signed-off-by: Amit Shah <amit.shah@qumranet.com>
> ---
>  arch/x86/kvm/Makefile      |    3 +
>  arch/x86/kvm/vtd.c         |  203
>  ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c     
>  |   14 +++ include/asm-x86/kvm_host.h |    3 +
>  include/linux/kvm.h        |    3 +
>  include/linux/kvm_host.h   |   32 +++++++
>  virt/kvm/kvm_main.c        |    9 ++-
>  7 files changed, 266 insertions(+), 1 deletions(-)
>  create mode 100644 arch/x86/kvm/vtd.c
> 
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index d0e940b..3072b17 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
> 
>  kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o
>  	lapic.o \ i8254.o
> +ifeq ($(CONFIG_DMAR),y)
> +kvm-objs += vtd.o
> +endif
>  obj-$(CONFIG_KVM) += kvm.o
>  kvm-intel-objs = vmx.o
>  obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
> diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
> new file mode 100644
> index 0000000..4336769
> --- /dev/null
> +++ b/arch/x86/kvm/vtd.c
> @@ -0,0 +1,203 @@
> +/*
> + * Copyright (c) 2006, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> modify it + * under the terms and conditions of the GNU General
> Public License, + * version 2, as published by the Free Software
> Foundation. + *
> + * This program is distributed in the hope it will be useful, but
> WITHOUT + * ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> General Public License for + * more details.
> + *
> + * You should have received a copy of the GNU General Public License
> along with + * this program; if not, write to the Free Software
> Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA
> 02111-1307 USA. + *
> + * Copyright (C) 2006-2008 Intel Corporation
> + * Copyright IBM Corporation, 2008
> + * Author: Allen M. Kay <allen.m.kay@intel.com>
> + * Author: Weidong Han <weidong.han@intel.com>
> + * Author: Ben-Ami Yassour <benami@il.ibm.com>
> + */
> +
> +#include <linux/list.h>
> +#include <linux/kvm_host.h>
> +#include <linux/pci.h>
> +#include <linux/dmar.h>
> +#include <linux/intel-iommu.h>
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
> +
> +int kvm_iommu_map_pages(struct kvm *kvm,
> +			  gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	int i, rc;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	/* check if iommu exists and in use */
> +	if (!domain)
> +		return 0;
> +
> +	for (i = 0; i < npages; i++) {
> +		/* check if already mapped */
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn_to_gpa(gfn));
> +		if (pfn && !is_mmio_pfn(pfn))
> +			continue;
> +
> +		pfn = gfn_to_pfn(kvm, gfn);
> +		if (!is_mmio_pfn(pfn)) {
> +			rc = intel_iommu_page_mapping(domain,
> +						      gfn_to_gpa(gfn),
> +						      pfn_to_hpa(pfn),
> +						      PAGE_SIZE,
> +						      DMA_PTE_READ |
> +						      DMA_PTE_WRITE);
> +			if (rc) {
> +				kvm_release_pfn_clean(pfn);
> +				printk(KERN_DEBUG "kvm_iommu_map_pages:"
> +				       "iommu failed to map pfn=%lx\n",
pfn);
> +				return rc;
> +			}
> +		} else {
> +			printk(KERN_DEBUG "kvm_iommu_map_page:"
> +			       "invalid pfn=%lx\n", pfn);
> +			return 0;
> +		}
> +
> +		gfn++;
> +	}
> +	return 0;
> +}
> +
> +static int kvm_iommu_map_memslots(struct kvm *kvm)
> +{
> +	int i, rc;
> +
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> +					 kvm->memslots[i].npages);
> +		if (rc) {
> +			up_read(&kvm->slots_lock);
> +			return rc;
> +		}
> +	}
> +	up_read(&kvm->slots_lock);
> +	return 0;
> +}
> +
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_assigned_dev_kernel *assigned_dev)
> +{
> +	struct pci_dev *pdev = NULL;
> +	int rc;
> +
> +	if (!intel_iommu_found()) {
> +		printk(KERN_ERR "intel iommu not found\n");
> +		return -ENODEV;
> +	}
> +
> +	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
> +	       assigned_dev->host_busnr,
> +	       PCI_SLOT(assigned_dev->host_devfn),
> +	       PCI_FUNC(assigned_dev->host_devfn));
> +
> +	pdev = assigned_dev->dev;
> +
> +	if (pdev == NULL) {
> +		if (kvm->arch.intel_iommu_domain) {
> +
intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
> +			kvm->arch.intel_iommu_domain = NULL;
> +		}
> +		return -ENODEV;
> +	}
> +
> +	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
> +
> +	rc = kvm_iommu_map_memslots(kvm);
> +	if (rc)
> +		goto out_unmap;
> +
> +	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
> +			       pdev->bus->number, pdev->devfn);
> +
> +	rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
> +					 pdev);
> +	if (rc) {
> +		printk(KERN_ERR "Domain context map for %s failed",
> +		       pci_name(pdev));
> +		goto out_unmap;
> +	}
> +	return 0;
> +
> +out_unmap:
> +	kvm_iommu_unmap_memslots(kvm);
> +	return rc;
> +}
> +
> +static void kvm_iommu_put_pages(struct kvm *kvm,
> +			       gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +	int i;
> +
> +	for (i = 0; i < npages; i++) {
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn_to_gpa(gfn));
> +		kvm_release_pfn_clean(pfn);
> +		gfn++;
> +	}
> +}
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm)
> +{
> +	int i;
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
> +				    kvm->memslots[i].npages);
> +	}
> +	up_read(&kvm->slots_lock);
> +
> +	return 0;
> +}
> +
> +int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +	struct kvm_assigned_dev_kernel *entry;
> +	struct pci_dev *pdev = NULL;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	/* check if iommu exists and in use */
> +	if (!domain)
> +		return 0;
> +
> +	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
> +		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
> +		       entry->host_busnr,
> +		       PCI_SLOT(entry->host_devfn),
> +		       PCI_FUNC(entry->host_devfn));
> +
> +		for_each_pci_dev(pdev) {
> +			if ((pdev->bus->number == entry->host_busnr) &&
> +			    (pdev->devfn == entry->host_devfn))
> +				break;
> +		}
> +
> +		if (pdev == NULL)
> +			return -ENODEV;
> +
> +		/* detach kvm dmar domain */
> +		intel_iommu_detach_dev(domain,
> +				       pdev->bus->number, pdev->devfn);
> +	}
> +	kvm_iommu_unmap_memslots(kvm);
> +	intel_iommu_domain_exit(domain);
> +	return 0;
> +}
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index bfc7c33..38ab48b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -35,6 +35,7 @@
>  #include <linux/module.h>
>  #include <linux/mman.h>
>  #include <linux/highmem.h>
> +#include <linux/intel-iommu.h>
> 
>  #include <asm/uaccess.h>
>  #include <asm/msr.h>
> @@ -276,9 +277,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm
> *kvm, 
> 
>  	list_add(&match->list, &kvm->arch.assigned_dev_head);
> 
> +	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
> +		r = kvm_iommu_map_guest(kvm, match);
> +		if (r)
> +			goto out_list_del;
> +	}
> +
>  out:
>  	mutex_unlock(&kvm->lock);
>  	return r;
> +out_list_del:
> +	list_del(&match->list);
> +	pci_release_regions(dev);
>  out_disable:
>  	pci_disable_device(dev);
>  out_put:
> @@ -1145,6 +1155,9 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	case KVM_CAP_PV_MMU:
>  		r = !tdp_enabled;
>  		break;
> +	case KVM_CAP_IOMMU:
> +		r = intel_iommu_found();
> +		break;
>  	default:
>  		r = 0;
>  		break;
> @@ -4264,6 +4277,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
> 
>  void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
> +	kvm_iommu_unmap_guest(kvm);
>  	kvm_free_assigned_devices(kvm);
>  	kvm_free_pit(kvm);
>  	kfree(kvm->arch.vpic);
> diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> index 982b6b2..fcc8088 100644
> --- a/include/asm-x86/kvm_host.h
> +++ b/include/asm-x86/kvm_host.h
> @@ -364,6 +364,7 @@ struct kvm_arch{
>  	 */
>  	struct list_head active_mmu_pages;
>  	struct list_head assigned_dev_head;
> +	struct dmar_domain *intel_iommu_domain;
>  	struct kvm_pic *vpic;
>  	struct kvm_ioapic *vioapic;
>  	struct kvm_pit *vpit;
> @@ -513,6 +514,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu,
>  gpa_t gpa, int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long
>  		  bytes, gpa_t addr, unsigned long *ret);
> 
> +int is_mmio_pfn(pfn_t pfn);
> +
>  extern bool tdp_enabled;
> 
>  enum emulation_result {
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index ef4bc6f..4269be1 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -384,6 +384,7 @@ struct kvm_trace_rec {
>  #define KVM_CAP_COALESCED_MMIO 15
>  #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected
>  in guest */ #define KVM_CAP_DEVICE_ASSIGNMENT 17
> +#define KVM_CAP_IOMMU 18
> 
>  /*
>   * ioctls for VM fds
> @@ -495,4 +496,6 @@ struct kvm_assigned_irq {
>  	__u32 flags;
>  };
> 
> +#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
> +
>  #endif
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index a18aaad..b703890 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
> 
> +#ifdef CONFIG_DMAR
> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
> +			unsigned long npages);
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_assigned_dev_kernel *assigned_dev);
> +int kvm_iommu_unmap_guest(struct kvm *kvm);
> +#else /* CONFIG_DMAR */
> +static inline int kvm_iommu_map_pages(struct kvm *kvm,
> +				      gfn_t base_gfn,
> +				      unsigned long npages)
> +{
> +	return 0;
> +}
> +
> +static inline int kvm_iommu_map_guest(struct kvm *kvm,
> +				      struct kvm_assigned_dev_kernel
> +				      *assigned_dev)
> +{
> +	return -ENODEV;
> +}
> +
> +static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +	return 0;
> +}
> +#endif /* CONFIG_DMAR */
> +
>  static inline void kvm_guest_enter(void)
>  {
>  	account_system_vtime(current);
> @@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
>  	return (gpa_t)gfn << PAGE_SHIFT;
>  }
> 
> +static inline hpa_t pfn_to_hpa(pfn_t pfn)
> +{
> +	return (hpa_t)pfn << PAGE_SHIFT;
> +}
> +
>  static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
>  {
>  	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 0309571..191bfe1 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -41,6 +41,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/mman.h>
>  #include <linux/swap.h>
> +#include <linux/intel-iommu.h>
> 
>  #include <asm/processor.h>
>  #include <asm/io.h>
> @@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
>  	return likely(n >= 0 && n < KVM_MAX_VCPUS);
>  }
> 
> -static inline int is_mmio_pfn(pfn_t pfn)
> +inline int is_mmio_pfn(pfn_t pfn)
>  {
>  	if (pfn_valid(pfn))
>  		return PageReserved(pfn_to_page(pfn));
> @@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
>  	}
> 
>  	kvm_free_physmem_slot(&old, &new);
> +
> +	/* map the pages in iommu page table */
> +	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
> +	if (r)
> +		goto out_free;
> +
>  	return 0;
> 
>  out_free:
> --
> 1.5.4.3


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26 10:28     ` Zhang, Xiantao
@ 2008-08-26 10:35       ` Amit Shah
  2008-08-26 10:42         ` Zhang, Xiantao
  0 siblings, 1 reply; 22+ messages in thread
From: Amit Shah @ 2008-08-26 10:35 UTC (permalink / raw)
  To: Zhang, Xiantao
  Cc: avi, kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Han, Weidong, Kay, Allen M

* On Tuesday 26 Aug 2008 15:58:42 Zhang, Xiantao wrote:
> Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to avoid
> future code move.

As of now, device assignment resides inside the x86 directory and is only 
tested in x86 environment. Once we support ia64, we'll have a lot of files 
moving anyway.

However, I don't have any preference for the location of vtd.c; depends on 
what Avi thinks.

Amit

^ permalink raw reply	[flat|nested] 22+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26 10:35       ` Amit Shah
@ 2008-08-26 10:42         ` Zhang, Xiantao
  2008-08-26 10:57           ` Amit Shah
  2008-08-26 14:41           ` Avi Kivity
  0 siblings, 2 replies; 22+ messages in thread
From: Zhang, Xiantao @ 2008-08-26 10:42 UTC (permalink / raw)
  To: Amit Shah
  Cc: avi, kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Han, Weidong, Kay, Allen M

Amit Shah wrote:
> * On Tuesday 26 Aug 2008 15:58:42 Zhang, Xiantao wrote:
>> Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to
>> avoid future code move.
> 
> As of now, device assignment resides inside the x86 directory and is
> only tested in x86 environment. Once we support ia64, we'll have a
> lot of files moving anyway.

Just a suggestion.  Even if put it @ virt/kvm, we still can make it only
compiled with x86 before enabling it for ia64. :)
Have you considered the cross-arch support in the current code?

> However, I don't have any preference for the location of vtd.c;
> depends on what Avi thinks.


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26 10:42         ` Zhang, Xiantao
@ 2008-08-26 10:57           ` Amit Shah
  2008-08-26 11:04             ` Zhang, Xiantao
  2008-08-26 14:41           ` Avi Kivity
  1 sibling, 1 reply; 22+ messages in thread
From: Amit Shah @ 2008-08-26 10:57 UTC (permalink / raw)
  To: Zhang, Xiantao
  Cc: avi, kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Han, Weidong, Kay, Allen M

* On Tuesday 26 Aug 2008 16:12:56 Zhang, Xiantao wrote:
> Amit Shah wrote:
> > * On Tuesday 26 Aug 2008 15:58:42 Zhang, Xiantao wrote:
> >> Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to
> >> avoid future code move.
> >
> > As of now, device assignment resides inside the x86 directory and is
> > only tested in x86 environment. Once we support ia64, we'll have a
> > lot of files moving anyway.
>
> Just a suggestion.  Even if put it @ virt/kvm, we still can make it only
> compiled with x86 before enabling it for ia64. :)

Sure; I'm fine with that :-)

> Have you considered the cross-arch support in the current code?

I've not, since I don't know much about ia64, but Allen had mentioned that it 
shouldn't take much effort to port the current code to ia64.

Amit

^ permalink raw reply	[flat|nested] 22+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26 10:57           ` Amit Shah
@ 2008-08-26 11:04             ` Zhang, Xiantao
  0 siblings, 0 replies; 22+ messages in thread
From: Zhang, Xiantao @ 2008-08-26 11:04 UTC (permalink / raw)
  To: Amit Shah
  Cc: avi, kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Han, Weidong, Kay, Allen M

Amit Shah wrote:
> * On Tuesday 26 Aug 2008 16:12:56 Zhang, Xiantao wrote:
>> Amit Shah wrote:
>>> * On Tuesday 26 Aug 2008 15:58:42 Zhang, Xiantao wrote:
>>>> Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to
>>>> avoid future code move.
>>> 
>>> As of now, device assignment resides inside the x86 directory and is
>>> only tested in x86 environment. Once we support ia64, we'll have a
>>> lot of files moving anyway.
>> 
>> Just a suggestion.  Even if put it @ virt/kvm, we still can make it
>> only compiled with x86 before enabling it for ia64. :)
> 
> Sure; I'm fine with that :-)

>> Have you considered the cross-arch support in the current code?
> 
> I've not, since I don't know much about ia64, but Allen had mentioned
> that it shouldn't take much effort to port the current code to ia64.

Okay. Thanks for your explanation! 
Xiantao


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: VT-d support for device assignment
  2008-08-26  9:10 ` VT-d support for device assignment Avi Kivity
@ 2008-08-26 14:11   ` Amit Shah
  2008-08-26 14:38     ` Avi Kivity
  0 siblings, 1 reply; 22+ messages in thread
From: Amit Shah @ 2008-08-26 14:11 UTC (permalink / raw)
  To: Avi Kivity
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay

* On Tuesday 26 Aug 2008 14:40:57 Avi Kivity wrote:
> Amit Shah wrote:
> > The following two patches contain VT-d support for device assignment for
> > KVM guests.
> >
> > The first patch contains the changes that are required to the generic
> > VT-d code.
> >
> > The second patch contains the changes to KVM.
> >
> > Since the last send, I've updated the 2nd patch to expose KVM_CAP_IOMMU
> > and check for the existence of an IOMMU before using one. I've also
> > preserved the 'From' field for both the patches so that the original
> > authors are credited for the patches.
> >
> > The command line currently should be invoked with the parameter:
> >
> > -pcidevice dev=00:13.0,dma=iommu
>
> Why not default to iommu if one is available?

I've set that as default now.

What should the parameter be to disable just the iommu? dma=noiommu looks 
ugly.

Or do we not want to disable an iommu at all (even for debugging or testing 
something else, like pvdma)?

> btw, -usbdevice prefixes host devices with host:, no?  maybe adopt the
> convention for -pcidevice?

Sounds ok.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: VT-d support for device assignment
  2008-08-26 14:11   ` Amit Shah
@ 2008-08-26 14:38     ` Avi Kivity
  0 siblings, 0 replies; 22+ messages in thread
From: Avi Kivity @ 2008-08-26 14:38 UTC (permalink / raw)
  To: Amit Shah
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay

Amit Shah wrote:
>>> -pcidevice dev=00:13.0,dma=iommu
>>>       
>> Why not default to iommu if one is available?
>>     
>
> I've set that as default now.
>
> What should the parameter be to disable just the iommu? dma=noiommu looks 
> ugly.
>   

dma=none ?

> Or do we not want to disable an iommu at all (even for debugging or testing 
> something else, like pvdma)?
>
>   

Good question.  Maybe it's safer to always enable it, and to let 
developers hack it out.

We must be careful of people's feet.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26 10:42         ` Zhang, Xiantao
  2008-08-26 10:57           ` Amit Shah
@ 2008-08-26 14:41           ` Avi Kivity
  2008-08-26 15:09             ` Han, Weidong
  1 sibling, 1 reply; 22+ messages in thread
From: Avi Kivity @ 2008-08-26 14:41 UTC (permalink / raw)
  To: Zhang, Xiantao
  Cc: Amit Shah, kvm, muli, anthony, jbarnes, Woodhouse, David,
	Gross, Mark, benami, Han, Weidong, Kay, Allen M

Zhang, Xiantao wrote:
> Amit Shah wrote:
>   
>> * On Tuesday 26 Aug 2008 15:58:42 Zhang, Xiantao wrote:
>>     
>>> Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to
>>> avoid future code move.
>>>       
>> As of now, device assignment resides inside the x86 directory and is
>> only tested in x86 environment. Once we support ia64, we'll have a
>> lot of files moving anyway.
>>     
>
> Just a suggestion.  Even if put it @ virt/kvm, we still can make it only
>   

I'm fine with keeping it in x86 and moving it later, since the code is 
late already.  However if someone is willing to do the work to move it 
to virt/kvm/, I'm happy with that as well.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 22+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26 14:41           ` Avi Kivity
@ 2008-08-26 15:09             ` Han, Weidong
  0 siblings, 0 replies; 22+ messages in thread
From: Han, Weidong @ 2008-08-26 15:09 UTC (permalink / raw)
  To: Avi Kivity, Zhang, Xiantao
  Cc: Amit Shah, kvm, muli, anthony, jbarnes, Woodhouse, David,
	Gross, Mark, benami, Kay, Allen M

Avi Kivity wrote:
> Zhang, Xiantao wrote:
>> Amit Shah wrote:
>> 
>>> * On Tuesday 26 Aug 2008 15:58:42 Zhang, Xiantao wrote:
>>> 
>>>> Maybe vtd.c should be put @ virt/kvm so that ia64 can share it to
>>>> avoid future code move. 
>>>> 
>>> As of now, device assignment resides inside the x86 directory and is
>>> only tested in x86 environment. Once we support ia64, we'll have a
>>> lot of files moving anyway. 
>>> 
>> 
>> Just a suggestion.  Even if put it @ virt/kvm, we still can make it
>> only 
>> 
> 
> I'm fine with keeping it in x86 and moving it later, since the code is
> late already.  However if someone is willing to do the work to move it
> to virt/kvm/, I'm happy with that as well.

I think we'd better keep it in x86 at this moment. Making VT-d code
arch-independent is our goal. Moving that file is not enough, let's do
it cleanly after merging VT-d patches into upstream.

Randy (Weidong)

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-08-26  8:55   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
  2008-08-26 10:28     ` Zhang, Xiantao
@ 2008-09-03 16:52     ` Amit Shah
  2008-09-09  7:18       ` Han, Weidong
  1 sibling, 1 reply; 22+ messages in thread
From: Amit Shah @ 2008-09-03 16:52 UTC (permalink / raw)
  To: Ben-Ami Yassour1
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay, Avi Kivity

There are a couple of things here that might need some error handling:

* On Tuesday 26 August 2008 14:25:35 Amit Shah wrote:
> From: Ben-Ami Yassour <benami@il.ibm.com>
>
> Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>
>
> This patch enables PCI device assignment based on VT-d support.
> When a device is assigned to the guest, the guest memory is pinned and
> the mapping is updated in the VT-d IOMMU.
>
> [Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
> and also control enable/disable from userspace]
>
> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> Signed-off-by: Weidong Han <weidong.han@intel.com>
> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> Signed-off-by: Amit Shah <amit.shah@qumranet.com>


> +#include <linux/list.h>
> +#include <linux/kvm_host.h>
> +#include <linux/pci.h>
> +#include <linux/dmar.h>
> +#include <linux/intel-iommu.h>
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
> +
> +int kvm_iommu_map_pages(struct kvm *kvm,
> +			  gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	int i, rc;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	/* check if iommu exists and in use */
> +	if (!domain)
> +		return 0;
> +
> +	for (i = 0; i < npages; i++) {
> +		/* check if already mapped */
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn_to_gpa(gfn));
> +		if (pfn && !is_mmio_pfn(pfn))
> +			continue;
> +
> +		pfn = gfn_to_pfn(kvm, gfn);
> +		if (!is_mmio_pfn(pfn)) {
> +			rc = intel_iommu_page_mapping(domain,
> +						      gfn_to_gpa(gfn),
> +						      pfn_to_hpa(pfn),
> +						      PAGE_SIZE,
> +						      DMA_PTE_READ |
> +						      DMA_PTE_WRITE);
> +			if (rc) {
> +				kvm_release_pfn_clean(pfn);
> +				printk(KERN_DEBUG "kvm_iommu_map_pages:"
> +				       "iommu failed to map pfn=%lx\n", pfn);
> +				return rc;
> +			}
> +		} else {
> +			printk(KERN_DEBUG "kvm_iommu_map_page:"
> +			       "invalid pfn=%lx\n", pfn);
> +			return 0;
> +		}

In the error case, this function should itself call unmap_pages so that either 
all pages are mapped or none are. Also makes it easier to bail out in the two 
places this function gets called.

> +
> +		gfn++;
> +	}
> +	return 0;
> +}
> +
> +static int kvm_iommu_map_memslots(struct kvm *kvm)
> +{
> +	int i, rc;
> +
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> +					 kvm->memslots[i].npages);
> +		if (rc) {
> +			up_read(&kvm->slots_lock);
> +			return rc;
> +		}
> +	}
> +	up_read(&kvm->slots_lock);
> +	return 0;
> +}
> +
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_assigned_dev_kernel *assigned_dev)
> +{
> +	struct pci_dev *pdev = NULL;
> +	int rc;
> +
> +	if (!intel_iommu_found()) {
> +		printk(KERN_ERR "intel iommu not found\n");
> +		return -ENODEV;
> +	}
> +
> +	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
> +	       assigned_dev->host_busnr,
> +	       PCI_SLOT(assigned_dev->host_devfn),
> +	       PCI_FUNC(assigned_dev->host_devfn));
> +
> +	pdev = assigned_dev->dev;
> +
> +	if (pdev == NULL) {
> +		if (kvm->arch.intel_iommu_domain) {
> +			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
> +			kvm->arch.intel_iommu_domain = NULL;
> +		}
> +		return -ENODEV;
> +	}
> +
> +	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);

check if we really got the domain

> +
> +	rc = kvm_iommu_map_memslots(kvm);
> +	if (rc)
> +		goto out_unmap;
> +
> +	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
> +			       pdev->bus->number, pdev->devfn);
> +
> +	rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
> +					 pdev);

This function name (as Mark points out) doesn't make much sense; can this be 
changed?

> +	if (rc) {
> +		printk(KERN_ERR "Domain context map for %s failed",
> +		       pci_name(pdev));
> +		goto out_unmap;
> +	}
> +	return 0;
> +
> +out_unmap:
> +	kvm_iommu_unmap_memslots(kvm);
> +	return rc;
> +}
> +
> +static void kvm_iommu_put_pages(struct kvm *kvm,
> +			       gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +	int i;
> +
> +	for (i = 0; i < npages; i++) {
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn_to_gpa(gfn));
> +		kvm_release_pfn_clean(pfn);
> +		gfn++;
> +	}
> +}
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm)
> +{
> +	int i;
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
> +				    kvm->memslots[i].npages);
> +	}
> +	up_read(&kvm->slots_lock);
> +
> +	return 0;
> +}
> +
> +int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +	struct kvm_assigned_dev_kernel *entry;
> +	struct pci_dev *pdev = NULL;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	/* check if iommu exists and in use */
> +	if (!domain)
> +		return 0;
> +
> +	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
> +		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
> +		       entry->host_busnr,
> +		       PCI_SLOT(entry->host_devfn),
> +		       PCI_FUNC(entry->host_devfn));
> +
> +		for_each_pci_dev(pdev) {
> +			if ((pdev->bus->number == entry->host_busnr) &&
> +			    (pdev->devfn == entry->host_devfn))
> +				break;
> +		}

We store the PCI dev in entry->dev; no need to scan this entire list.

> +
> +		if (pdev == NULL)
> +			return -ENODEV;
> +
> +		/* detach kvm dmar domain */
> +		intel_iommu_detach_dev(domain,
> +				       pdev->bus->number, pdev->devfn);
> +	}
> +	kvm_iommu_unmap_memslots(kvm);
> +	intel_iommu_domain_exit(domain);
> +	return 0;
> +}
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index bfc7c33..38ab48b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -35,6 +35,7 @@
>  #include <linux/module.h>
>  #include <linux/mman.h>
>  #include <linux/highmem.h>
> +#include <linux/intel-iommu.h>
>
>  #include <asm/uaccess.h>
>  #include <asm/msr.h>
> @@ -276,9 +277,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
>
>  	list_add(&match->list, &kvm->arch.assigned_dev_head);
>
> +	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
> +		r = kvm_iommu_map_guest(kvm, match);
> +		if (r)
> +			goto out_list_del;
> +	}
> +
>  out:
>  	mutex_unlock(&kvm->lock);
>  	return r;
> +out_list_del:
> +	list_del(&match->list);
> +	pci_release_regions(dev);
>  out_disable:
>  	pci_disable_device(dev);
>  out_put:
> @@ -1145,6 +1155,9 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	case KVM_CAP_PV_MMU:
>  		r = !tdp_enabled;
>  		break;
> +	case KVM_CAP_IOMMU:
> +		r = intel_iommu_found();
> +		break;
>  	default:
>  		r = 0;
>  		break;
> @@ -4264,6 +4277,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
>
>  void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
> +	kvm_iommu_unmap_guest(kvm);
>  	kvm_free_assigned_devices(kvm);
>  	kvm_free_pit(kvm);
>  	kfree(kvm->arch.vpic);
> diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> index 982b6b2..fcc8088 100644
> --- a/include/asm-x86/kvm_host.h
> +++ b/include/asm-x86/kvm_host.h
> @@ -364,6 +364,7 @@ struct kvm_arch{
>  	 */
>  	struct list_head active_mmu_pages;
>  	struct list_head assigned_dev_head;
> +	struct dmar_domain *intel_iommu_domain;
>  	struct kvm_pic *vpic;
>  	struct kvm_ioapic *vioapic;
>  	struct kvm_pit *vpit;
> @@ -513,6 +514,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t
> gpa, int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
>  		  gpa_t addr, unsigned long *ret);
>
> +int is_mmio_pfn(pfn_t pfn);
> +
>  extern bool tdp_enabled;
>
>  enum emulation_result {
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index ef4bc6f..4269be1 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -384,6 +384,7 @@ struct kvm_trace_rec {
>  #define KVM_CAP_COALESCED_MMIO 15
>  #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in
> guest */ #define KVM_CAP_DEVICE_ASSIGNMENT 17
> +#define KVM_CAP_IOMMU 18
>
>  /*
>   * ioctls for VM fds
> @@ -495,4 +496,6 @@ struct kvm_assigned_irq {
>  	__u32 flags;
>  };
>
> +#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
> +
>  #endif
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index a18aaad..b703890 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
>
> +#ifdef CONFIG_DMAR
> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
> +			unsigned long npages);
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_assigned_dev_kernel *assigned_dev);
> +int kvm_iommu_unmap_guest(struct kvm *kvm);
> +#else /* CONFIG_DMAR */
> +static inline int kvm_iommu_map_pages(struct kvm *kvm,
> +				      gfn_t base_gfn,
> +				      unsigned long npages)
> +{
> +	return 0;
> +}
> +
> +static inline int kvm_iommu_map_guest(struct kvm *kvm,
> +				      struct kvm_assigned_dev_kernel
> +				      *assigned_dev)
> +{
> +	return -ENODEV;
> +}
> +
> +static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +	return 0;
> +}
> +#endif /* CONFIG_DMAR */
> +
>  static inline void kvm_guest_enter(void)
>  {
>  	account_system_vtime(current);
> @@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
>  	return (gpa_t)gfn << PAGE_SHIFT;
>  }
>
> +static inline hpa_t pfn_to_hpa(pfn_t pfn)
> +{
> +	return (hpa_t)pfn << PAGE_SHIFT;
> +}
> +

This can be a separate patch.

>  static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
>  {
>  	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 0309571..191bfe1 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -41,6 +41,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/mman.h>
>  #include <linux/swap.h>
> +#include <linux/intel-iommu.h>
>
>  #include <asm/processor.h>
>  #include <asm/io.h>
> @@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
>  	return likely(n >= 0 && n < KVM_MAX_VCPUS);
>  }
>
> -static inline int is_mmio_pfn(pfn_t pfn)
> +inline int is_mmio_pfn(pfn_t pfn)
>  {
>  	if (pfn_valid(pfn))
>  		return PageReserved(pfn_to_page(pfn));
> @@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
>  	}
>
>  	kvm_free_physmem_slot(&old, &new);
> +
> +	/* map the pages in iommu page table */
> +	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
> +	if (r)
> +		goto out_free;

Doing the unmapping in the map function will mean we don't have to check for 
error return values here.

> +
>  	return 0;
>
>  out_free:



^ permalink raw reply	[flat|nested] 22+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-03 16:52     ` Amit Shah
@ 2008-09-09  7:18       ` Han, Weidong
  0 siblings, 0 replies; 22+ messages in thread
From: Han, Weidong @ 2008-09-09  7:18 UTC (permalink / raw)
  To: Amit Shah, Ben-Ami Yassour1
  Cc: kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Kay, Allen M, Avi Kivity

Amit Shah wrote:
> There are a couple of things here that might need some error handling:
> 
> * On Tuesday 26 August 2008 14:25:35 Amit Shah wrote:
>> From: Ben-Ami Yassour <benami@il.ibm.com>
>> 
>> Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>
>> 
>> This patch enables PCI device assignment based on VT-d support.
>> When a device is assigned to the guest, the guest memory is pinned
>> and 
>> the mapping is updated in the VT-d IOMMU.
>> 
>> [Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
>> and also control enable/disable from userspace]
>> 
>> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
>> Signed-off-by: Weidong Han <weidong.han@intel.com>
>> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
>> Signed-off-by: Amit Shah <amit.shah@qumranet.com>
> 
> 
>> +#include <linux/list.h>
>> +#include <linux/kvm_host.h>
>> +#include <linux/pci.h>
>> +#include <linux/dmar.h>
>> +#include <linux/intel-iommu.h>
>> +
>> +static int kvm_iommu_unmap_memslots(struct kvm *kvm); +
>> +int kvm_iommu_map_pages(struct kvm *kvm,
>> +			  gfn_t base_gfn, unsigned long npages)
>> +{
>> +	gfn_t gfn = base_gfn;
>> +	pfn_t pfn;
>> +	int i, rc;
>> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain; +
>> +	/* check if iommu exists and in use */
>> +	if (!domain)
>> +		return 0;
>> +
>> +	for (i = 0; i < npages; i++) {
>> +		/* check if already mapped */
>> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
>> +						     gfn_to_gpa(gfn));
>> +		if (pfn && !is_mmio_pfn(pfn))
>> +			continue;
>> +
>> +		pfn = gfn_to_pfn(kvm, gfn);
>> +		if (!is_mmio_pfn(pfn)) {
>> +			rc = intel_iommu_page_mapping(domain,
>> +						      gfn_to_gpa(gfn),
>> +						      pfn_to_hpa(pfn),
>> +						      PAGE_SIZE,
>> +						      DMA_PTE_READ |
>> +						      DMA_PTE_WRITE);
>> +			if (rc) {
>> +				kvm_release_pfn_clean(pfn);
>> +				printk(KERN_DEBUG "kvm_iommu_map_pages:"
>> +				       "iommu failed to map pfn=%lx\n",
pfn);
>> +				return rc;
>> +			}
>> +		} else {
>> +			printk(KERN_DEBUG "kvm_iommu_map_page:"
>> +			       "invalid pfn=%lx\n", pfn);
>> +			return 0;
>> +		}
> 
> In the error case, this function should itself call unmap_pages so
> that either all pages are mapped or none are. Also makes it easier to
> bail out in the two places this function gets called.
> 

Good catch. Will fix it.

>> +
>> +		gfn++;
>> +	}
>> +	return 0;
>> +}
>> +
>> +static int kvm_iommu_map_memslots(struct kvm *kvm) +{
>> +	int i, rc;
>> +
>> +	down_read(&kvm->slots_lock);
>> +	for (i = 0; i < kvm->nmemslots; i++) {
>> +		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
>> +					 kvm->memslots[i].npages);
>> +		if (rc) {
>> +			up_read(&kvm->slots_lock);
>> +			return rc;
>> +		}
>> +	}
>> +	up_read(&kvm->slots_lock);
>> +	return 0;
>> +}
>> +
>> +int kvm_iommu_map_guest(struct kvm *kvm,
>> +			struct kvm_assigned_dev_kernel *assigned_dev)
>> +{
>> +	struct pci_dev *pdev = NULL;
>> +	int rc;
>> +
>> +	if (!intel_iommu_found()) {
>> +		printk(KERN_ERR "intel iommu not found\n");
>> +		return -ENODEV;
>> +	}
>> +
>> +	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
>> +	       assigned_dev->host_busnr,
>> +	       PCI_SLOT(assigned_dev->host_devfn),
>> +	       PCI_FUNC(assigned_dev->host_devfn));
>> +
>> +	pdev = assigned_dev->dev;
>> +
>> +	if (pdev == NULL) {
>> +		if (kvm->arch.intel_iommu_domain) {
>> +
intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
>> +			kvm->arch.intel_iommu_domain = NULL;
>> +		}
>> +		return -ENODEV;
>> +	}
>> +
>> +	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
> 
> check if we really got the domain

yes, need a check here.

> 
>> +
>> +	rc = kvm_iommu_map_memslots(kvm);
>> +	if (rc)
>> +		goto out_unmap;
>> +
>> +	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
>> +			       pdev->bus->number, pdev->devfn);
>> +
>> +	rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
>> +					 pdev);
> 
> This function name (as Mark points out) doesn't make much sense; can
> this be changed?

This function name keeps consistent with original name in vtd driver. do
you want to add a verb in the name? 

> 
>> +	if (rc) {
>> +		printk(KERN_ERR "Domain context map for %s failed", +

>> pci_name(pdev)); +		goto out_unmap;
>> +	}
>> +	return 0;
>> +
>> +out_unmap:
>> +	kvm_iommu_unmap_memslots(kvm);
>> +	return rc;
>> +}
>> +
>> +static void kvm_iommu_put_pages(struct kvm *kvm,
>> +			       gfn_t base_gfn, unsigned long npages)
>> +{
>> +	gfn_t gfn = base_gfn;
>> +	pfn_t pfn;
>> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain; +
int i;
>> +
>> +	for (i = 0; i < npages; i++) {
>> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
>> +						     gfn_to_gpa(gfn));
>> +		kvm_release_pfn_clean(pfn);
>> +		gfn++;
>> +	}
>> +}
>> +
>> +static int kvm_iommu_unmap_memslots(struct kvm *kvm) +{
>> +	int i;
>> +	down_read(&kvm->slots_lock);
>> +	for (i = 0; i < kvm->nmemslots; i++) {
>> +		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
>> +				    kvm->memslots[i].npages);
>> +	}
>> +	up_read(&kvm->slots_lock);
>> +
>> +	return 0;
>> +}
>> +
>> +int kvm_iommu_unmap_guest(struct kvm *kvm)
>> +{
>> +	struct kvm_assigned_dev_kernel *entry;
>> +	struct pci_dev *pdev = NULL;
>> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain; +
>> +	/* check if iommu exists and in use */
>> +	if (!domain)
>> +		return 0;
>> +
>> +	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
>> +		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n", +

>> entry->host_busnr, +		       PCI_SLOT(entry->host_devfn),
>> +		       PCI_FUNC(entry->host_devfn));
>> +
>> +		for_each_pci_dev(pdev) {
>> +			if ((pdev->bus->number == entry->host_busnr) &&
>> +			    (pdev->devfn == entry->host_devfn))
>> +				break;
>> +		}
> 
> We store the PCI dev in entry->dev; no need to scan this entire list.

yes, it's not necessary.

> 
>> +
>> +		if (pdev == NULL)
>> +			return -ENODEV;
>> +
>> +		/* detach kvm dmar domain */
>> +		intel_iommu_detach_dev(domain,
>> +				       pdev->bus->number, pdev->devfn);
>> +	}
>> +	kvm_iommu_unmap_memslots(kvm);
>> +	intel_iommu_domain_exit(domain);
>> +	return 0;
>> +}
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index a18aaad..b703890 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
>> 
>> +#ifdef CONFIG_DMAR
>> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
>> +			unsigned long npages); +int
kvm_iommu_map_guest(struct kvm *kvm,
>> +			struct kvm_assigned_dev_kernel *assigned_dev);
>> +int kvm_iommu_unmap_guest(struct kvm *kvm);
>> +#else /* CONFIG_DMAR */
>> +static inline int kvm_iommu_map_pages(struct kvm *kvm, +

>> gfn_t base_gfn, +				      unsigned long
npages)
>> +{
>> +	return 0;
>> +}
>> +
>> +static inline int kvm_iommu_map_guest(struct kvm *kvm,
>> +				      struct kvm_assigned_dev_kernel
>> +				      *assigned_dev)
>> +{
>> +	return -ENODEV;
>> +}
>> +
>> +static inline int kvm_iommu_unmap_guest(struct kvm *kvm) +{
>> +	return 0;
>> +}
>> +#endif /* CONFIG_DMAR */
>> +
>>  static inline void kvm_guest_enter(void)
>>  {
>>  	account_system_vtime(current);
>> @@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
>>  	return (gpa_t)gfn << PAGE_SHIFT;
>>  }
>> 
>> +static inline hpa_t pfn_to_hpa(pfn_t pfn)
>> +{
>> +	return (hpa_t)pfn << PAGE_SHIFT;
>> +}
>> +
> 
> This can be a separate patch.
> 
>>  static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)  {
>>  	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 0309571..191bfe1 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -41,6 +41,7 @@
>>  #include <linux/pagemap.h>
>>  #include <linux/mman.h>
>>  #include <linux/swap.h>
>> +#include <linux/intel-iommu.h>
>> 
>>  #include <asm/processor.h>
>>  #include <asm/io.h>
>> @@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
>>  	return likely(n >= 0 && n < KVM_MAX_VCPUS);
>>  }
>> 
>> -static inline int is_mmio_pfn(pfn_t pfn)
>> +inline int is_mmio_pfn(pfn_t pfn)
>>  {
>>  	if (pfn_valid(pfn))
>>  		return PageReserved(pfn_to_page(pfn));
>> @@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
}
>> 
>>  	kvm_free_physmem_slot(&old, &new);
>> +
>> +	/* map the pages in iommu page table */
>> +	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
>> +	if (r)
>> +		goto out_free;
> 
> Doing the unmapping in the map function will mean we don't have to
> check for error return values here.
> 
>> +
>>  	return 0;
>> 
>>  out_free:


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 2/2] KVM: Device Assignment with VT-d
@ 2008-09-09 13:51 Han, Weidong
  2008-09-09 14:39 ` Amit Shah
  0 siblings, 1 reply; 22+ messages in thread
From: Han, Weidong @ 2008-09-09 13:51 UTC (permalink / raw)
  To: avi
  Cc: kvm, Amit Shah, muli, anthony, jbarnes, Woodhouse, David,
	Gross, Mark, benami, Kay, Allen M, Yang, Sheng, mgross

[-- Attachment #1: Type: text/plain, Size: 11849 bytes --]

From: Ben-Ami Yassour <benami@il.ibm.com>

Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
---
 arch/x86/kvm/Makefile      |    3 +
 arch/x86/kvm/vtd.c         |  201
++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |   14 +++
 include/asm-x86/kvm_host.h |    3 +
 include/linux/kvm.h        |    3 +
 include/linux/kvm_host.h   |   32 +++++++
 virt/kvm/kvm_main.c        |    9 ++-
 7 files changed, 264 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..3072b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o
lapic.o \
 	i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..d80f117
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			  gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, rc;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn && !is_mmio_pfn(pfn))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (!is_mmio_pfn(pfn)) {
+			rc = intel_iommu_page_mapping(domain,
+						      gfn_to_gpa(gfn),
+						      pfn_to_hpa(pfn),
+						      PAGE_SIZE,
+						      DMA_PTE_READ |
+						      DMA_PTE_WRITE);
+			if (rc) {
+				printk(KERN_DEBUG "kvm_iommu_map_pages:"
+				       "iommu failed to map pfn=%lx\n",
pfn);
+				goto unmap_pages;
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			goto unmap_pages;
+		}
+
+		gfn++;
+	}
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, base_gfn, i);
+	return rc;
+	
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, rc;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					 kvm->memslots[i].npages);
+		if (rc) {
+			up_read(&kvm->slots_lock);
+			return rc;
+		}
+	}
+	up_read(&kvm->slots_lock);
+	return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int rc;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "intel iommu not found\n");
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+
intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	if (!kvm->arch.intel_iommu_domain)
+		return -ENODEV;
+
+	rc = kvm_iommu_map_memslots(kvm);
+	if (rc)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					 pdev);
+	if (rc) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return rc;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain, entry->host_busnr,
+				       entry->host_devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb71..342f67a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm
*kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
 out_disable:
 	pci_disable_device(dev);
 out_put:
@@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -4266,6 +4279,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 815efc3..addd874 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -364,6 +364,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -514,6 +515,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t
gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
 
+int is_mmio_pfn(pfn_t pfn);
+
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef4bc6f..4269be1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -384,6 +384,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in
guest */
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#define KVM_CAP_IOMMU 18
 
 /*
  * ioctls for VM fds
@@ -495,4 +496,6 @@ struct kvm_assigned_irq {
 	__u32 flags;
 };
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad..b703890 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_DMAR
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+#else /* CONFIG_DMAR */
+static inline int kvm_iommu_map_pages(struct kvm *kvm,
+				      gfn_t base_gfn,
+				      unsigned long npages)
+{
+	return 0;
+}
+
+static inline int kvm_iommu_map_guest(struct kvm *kvm,
+				      struct kvm_assigned_dev_kernel
+				      *assigned_dev)
+{
+	return -ENODEV;
+}
+
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
@@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
+static inline hpa_t pfn_to_hpa(pfn_t pfn)
+{
+	return (hpa_t)pfn << PAGE_SHIFT;
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de3b029..6b55960 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out;
+
 	return 0;
 
 out_free:
-- 
1.5.1

[-- Attachment #2: 0002-Enable-pci-device-assignment-based-on-VT-d-support.patch --]
[-- Type: application/octet-stream, Size: 11415 bytes --]

From: Ben-Ami Yassour <benami@il.ibm.com>

Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
---
 arch/x86/kvm/Makefile      |    3 +
 arch/x86/kvm/vtd.c         |  201 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |   14 +++
 include/asm-x86/kvm_host.h |    3 +
 include/linux/kvm.h        |    3 +
 include/linux/kvm_host.h   |   32 +++++++
 virt/kvm/kvm_main.c        |    9 ++-
 7 files changed, 264 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..3072b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
 	i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..d80f117
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			  gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, rc = -EINVAL;	/* returned if we hit an mmio pfn below */
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn && !is_mmio_pfn(pfn))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (!is_mmio_pfn(pfn)) {
+			rc = intel_iommu_page_mapping(domain,
+						      gfn_to_gpa(gfn),
+						      pfn_to_hpa(pfn),
+						      PAGE_SIZE,
+						      DMA_PTE_READ |
+						      DMA_PTE_WRITE);
+			if (rc) {
+				printk(KERN_DEBUG "kvm_iommu_map_pages:"
+				       "iommu failed to map pfn=%lx\n", pfn);
+				goto unmap_pages;
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			goto unmap_pages;
+		}
+
+		gfn++;
+	}
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, base_gfn, i);
+	return rc;
+
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, rc;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					 kvm->memslots[i].npages);
+		if (rc) {
+			up_read(&kvm->slots_lock);
+			return rc;
+		}
+	}
+	up_read(&kvm->slots_lock);
+	return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int rc;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "intel iommu not found\n");
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	if (!kvm->arch.intel_iommu_domain)
+		return -ENODEV;
+
+	rc = kvm_iommu_map_memslots(kvm);
+	if (rc)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					 pdev);
+	if (rc) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return rc;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain, entry->host_busnr,
+				       entry->host_devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb71..342f67a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
 out_disable:
 	pci_disable_device(dev);
 out_put:
@@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -4266,6 +4279,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 815efc3..addd874 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -364,6 +364,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -514,6 +515,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
 
+int is_mmio_pfn(pfn_t pfn);
+
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef4bc6f..4269be1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -384,6 +384,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#define KVM_CAP_IOMMU 18
 
 /*
  * ioctls for VM fds
@@ -495,4 +496,6 @@ struct kvm_assigned_irq {
 	__u32 flags;
 };
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad..b703890 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_DMAR
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+#else /* CONFIG_DMAR */
+static inline int kvm_iommu_map_pages(struct kvm *kvm,
+				      gfn_t base_gfn,
+				      unsigned long npages)
+{
+	return 0;
+}
+
+static inline int kvm_iommu_map_guest(struct kvm *kvm,
+				      struct kvm_assigned_dev_kernel
+				      *assigned_dev)
+{
+	return -ENODEV;
+}
+
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
@@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
+static inline hpa_t pfn_to_hpa(pfn_t pfn)
+{
+	return (hpa_t)pfn << PAGE_SHIFT;
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de3b029..6b55960 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out;
+
 	return 0;
 
 out_free:
-- 
1.5.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-09 13:51 [PATCH 2/2] KVM: Device Assignment with VT-d Han, Weidong
@ 2008-09-09 14:39 ` Amit Shah
  2008-09-09 15:05   ` Han, Weidong
  0 siblings, 1 reply; 22+ messages in thread
From: Amit Shah @ 2008-09-09 14:39 UTC (permalink / raw)
  To: Han, Weidong
  Cc: avi, kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Kay, Allen M, Yang, Sheng, mgross

* On Tuesday 09 September 2008 19:21:20 Han, Weidong wrote:

> +static int kvm_iommu_map_memslots(struct kvm *kvm)
> +{
> +	int i, rc;
> +
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> +					 kvm->memslots[i].npages);
> +		if (rc) {
> +			up_read(&kvm->slots_lock);
> +			return rc;
> +		}
> +	}
> +	up_read(&kvm->slots_lock);
> +	return 0;
> +}

I simplified this to:

static int kvm_iommu_map_memslots(struct kvm *kvm)
{
	int i, r;

	down_read(&kvm->slots_lock);
	for (i = 0; i < kvm->nmemslots; i++) {
		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
					kvm->memslots[i].npages);
		if (r)
			break;
	}
	up_read(&kvm->slots_lock);
	return r;
}

Also cleaned up some whitespace.

I'll send out the patchset soon.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-09 14:44 ` [PATCH 1/2] VT-d: Changes to support KVM Amit Shah
@ 2008-09-09 14:44   ` Amit Shah
  0 siblings, 0 replies; 22+ messages in thread
From: Amit Shah @ 2008-09-09 14:44 UTC (permalink / raw)
  To: avi
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay, Amit Shah

From: Ben-Ami Yassour <benami@il.ibm.com>

Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
---
 arch/x86/kvm/Makefile      |    3 +
 arch/x86/kvm/vtd.c         |  198 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |   14 +++
 include/asm-x86/kvm_host.h |    3 +
 include/linux/kvm.h        |    3 +
 include/linux/kvm_host.h   |   32 +++++++
 virt/kvm/kvm_main.c        |    9 ++-
 7 files changed, 261 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..3072b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
 	i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..8660b2a
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+				gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, r = -EINVAL;	/* returned if we hit an mmio pfn below */
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn && !is_mmio_pfn(pfn))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (!is_mmio_pfn(pfn)) {
+			r = intel_iommu_page_mapping(domain,
+						     gfn_to_gpa(gfn),
+						     pfn_to_hpa(pfn),
+						     PAGE_SIZE,
+						     DMA_PTE_READ |
+						     DMA_PTE_WRITE);
+			if (r) {
+				printk(KERN_DEBUG "kvm_iommu_map_pages:"
+				       "iommu failed to map pfn=%lx\n", pfn);
+				goto unmap_pages;
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			goto unmap_pages;
+		}
+
+		gfn++;
+	}
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, base_gfn, i);
+	return r;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, r = 0;	/* stays 0 when there are no memslots to map */
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					kvm->memslots[i].npages);
+		if (r)
+			break;
+	}
+	up_read(&kvm->slots_lock);
+	return r;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int r;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	if (!kvm->arch.intel_iommu_domain)
+		return -ENODEV;
+
+	r = kvm_iommu_map_memslots(kvm);
+	if (r)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					pdev);
+	if (r) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain, entry->host_busnr,
+				       entry->host_devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb71..342f67a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
 out_disable:
 	pci_disable_device(dev);
 out_put:
@@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -4266,6 +4279,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 815efc3..addd874 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -364,6 +364,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -514,6 +515,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
 
+int is_mmio_pfn(pfn_t pfn);
+
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef4bc6f..4269be1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -384,6 +384,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#define KVM_CAP_IOMMU 18
 
 /*
  * ioctls for VM fds
@@ -495,4 +496,6 @@ struct kvm_assigned_irq {
 	__u32 flags;
 };
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad..b703890 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_DMAR
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+#else /* CONFIG_DMAR */
+static inline int kvm_iommu_map_pages(struct kvm *kvm,
+				      gfn_t base_gfn,
+				      unsigned long npages)
+{
+	return 0;
+}
+
+static inline int kvm_iommu_map_guest(struct kvm *kvm,
+				      struct kvm_assigned_dev_kernel
+				      *assigned_dev)
+{
+	return -ENODEV;
+}
+
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
@@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
+static inline hpa_t pfn_to_hpa(pfn_t pfn)
+{
+	return (hpa_t)pfn << PAGE_SHIFT;
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de3b029..6b55960 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out;
+
 	return 0;
 
 out_free:
-- 
1.6.0.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-09 14:39 ` Amit Shah
@ 2008-09-09 15:05   ` Han, Weidong
  0 siblings, 0 replies; 22+ messages in thread
From: Han, Weidong @ 2008-09-09 15:05 UTC (permalink / raw)
  To: Amit Shah
  Cc: avi, kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Kay, Allen M, Yang, Sheng, mgross

Amit, 

Thanks for your quick fixing.

Randy (Weidong)

Amit Shah wrote:
> * On Tuesday 09 September 2008 19:21:20 Han, Weidong wrote:
> 
>> +static int kvm_iommu_map_memslots(struct kvm *kvm) +{
>> +	int i, rc;
>> +
>> +	down_read(&kvm->slots_lock);
>> +	for (i = 0; i < kvm->nmemslots; i++) {
>> +		rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
>> +					 kvm->memslots[i].npages);
>> +		if (rc) {
>> +			up_read(&kvm->slots_lock);
>> +			return rc;
>> +		}
>> +	}
>> +	up_read(&kvm->slots_lock);
>> +	return 0;
>> +}
> 
> I simplified this to:
> 
> static int kvm_iommu_map_memslots(struct kvm *kvm)
> {
> 	int i, r;
> 
> 	down_read(&kvm->slots_lock);
> 	for (i = 0; i < kvm->nmemslots; i++) {
> 		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> 					kvm->memslots[i].npages);
> 		if (r)
> 			break;
> 	}
> 	up_read(&kvm->slots_lock);
> 	return r;
> }
> 
> Also cleaned up some whitespace.
> 
> I'll send out the patchset soon.


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-09 15:37 ` [PATCH 1/2] VT-d: Changes to support KVM Amit Shah
@ 2008-09-09 15:37   ` Amit Shah
  2008-09-11  7:21     ` Han, Weidong
  0 siblings, 1 reply; 22+ messages in thread
From: Amit Shah @ 2008-09-09 15:37 UTC (permalink / raw)
  To: avi
  Cc: kvm, muli, anthony, jbarnes, david.woodhouse, mark.gross, benami,
	weidong.han, allen.m.kay, Amit Shah

From: Ben-Ami Yassour <benami@il.ibm.com>

Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
---
 arch/x86/kvm/Makefile      |    3 +
 arch/x86/kvm/vtd.c         |  198 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |   14 +++
 include/asm-x86/kvm_host.h |    3 +
 include/linux/kvm.h        |    3 +
 include/linux/kvm_host.h   |   32 +++++++
 virt/kvm/kvm_main.c        |    9 ++-
 7 files changed, 261 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..3072b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
 	i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..667bf3f
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+				gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, r;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	r = -EINVAL;
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn && !is_mmio_pfn(pfn))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (!is_mmio_pfn(pfn)) {
+			r = intel_iommu_page_mapping(domain,
+						     gfn_to_gpa(gfn),
+						     pfn_to_hpa(pfn),
+						     PAGE_SIZE,
+						     DMA_PTE_READ |
+						     DMA_PTE_WRITE);
+			if (r) {
+				printk(KERN_DEBUG "kvm_iommu_map_pages:"
+				       "iommu failed to map pfn=%lx\n", pfn);
+				goto unmap_pages;
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			goto unmap_pages;
+		}
+		gfn++;
+	}
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, base_gfn, i);
+	return r;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, r;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					kvm->memslots[i].npages);
+		if (r)
+			break;
+	}
+	up_read(&kvm->slots_lock);
+	return r;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int r;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	if (!kvm->arch.intel_iommu_domain)
+		return -ENODEV;
+
+	r = kvm_iommu_map_memslots(kvm);
+	if (r)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					pdev);
+	if (r) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain, entry->host_busnr,
+				       entry->host_devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb71..342f67a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
 out_disable:
 	pci_disable_device(dev);
 out_put:
@@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -4266,6 +4279,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 815efc3..addd874 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -364,6 +364,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -514,6 +515,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
 
+int is_mmio_pfn(pfn_t pfn);
+
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef4bc6f..4269be1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -384,6 +384,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#define KVM_CAP_IOMMU 18
 
 /*
  * ioctls for VM fds
@@ -495,4 +496,6 @@ struct kvm_assigned_irq {
 	__u32 flags;
 };
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad..b703890 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_DMAR
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+#else /* CONFIG_DMAR */
+static inline int kvm_iommu_map_pages(struct kvm *kvm,
+				      gfn_t base_gfn,
+				      unsigned long npages)
+{
+	return 0;
+}
+
+static inline int kvm_iommu_map_guest(struct kvm *kvm,
+				      struct kvm_assigned_dev_kernel
+				      *assigned_dev)
+{
+	return -ENODEV;
+}
+
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
@@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
+static inline hpa_t pfn_to_hpa(pfn_t pfn)
+{
+	return (hpa_t)pfn << PAGE_SHIFT;
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de3b029..6b55960 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out;
+
 	return 0;
 
 out_free:
-- 
1.6.0.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* RE: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-09 15:37   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
@ 2008-09-11  7:21     ` Han, Weidong
  2008-09-14  0:49       ` Avi Kivity
  0 siblings, 1 reply; 22+ messages in thread
From: Han, Weidong @ 2008-09-11  7:21 UTC (permalink / raw)
  To: Amit Shah, avi
  Cc: kvm, muli, anthony, jbarnes, Woodhouse, David, Gross, Mark,
	benami, Kay, Allen M

[-- Attachment #1: Type: text/plain, Size: 12990 bytes --]

This patch can only work on x86; it breaks the build on other
architectures. This is because kvm_irq_ack_notifier and
kvm_assigned_dev_kernel are defined under x86, while they are always
used in include/linux/kvm_host.h whether or not CONFIG_DMAR is set. I
have moved these two definitions to include/linux/kvm_host.h and
attached the updated patch.

Randy (Weidong)

Amit Shah wrote:
> From: Ben-Ami Yassour <benami@il.ibm.com>
> 
> Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>
> 
> This patch enables PCI device assignment based on VT-d support.
> When a device is assigned to the guest, the guest memory is pinned and
> the mapping is updated in the VT-d IOMMU.
> 
> [Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
> and also control enable/disable from userspace]
> 
> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> Signed-off-by: Weidong Han <weidong.han@intel.com>
> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> Signed-off-by: Amit Shah <amit.shah@qumranet.com>
> 
> Acked-by: Mark Gross <mgross@linux.intel.com>
> ---
>  arch/x86/kvm/Makefile      |    3 +
>  arch/x86/kvm/vtd.c         |  198
>  ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c     
>  |   14 +++ include/asm-x86/kvm_host.h |    3 +
>  include/linux/kvm.h        |    3 +
>  include/linux/kvm_host.h   |   32 +++++++
>  virt/kvm/kvm_main.c        |    9 ++-
>  7 files changed, 261 insertions(+), 1 deletions(-)
>  create mode 100644 arch/x86/kvm/vtd.c
> 
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index d0e940b..3072b17 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
> 
>  kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o
>  	lapic.o \ i8254.o
> +ifeq ($(CONFIG_DMAR),y)
> +kvm-objs += vtd.o
> +endif
>  obj-$(CONFIG_KVM) += kvm.o
>  kvm-intel-objs = vmx.o
>  obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
> diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
> new file mode 100644
> index 0000000..667bf3f
> --- /dev/null
> +++ b/arch/x86/kvm/vtd.c
> @@ -0,0 +1,198 @@
> +/*
> + * Copyright (c) 2006, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> modify it + * under the terms and conditions of the GNU General
> Public License, + * version 2, as published by the Free Software
> Foundation. + *
> + * This program is distributed in the hope it will be useful, but
> WITHOUT + * ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> General Public License for + * more details.
> + *
> + * You should have received a copy of the GNU General Public License
> along with + * this program; if not, write to the Free Software
> Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA
> 02111-1307 USA. + *
> + * Copyright (C) 2006-2008 Intel Corporation
> + * Copyright IBM Corporation, 2008
> + * Author: Allen M. Kay <allen.m.kay@intel.com>
> + * Author: Weidong Han <weidong.han@intel.com>
> + * Author: Ben-Ami Yassour <benami@il.ibm.com>
> + */
> +
> +#include <linux/list.h>
> +#include <linux/kvm_host.h>
> +#include <linux/pci.h>
> +#include <linux/dmar.h>
> +#include <linux/intel-iommu.h>
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
> +static void kvm_iommu_put_pages(struct kvm *kvm,
> +				gfn_t base_gfn, unsigned long npages);
> +
> +int kvm_iommu_map_pages(struct kvm *kvm,
> +			gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	int i, r;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	/* check if iommu exists and in use */
> +	if (!domain)
> +		return 0;
> +
> +	r = -EINVAL;
> +	for (i = 0; i < npages; i++) {
> +		/* check if already mapped */
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn_to_gpa(gfn));
> +		if (pfn && !is_mmio_pfn(pfn))
> +			continue;
> +
> +		pfn = gfn_to_pfn(kvm, gfn);
> +		if (!is_mmio_pfn(pfn)) {
> +			r = intel_iommu_page_mapping(domain,
> +						     gfn_to_gpa(gfn),
> +						     pfn_to_hpa(pfn),
> +						     PAGE_SIZE,
> +						     DMA_PTE_READ |
> +						     DMA_PTE_WRITE);
> +			if (r) {
> +				printk(KERN_DEBUG "kvm_iommu_map_pages:"
> +				       "iommu failed to map pfn=%lx\n",
pfn);
> +				goto unmap_pages;
> +			}
> +		} else {
> +			printk(KERN_DEBUG "kvm_iommu_map_page:"
> +			       "invalid pfn=%lx\n", pfn);
> +			goto unmap_pages;
> +		}
> +		gfn++;
> +	}
> +	return 0;
> +
> +unmap_pages:
> +	kvm_iommu_put_pages(kvm, base_gfn, i);
> +	return r;
> +}
> +
> +static int kvm_iommu_map_memslots(struct kvm *kvm)
> +{
> +	int i, r;
> +
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> +					kvm->memslots[i].npages);
> +		if (r)
> +			break;
> +	}
> +	up_read(&kvm->slots_lock);
> +	return r;
> +}
> +
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_assigned_dev_kernel *assigned_dev)
> +{
> +	struct pci_dev *pdev = NULL;
> +	int r;
> +
> +	if (!intel_iommu_found()) {
> +		printk(KERN_ERR "%s: intel iommu not found\n",
__func__);
> +		return -ENODEV;
> +	}
> +
> +	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
> +	       assigned_dev->host_busnr,
> +	       PCI_SLOT(assigned_dev->host_devfn),
> +	       PCI_FUNC(assigned_dev->host_devfn));
> +
> +	pdev = assigned_dev->dev;
> +
> +	if (pdev == NULL) {
> +		if (kvm->arch.intel_iommu_domain) {
> +
intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
> +			kvm->arch.intel_iommu_domain = NULL;
> +		}
> +		return -ENODEV;
> +	}
> +
> +	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
> +	if (!kvm->arch.intel_iommu_domain)
> +		return -ENODEV;
> +
> +	r = kvm_iommu_map_memslots(kvm);
> +	if (r)
> +		goto out_unmap;
> +
> +	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
> +			       pdev->bus->number, pdev->devfn);
> +
> +	r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
> +					pdev);
> +	if (r) {
> +		printk(KERN_ERR "Domain context map for %s failed",
> +		       pci_name(pdev));
> +		goto out_unmap;
> +	}
> +	return 0;
> +
> +out_unmap:
> +	kvm_iommu_unmap_memslots(kvm);
> +	return r;
> +}
> +
> +static void kvm_iommu_put_pages(struct kvm *kvm,
> +			       gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +	int i;
> +
> +	for (i = 0; i < npages; i++) {
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn_to_gpa(gfn));
> +		kvm_release_pfn_clean(pfn);
> +		gfn++;
> +	}
> +}
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm)
> +{
> +	int i;
> +	down_read(&kvm->slots_lock);
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
> +				    kvm->memslots[i].npages);
> +	}
> +	up_read(&kvm->slots_lock);
> +
> +	return 0;
> +}
> +
> +int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +	struct kvm_assigned_dev_kernel *entry;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	/* check if iommu exists and in use */
> +	if (!domain)
> +		return 0;
> +
> +	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
> +		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
> +		       entry->host_busnr,
> +		       PCI_SLOT(entry->host_devfn),
> +		       PCI_FUNC(entry->host_devfn));
> +
> +		/* detach kvm dmar domain */
> +		intel_iommu_detach_dev(domain, entry->host_busnr,
> +				       entry->host_devfn);
> +	}
> +	kvm_iommu_unmap_memslots(kvm);
> +	intel_iommu_domain_exit(domain);
> +	return 0;
> +}
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 3f3cb71..342f67a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -35,6 +35,7 @@
>  #include <linux/module.h>
>  #include <linux/mman.h>
>  #include <linux/highmem.h>
> +#include <linux/intel-iommu.h>
> 
>  #include <asm/uaccess.h>
>  #include <asm/msr.h>
> @@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm
> *kvm, 
> 
>  	list_add(&match->list, &kvm->arch.assigned_dev_head);
> 
> +	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
> +		r = kvm_iommu_map_guest(kvm, match);
> +		if (r)
> +			goto out_list_del;
> +	}
> +
>  out:
>  	mutex_unlock(&kvm->lock);
>  	return r;
> +out_list_del:
> +	list_del(&match->list);
> +	pci_release_regions(dev);
>  out_disable:
>  	pci_disable_device(dev);
>  out_put:
> @@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	case KVM_CAP_PV_MMU:
>  		r = !tdp_enabled;
>  		break;
> +	case KVM_CAP_IOMMU:
> +		r = intel_iommu_found();
> +		break;
>  	default:
>  		r = 0;
>  		break;
> @@ -4266,6 +4279,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
> 
>  void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
> +	kvm_iommu_unmap_guest(kvm);
>  	kvm_free_assigned_devices(kvm);
>  	kvm_free_pit(kvm);
>  	kfree(kvm->arch.vpic);
> diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> index 815efc3..addd874 100644
> --- a/include/asm-x86/kvm_host.h
> +++ b/include/asm-x86/kvm_host.h
> @@ -364,6 +364,7 @@ struct kvm_arch{
>  	 */
>  	struct list_head active_mmu_pages;
>  	struct list_head assigned_dev_head;
> +	struct dmar_domain *intel_iommu_domain;
>  	struct kvm_pic *vpic;
>  	struct kvm_ioapic *vioapic;
>  	struct kvm_pit *vpit;
> @@ -514,6 +515,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu,
>  gpa_t gpa, int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long
>  		  bytes, gpa_t addr, unsigned long *ret);
> 
> +int is_mmio_pfn(pfn_t pfn);
> +
>  extern bool tdp_enabled;
> 
>  enum emulation_result {
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index ef4bc6f..4269be1 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -384,6 +384,7 @@ struct kvm_trace_rec {
>  #define KVM_CAP_COALESCED_MMIO 15
>  #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected
>  in guest */ #define KVM_CAP_DEVICE_ASSIGNMENT 17
> +#define KVM_CAP_IOMMU 18
> 
>  /*
>   * ioctls for VM fds
> @@ -495,4 +496,6 @@ struct kvm_assigned_irq {
>  	__u32 flags;
>  };
> 
> +#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
> +
>  #endif
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index a18aaad..b703890 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
> 
> +#ifdef CONFIG_DMAR
> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
> +			unsigned long npages);
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_assigned_dev_kernel *assigned_dev);
> +int kvm_iommu_unmap_guest(struct kvm *kvm);
> +#else /* CONFIG_DMAR */
> +static inline int kvm_iommu_map_pages(struct kvm *kvm,
> +				      gfn_t base_gfn,
> +				      unsigned long npages)
> +{
> +	return 0;
> +}
> +
> +static inline int kvm_iommu_map_guest(struct kvm *kvm,
> +				      struct kvm_assigned_dev_kernel
> +				      *assigned_dev)
> +{
> +	return -ENODEV;
> +}
> +
> +static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +	return 0;
> +}
> +#endif /* CONFIG_DMAR */
> +
>  static inline void kvm_guest_enter(void)
>  {
>  	account_system_vtime(current);
> @@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
>  	return (gpa_t)gfn << PAGE_SHIFT;
>  }
> 
> +static inline hpa_t pfn_to_hpa(pfn_t pfn)
> +{
> +	return (hpa_t)pfn << PAGE_SHIFT;
> +}
> +
>  static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
>  {
>  	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index de3b029..6b55960 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -41,6 +41,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/mman.h>
>  #include <linux/swap.h>
> +#include <linux/intel-iommu.h>
> 
>  #include <asm/processor.h>
>  #include <asm/io.h>
> @@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
>  	return likely(n >= 0 && n < KVM_MAX_VCPUS);
>  }
> 
> -static inline int is_mmio_pfn(pfn_t pfn)
> +inline int is_mmio_pfn(pfn_t pfn)
>  {
>  	if (pfn_valid(pfn))
>  		return PageReserved(pfn_to_page(pfn));
> @@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
>  	}
> 
>  	kvm_free_physmem_slot(&old, &new);
> +
> +	/* map the pages in iommu page table */
> +	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
> +	if (r)
> +		goto out;
> +
>  	return 0;
> 
>  out_free:


[-- Attachment #2: kvm-device-assignment-with-vtd.patch --]
[-- Type: application/octet-stream, Size: 12405 bytes --]

From: Ben-Ami Yassour <benami@il.ibm.com>

Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
---
 arch/x86/kvm/Makefile      |    3 +
 arch/x86/kvm/vtd.c         |  198 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |   14 +++
 include/asm-x86/kvm_host.h |   23 +-----
 include/linux/kvm.h        |    3 +
 include/linux/kvm_host.h   |   52 ++++++++++++
 virt/kvm/kvm_main.c        |    9 ++-
 7 files changed, 281 insertions(+), 21 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..3072b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
 	i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 0000000..667bf3f
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+				gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, r;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	r = -EINVAL;
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn && !is_mmio_pfn(pfn))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (!is_mmio_pfn(pfn)) {
+			r = intel_iommu_page_mapping(domain,
+						     gfn_to_gpa(gfn),
+						     pfn_to_hpa(pfn),
+						     PAGE_SIZE,
+						     DMA_PTE_READ |
+						     DMA_PTE_WRITE);
+			if (r) {
+				printk(KERN_DEBUG "kvm_iommu_map_pages:"
+				       "iommu failed to map pfn=%lx\n", pfn);
+				goto unmap_pages;
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			goto unmap_pages;
+		}
+		gfn++;
+	}
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, base_gfn, i);
+	return r;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, r;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					kvm->memslots[i].npages);
+		if (r)
+			break;
+	}
+	up_read(&kvm->slots_lock);
+	return r;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int r;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	if (!kvm->arch.intel_iommu_domain)
+		return -ENODEV;
+
+	r = kvm_iommu_map_memslots(kvm);
+	if (r)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					pdev);
+	if (r) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain, entry->host_busnr,
+				       entry->host_devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb71..342f67a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
 out_disable:
 	pci_disable_device(dev);
 out_put:
@@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -4266,6 +4279,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 815efc3..d1175b8 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -331,26 +331,6 @@ struct kvm_mem_alias {
 	gfn_t target_gfn;
 };
 
-struct kvm_irq_ack_notifier {
-	struct hlist_node link;
-	unsigned gsi;
-	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
-};
-
-struct kvm_assigned_dev_kernel {
-	struct kvm_irq_ack_notifier ack_notifier;
-	struct work_struct interrupt_work;
-	struct list_head list;
-	int assigned_dev_id;
-	int host_busnr;
-	int host_devfn;
-	int host_irq;
-	int guest_irq;
-	int irq_requested;
-	struct pci_dev *dev;
-	struct kvm *kvm;
-};
-
 struct kvm_arch{
 	int naliases;
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -364,6 +344,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -514,6 +495,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
 
+int is_mmio_pfn(pfn_t pfn);
+
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef4bc6f..4269be1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -384,6 +384,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#define KVM_CAP_IOMMU 18
 
 /*
  * ioctls for VM fds
@@ -495,4 +496,6 @@ struct kvm_assigned_irq {
 	__u32 flags;
 };
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad..9f48374 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,53 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+struct kvm_irq_ack_notifier {
+	struct hlist_node link;
+	unsigned gsi;
+	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
+};
+
+struct kvm_assigned_dev_kernel {
+	struct kvm_irq_ack_notifier ack_notifier;
+	struct work_struct interrupt_work;
+	struct list_head list;
+	int assigned_dev_id;
+	int host_busnr;
+	int host_devfn;
+	int host_irq;
+	int guest_irq;
+	int irq_requested;
+	struct pci_dev *dev;
+	struct kvm *kvm;
+};
+
+#ifdef CONFIG_DMAR
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+#else /* CONFIG_DMAR */
+static inline int kvm_iommu_map_pages(struct kvm *kvm,
+				      gfn_t base_gfn,
+				      unsigned long npages)
+{
+	return 0;
+}
+
+static inline int kvm_iommu_map_guest(struct kvm *kvm,
+				      struct kvm_assigned_dev_kernel
+				      *assigned_dev)
+{
+	return -ENODEV;
+}
+
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
@@ -307,6 +354,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
+static inline hpa_t pfn_to_hpa(pfn_t pfn)
+{
+	return (hpa_t)pfn << PAGE_SHIFT;
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de3b029..6b55960 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out;
+
 	return 0;
 
 out_free:
-- 
1.5.1


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] KVM: Device Assignment with VT-d
  2008-09-11  7:21     ` Han, Weidong
@ 2008-09-14  0:49       ` Avi Kivity
  0 siblings, 0 replies; 22+ messages in thread
From: Avi Kivity @ 2008-09-14  0:49 UTC (permalink / raw)
  To: Han, Weidong
  Cc: Amit Shah, kvm, muli, anthony, jbarnes, Woodhouse, David,
	Gross, Mark, benami, Kay, Allen M

Han, Weidong wrote:
> This patch can only work on x86; it breaks the build on other
> architectures. This is caused by kvm_irq_ack_notifier and
> kvm_assigned_dev_kernel being defined under x86, while they are always
> used in include/linux/kvm_host.h whether CONFIG_DMAR is set or not. I
> moved these two definitions to include/linux/kvm_host.h and attached
> the updated patch.
>
>   

Thanks.  I replaced the old patch with your new version.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2008-09-14  0:49 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-26  8:55 VT-d support for device assignment Amit Shah
2008-08-26  8:55 ` [PATCH 1/2] VT-d: changes to support KVM Amit Shah
2008-08-26  8:55   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
2008-08-26 10:28     ` Zhang, Xiantao
2008-08-26 10:35       ` Amit Shah
2008-08-26 10:42         ` Zhang, Xiantao
2008-08-26 10:57           ` Amit Shah
2008-08-26 11:04             ` Zhang, Xiantao
2008-08-26 14:41           ` Avi Kivity
2008-08-26 15:09             ` Han, Weidong
2008-09-03 16:52     ` Amit Shah
2008-09-09  7:18       ` Han, Weidong
2008-08-26  9:10 ` VT-d support for device assignment Avi Kivity
2008-08-26 14:11   ` Amit Shah
2008-08-26 14:38     ` Avi Kivity
  -- strict thread matches above, loose matches on Subject: below --
2008-09-09 13:51 [PATCH 2/2] KVM: Device Assignment with VT-d Han, Weidong
2008-09-09 14:39 ` Amit Shah
2008-09-09 15:05   ` Han, Weidong
2008-09-09 14:44 VT-d support for device assignment Amit Shah
2008-09-09 14:44 ` [PATCH 1/2] VT-d: Changes to support KVM Amit Shah
2008-09-09 14:44   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
2008-09-09 15:37 VT-d support for device assignment Amit Shah
2008-09-09 15:37 ` [PATCH 1/2] VT-d: Changes to support KVM Amit Shah
2008-09-09 15:37   ` [PATCH 2/2] KVM: Device Assignment with VT-d Amit Shah
2008-09-11  7:21     ` Han, Weidong
2008-09-14  0:49       ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox