* [PATCH 01/12] VT-d: Support multiple device assignment for KVM
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
@ 2008-12-02 13:01 ` Joerg Roedel
2008-12-02 13:01 ` [PATCH 02/12] KVM: change to use new APIs for kvm vtd Joerg Roedel
` (12 subsequent siblings)
13 siblings, 0 replies; 19+ messages in thread
From: Joerg Roedel @ 2008-12-02 13:01 UTC (permalink / raw)
To: Ingo Molnar, Avi Kivity, David Woodhouse, Greg Kroah-Hartman,
Alexander Graf, Han Weidong
Cc: linux-kernel, kvm, iommu, Weidong Han, Joerg Roedel
From: Weidong Han <weidong.han@intel.com>
In order to support multiple device assignment for KVM, this patch makes
the following main changes:
- extend dmar_domain to own multiple devices from different iommus,
use a bitmap of iommus to replace iommu pointer in dmar_domain.
- implement independent low-level functions for kvm, so they won't
impact native VT-d.
- "SAGAW" capability may be different across iommus, that's to
say the VT-d page table levels may be different among iommus.
This patch uses a default agaw, and skips top levels of page
tables for iommus which have a smaller agaw than the default.
- rename the APIs for kvm VT-d, making them more readable.
[Joerg: coding style cleanups]
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/pci/dmar.c | 15 +-
drivers/pci/intel-iommu.c | 696 ++++++++++++++++++++++++++++++++++------
include/linux/dma_remapping.h | 21 +-
include/linux/dmar.h | 2 +
include/linux/intel-iommu.h | 21 +-
5 files changed, 636 insertions(+), 119 deletions(-)
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 691b3ad..d54d3db 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -484,13 +484,14 @@ void __init detect_intel_iommu(void)
dmar_tbl = NULL;
}
-
int alloc_iommu(struct dmar_drhd_unit *drhd)
{
struct intel_iommu *iommu;
int map_size;
u32 ver;
static int iommu_allocated = 0;
+ unsigned long sagaw;
+ int agaw;
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
if (!iommu)
@@ -506,6 +507,18 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+ /* set agaw, "SAGAW" may be different across iommus */
+ sagaw = cap_sagaw(iommu->cap);
+ for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+ agaw >= 0; agaw--)
+ if (test_bit(agaw, &sagaw))
+ break;
+ if (agaw < 0) {
+ printk(KERN_ERR "IOMMU: unsupported sagaw %lx\n", sagaw);
+ goto error;
+ }
+ iommu->agaw = agaw;
+
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
cap_max_fault_reg_offset(iommu->cap));
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 5c8baa4..7f12852 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -50,8 +50,6 @@
#define IOAPIC_RANGE_END (0xfeefffff)
#define IOVA_START_ADDR (0x1000)
-#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
-
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
@@ -64,6 +62,7 @@ struct deferred_flush_tables {
int next;
struct iova *iova[HIGH_WATER_MARK];
struct dmar_domain *domain[HIGH_WATER_MARK];
+ struct intel_iommu *iommu;
};
static struct deferred_flush_tables *deferred_flush;
@@ -184,6 +183,68 @@ void free_iova_mem(struct iova *iova)
kmem_cache_free(iommu_iova_cache, iova);
}
+/* in native case, each domain is related to only one iommu */
+static struct intel_iommu *domain_get_only_iommu(struct dmar_domain *domain)
+{
+ struct dmar_drhd_unit *drhd;
+
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+ if (test_bit(drhd->iommu->seq_id, &domain->iommu_bmp))
+ return drhd->iommu;
+ }
+
+ return NULL;
+}
+
+static void domain_flush_cache(struct dmar_domain *domain,
+ void *addr, int size)
+{
+ struct intel_iommu *iommu;
+
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+ struct dmar_drhd_unit *drhd;
+
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+ iommu = drhd->iommu;
+
+ if (!test_bit(iommu->seq_id, &domain->iommu_bmp))
+ continue;
+
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(addr, size);
+ }
+ } else {
+ iommu = domain_get_only_iommu(domain);
+ if (iommu && !ecap_coherent(iommu->ecap))
+ clflush_cache_range(addr, size);
+ }
+}
+
+static struct intel_iommu *device_find_matched_iommu(u8 bus, u8 devfn)
+{
+ struct dmar_drhd_unit *drhd = NULL;
+ int i;
+
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+
+ for (i = 0; i < drhd->devices_cnt; i++)
+ if (drhd->devices[i]->bus->number == bus &&
+ drhd->devices[i]->devfn == devfn)
+ return drhd->iommu;
+
+ if (drhd->include_all)
+ return drhd->iommu;
+ }
+
+ return NULL;
+}
+
/* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
u8 bus, u8 devfn)
@@ -287,7 +348,7 @@ static inline int agaw_to_width(int agaw)
}
-static inline int width_to_agaw(int width)
+int width_to_agaw(int width)
{
return (width - 30) / LEVEL_STRIDE;
}
@@ -347,8 +408,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
flags);
return NULL;
}
- __iommu_flush_cache(domain->iommu, tmp_page,
- PAGE_SIZE);
+ domain_flush_cache(domain, tmp_page, PAGE_SIZE);
dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
/*
* high level table always sets r/w, last level page
@@ -356,7 +416,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
*/
dma_set_pte_readable(*pte);
dma_set_pte_writable(*pte);
- __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ domain_flush_cache(domain, pte, sizeof(*pte));
}
parent = phys_to_virt(dma_pte_addr(*pte));
level--;
@@ -399,7 +459,7 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
if (pte) {
dma_clear_pte(*pte);
- __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ domain_flush_cache(domain, pte, sizeof(*pte));
}
}
@@ -447,8 +507,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
free_pgtable_page(
phys_to_virt(dma_pte_addr(*pte)));
dma_clear_pte(*pte);
- __iommu_flush_cache(domain->iommu,
- pte, sizeof(*pte));
+ domain_flush_cache(domain, pte, sizeof(*pte));
}
tmp += level_size(level);
}
@@ -948,8 +1007,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
return 0;
}
-
static void domain_exit(struct dmar_domain *domain);
+static void vm_domain_exit(struct dmar_domain *domain);
void free_dmar_iommu(struct intel_iommu *iommu)
{
@@ -960,7 +1019,13 @@ void free_dmar_iommu(struct intel_iommu *iommu)
for (; i < cap_ndoms(iommu->cap); ) {
domain = iommu->domains[i];
clear_bit(i, iommu->domain_ids);
- domain_exit(domain);
+
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+ if (--domain->iommu_count == 0)
+ vm_domain_exit(domain);
+ } else
+ domain_exit(domain);
+
i = find_next_bit(iommu->domain_ids,
cap_ndoms(iommu->cap), i+1);
}
@@ -1006,8 +1071,11 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
set_bit(num, iommu->domain_ids);
domain->id = num;
- domain->iommu = iommu;
+ memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+ set_bit(iommu->seq_id, &domain->iommu_bmp);
iommu->domains[num] = domain;
+ domain->iommu_count = 1;
+ domain->flags = 0;
spin_unlock_irqrestore(&iommu->lock, flags);
return domain;
@@ -1016,10 +1084,12 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
static void iommu_free_domain(struct dmar_domain *domain)
{
unsigned long flags;
+ struct intel_iommu *iommu;
- spin_lock_irqsave(&domain->iommu->lock, flags);
- clear_bit(domain->id, domain->iommu->domain_ids);
- spin_unlock_irqrestore(&domain->iommu->lock, flags);
+ iommu = domain_get_only_iommu(domain);
+ spin_lock_irqsave(&iommu->lock, flags);
+ clear_bit(domain->id, iommu->domain_ids);
+ spin_unlock_irqrestore(&iommu->lock, flags);
}
static struct iova_domain reserved_iova_list;
@@ -1098,7 +1168,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
domain_reserve_special_ranges(domain);
/* calculate AGAW */
- iommu = domain->iommu;
+ iommu = domain_get_only_iommu(domain);
if (guest_width > cap_mgaw(iommu->cap))
guest_width = cap_mgaw(iommu->cap);
domain->gaw = guest_width;
@@ -1107,19 +1177,21 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
sagaw = cap_sagaw(iommu->cap);
if (!test_bit(agaw, &sagaw)) {
/* hardware doesn't support it, choose a bigger one */
- pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
+ pr_debug("IOMMU: hardware doesn't support agaw %d\n",
+ agaw);
agaw = find_next_bit(&sagaw, 5, agaw);
if (agaw >= 5)
return -ENODEV;
}
domain->agaw = agaw;
+
INIT_LIST_HEAD(&domain->devices);
/* always allocate the top pgd */
domain->pgd = (struct dma_pte *)alloc_pgtable_page();
if (!domain->pgd)
return -ENOMEM;
- __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
+ domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
return 0;
}
@@ -1148,10 +1220,9 @@ static void domain_exit(struct dmar_domain *domain)
}
static int domain_context_mapping_one(struct dmar_domain *domain,
- u8 bus, u8 devfn)
+ struct intel_iommu *iommu, u8 bus, u8 devfn)
{
struct context_entry *context;
- struct intel_iommu *iommu = domain->iommu;
unsigned long flags;
pr_debug("Set context mapping for %02x:%02x.%d\n",
@@ -1191,9 +1262,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
{
int ret;
struct pci_dev *tmp, *parent;
+ struct intel_iommu *iommu;
- ret = domain_context_mapping_one(domain, pdev->bus->number,
- pdev->devfn);
+ iommu = device_find_matched_iommu(pdev->bus->number, pdev->devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ ret = domain_context_mapping_one(domain, iommu,
+ pdev->bus->number, pdev->devfn);
if (ret)
return ret;
@@ -1204,27 +1280,31 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
/* Secondary interface's bus number and devfn 0 */
parent = pdev->bus->self;
while (parent != tmp) {
- ret = domain_context_mapping_one(domain, parent->bus->number,
- parent->devfn);
+ ret = domain_context_mapping_one(domain, iommu,
+ parent->bus->number, parent->devfn);
if (ret)
return ret;
parent = parent->bus->self;
}
if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
- return domain_context_mapping_one(domain,
+ return domain_context_mapping_one(domain, iommu,
tmp->subordinate->number, 0);
else /* this is a legacy PCI bridge */
- return domain_context_mapping_one(domain,
+ return domain_context_mapping_one(domain, iommu,
tmp->bus->number, tmp->devfn);
}
-static int domain_context_mapped(struct dmar_domain *domain,
- struct pci_dev *pdev)
+static int domain_context_mapped(struct pci_dev *pdev)
{
int ret;
struct pci_dev *tmp, *parent;
+ struct intel_iommu *iommu;
+
+ iommu = device_find_matched_iommu(pdev->bus->number, pdev->devfn);
+ if (!iommu)
+ return 0;
- ret = device_context_mapped(domain->iommu,
+ ret = device_context_mapped(iommu,
pdev->bus->number, pdev->devfn);
if (!ret)
return ret;
@@ -1235,17 +1315,17 @@ static int domain_context_mapped(struct dmar_domain *domain,
/* Secondary interface's bus number and devfn 0 */
parent = pdev->bus->self;
while (parent != tmp) {
- ret = device_context_mapped(domain->iommu, parent->bus->number,
+ ret = device_context_mapped(iommu, parent->bus->number,
parent->devfn);
if (!ret)
return ret;
parent = parent->bus->self;
}
if (tmp->is_pcie)
- return device_context_mapped(domain->iommu,
+ return device_context_mapped(iommu,
tmp->subordinate->number, 0);
else
- return device_context_mapped(domain->iommu,
+ return device_context_mapped(iommu,
tmp->bus->number, tmp->devfn);
}
@@ -1276,20 +1356,27 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
BUG_ON(dma_pte_addr(*pte));
dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
dma_set_pte_prot(*pte, prot);
- __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ domain_flush_cache(domain, pte, sizeof(*pte));
start_pfn++;
index++;
}
return 0;
}
-static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+static void detach_domain_for_dev(struct dmar_domain *domain,
+ u8 bus, u8 devfn)
{
- clear_context_table(domain->iommu, bus, devfn);
- domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
- DMA_CCMD_GLOBAL_INVL, 0);
- domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
- DMA_TLB_GLOBAL_FLUSH, 0);
+ struct intel_iommu *iommu;
+
+ iommu = device_find_matched_iommu(bus, devfn);
+ if (!iommu)
+ return;
+
+ clear_context_table(iommu, bus, devfn);
+ iommu->flush.flush_context(iommu, 0, 0, 0,
+ DMA_CCMD_GLOBAL_INVL, 0);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0,
+ DMA_TLB_GLOBAL_FLUSH, 0);
}
static void domain_remove_dev_info(struct dmar_domain *domain)
@@ -1336,7 +1423,6 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
struct dmar_domain *domain, *found = NULL;
struct intel_iommu *iommu;
- struct dmar_drhd_unit *drhd;
struct device_domain_info *info, *tmp;
struct pci_dev *dev_tmp;
unsigned long flags;
@@ -1371,13 +1457,9 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
}
/* Allocate new domain for the device */
- drhd = dmar_find_matched_drhd_unit(pdev);
- if (!drhd) {
- printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
- pci_name(pdev));
- return NULL;
- }
- iommu = drhd->iommu;
+ iommu = device_find_matched_iommu(pdev->bus->number, pdev->devfn);
+ if (!iommu)
+ return NULL;
domain = iommu_alloc_domain(iommu);
if (!domain)
@@ -1400,7 +1482,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
info->dev = NULL;
info->domain = domain;
/* This domain is shared by devices under p2p bridge */
- domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
+ domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
/* pcie-to-pci bridge already has a domain, uses it */
found = NULL;
@@ -1805,7 +1887,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
}
/* make sure context mapping is ok */
- if (unlikely(!domain_context_mapped(domain, pdev))) {
+ if (unlikely(!domain_context_mapped(pdev))) {
ret = domain_context_mapping(domain, pdev);
if (ret) {
printk(KERN_ERR
@@ -1823,6 +1905,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
{
struct pci_dev *pdev = to_pci_dev(hwdev);
struct dmar_domain *domain;
+ struct intel_iommu *iommu;
phys_addr_t start_paddr;
struct iova *iova;
int prot = 0;
@@ -1836,6 +1919,10 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
if (!domain)
return 0;
+ iommu = device_find_matched_iommu(pdev->bus->number, pdev->devfn);
+ if (!iommu)
+ return 0;
+
size = aligned_size((u64)paddr, size);
iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
@@ -1849,7 +1936,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
* mappings..
*/
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
- !cap_zlr(domain->iommu->cap))
+ !cap_zlr(iommu->cap))
prot |= DMA_PTE_READ;
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;
@@ -1865,10 +1952,10 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
goto error;
/* it's a non-present to present mapping */
- ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
+ ret = iommu_flush_iotlb_psi(iommu, domain->id,
start_paddr, size >> VTD_PAGE_SHIFT, 1);
if (ret)
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
return start_paddr + ((u64)paddr & (~PAGE_MASK));
@@ -1896,8 +1983,7 @@ static void flush_unmaps(void)
/* just flush them all */
for (i = 0; i < g_num_of_iommus; i++) {
if (deferred_flush[i].next) {
- struct intel_iommu *iommu =
- deferred_flush[i].domain[0]->iommu;
+ struct intel_iommu *iommu = deferred_flush[i].iommu;
iommu->flush.flush_iotlb(iommu, 0, 0, 0,
DMA_TLB_GLOBAL_FLUSH, 0);
@@ -1921,7 +2007,8 @@ static void flush_unmaps_timeout(unsigned long data)
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
-static void add_unmap(struct dmar_domain *dom, struct iova *iova)
+static void add_unmap(struct dmar_domain *dom,
+ struct intel_iommu *iommu, struct iova *iova)
{
unsigned long flags;
int next, iommu_id;
@@ -1930,11 +2017,12 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
if (list_size == HIGH_WATER_MARK)
flush_unmaps();
- iommu_id = dom->iommu->seq_id;
+ iommu_id = iommu->seq_id;
next = deferred_flush[iommu_id].next;
deferred_flush[iommu_id].domain[next] = dom;
deferred_flush[iommu_id].iova[next] = iova;
+ deferred_flush[iommu_id].iommu = iommu;
deferred_flush[iommu_id].next++;
if (!timer_on) {
@@ -1952,12 +2040,17 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
struct dmar_domain *domain;
unsigned long start_addr;
struct iova *iova;
+ struct intel_iommu *iommu;
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return;
domain = find_domain(pdev);
BUG_ON(!domain);
+ iommu = device_find_matched_iommu(pdev->bus->number, pdev->devfn);
+ if (!iommu)
+ return;
+
iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
if (!iova)
return;
@@ -1973,13 +2066,13 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
if (intel_iommu_strict) {
- if (iommu_flush_iotlb_psi(domain->iommu,
+ if (iommu_flush_iotlb_psi(iommu,
domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
/* free iova */
__free_iova(&domain->iovad, iova);
} else {
- add_unmap(domain, iova);
+ add_unmap(domain, iommu, iova);
/*
* queue up the release of the unmap to save the 1/6th of the
* cpu used up by the iotlb flush operation...
@@ -2036,12 +2129,17 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
size_t size = 0;
void *addr;
struct scatterlist *sg;
+ struct intel_iommu *iommu;
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return;
domain = find_domain(pdev);
+ iommu = device_find_matched_iommu(pdev->bus->number, pdev->devfn);
+ if (!iommu)
+ return;
+
iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
if (!iova)
return;
@@ -2057,9 +2155,9 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
- if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
+ if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
size >> VTD_PAGE_SHIFT, 0))
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
/* free iova */
__free_iova(&domain->iovad, iova);
@@ -2093,6 +2191,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
int ret;
struct scatterlist *sg;
unsigned long start_addr;
+ struct intel_iommu *iommu;
BUG_ON(dir == DMA_NONE);
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
@@ -2102,6 +2201,10 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
if (!domain)
return 0;
+ iommu = device_find_matched_iommu(pdev->bus->number, pdev->devfn);
+ if (!iommu)
+ return 0;
+
for_each_sg(sglist, sg, nelems, i) {
addr = SG_ENT_VIRT_ADDRESS(sg);
addr = (void *)virt_to_phys(addr);
@@ -2119,7 +2222,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
* mappings..
*/
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
- !cap_zlr(domain->iommu->cap))
+ !cap_zlr(iommu->cap))
prot |= DMA_PTE_READ;
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;
@@ -2151,9 +2254,9 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
}
/* it's a non-present to present mapping */
- if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
+ if (iommu_flush_iotlb_psi(iommu, domain->id,
start_addr, offset >> VTD_PAGE_SHIFT, 1))
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
return nelems;
}
@@ -2328,7 +2431,314 @@ int __init intel_iommu_init(void)
return 0;
}
-void intel_iommu_domain_exit(struct dmar_domain *domain)
+/* domain id for virtual machine, it won't be set in context */
+static unsigned long vm_domid;
+
+static int vm_domain_min_agaw(struct dmar_domain *domain)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ int min_agaw = domain->agaw;
+
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+ iommu = drhd->iommu;
+
+ if (test_bit(iommu->seq_id, &domain->iommu_bmp))
+ if (min_agaw > iommu->agaw)
+ min_agaw = iommu->agaw;
+ }
+
+ return min_agaw;
+}
+
+static int vm_domain_add_dev_info(struct dmar_domain *domain,
+ struct pci_dev *pdev)
+{
+ struct device_domain_info *info;
+ unsigned long flags;
+
+ info = alloc_devinfo_mem();
+ if (!info)
+ return -ENOMEM;
+
+ info->bus = pdev->bus->number;
+ info->devfn = pdev->devfn;
+ info->dev = pdev;
+ info->domain = domain;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ list_add(&info->link, &domain->devices);
+ list_add(&info->global, &device_domain_list);
+ pdev->dev.archdata.iommu = info;
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ return 0;
+}
+
+static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
+ struct pci_dev *pdev)
+{
+ struct device_domain_info *info;
+ struct intel_iommu *iommu;
+ unsigned long flags;
+ int found = 0;
+
+ iommu = device_find_matched_iommu(pdev->bus->number, pdev->devfn);
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ while (!list_empty(&domain->devices)) {
+ info = list_entry(domain->devices.next,
+ struct device_domain_info, link);
+ if (info->bus == pdev->bus->number &&
+ info->devfn == pdev->devfn) {
+ list_del(&info->link);
+ list_del(&info->global);
+ if (info->dev)
+ info->dev->dev.archdata.iommu = NULL;
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ detach_domain_for_dev(info->domain,
+ info->bus, info->devfn);
+ free_devinfo_mem(info);
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+
+ if (found)
+ break;
+ else
+ continue;
+ }
+
+ /* if there is no other devices under the same iommu
+ * owned by this domain, clear this iommu in iommu_bmp
+ */
+ if (device_find_matched_iommu(info->bus, info->devfn) == iommu)
+ found = 1;
+ }
+
+ if (found == 0) {
+ spin_lock_irqsave(&iommu->lock, flags);
+ clear_bit(iommu->seq_id, &domain->iommu_bmp);
+ domain->iommu_count--;
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+}
+
+static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
+{
+ struct device_domain_info *info;
+ struct intel_iommu *iommu;
+ unsigned long flags;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ while (!list_empty(&domain->devices)) {
+ info = list_entry(domain->devices.next,
+ struct device_domain_info, link);
+ list_del(&info->link);
+ list_del(&info->global);
+ if (info->dev)
+ info->dev->dev.archdata.iommu = NULL;
+
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+ detach_domain_for_dev(info->domain,
+ info->bus, info->devfn);
+
+ /* clear this iommu in iommu_bmp */
+ iommu = device_find_matched_iommu(info->bus, info->devfn);
+ spin_lock_irqsave(&iommu->lock, flags);
+ if (test_and_clear_bit(iommu->seq_id,
+ &domain->iommu_bmp))
+ domain->iommu_count--;
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ free_devinfo_mem(info);
+ spin_lock_irqsave(&device_domain_lock, flags);
+ }
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+}
+
+static int vm_domain_context_mapping_one(struct dmar_domain *domain,
+ struct intel_iommu *iommu, u8 bus, u8 devfn)
+{
+ struct context_entry *context;
+ unsigned long flags;
+ struct dma_pte *pgd;
+ unsigned long num;
+ unsigned long ndomains;
+ int id;
+ int agaw;
+ int found = 0;
+
+ pr_debug("Set context mapping for %02x:%02x.%d\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ BUG_ON(!domain->pgd);
+ context = device_to_context_entry(iommu, bus, devfn);
+ if (!context)
+ return -ENOMEM;
+ spin_lock_irqsave(&iommu->lock, flags);
+ if (context_present(*context)) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return 0;
+ }
+
+ id = domain->id;
+
+ /* find an available domain id for this device in iommu */
+ ndomains = cap_ndoms(iommu->cap);
+ num = find_first_bit(iommu->domain_ids, ndomains);
+ for (; num < ndomains; ) {
+ if (iommu->domains[num] == domain) {
+ id = num;
+ found = 1;
+ break;
+ }
+ num = find_next_bit(iommu->domain_ids,
+ cap_ndoms(iommu->cap), num+1);
+ }
+
+ if (found == 0) {
+ num = find_first_zero_bit(iommu->domain_ids, ndomains);
+ if (num >= ndomains) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ printk(KERN_ERR "IOMMU: no free domain ids\n");
+ return -EFAULT;
+ }
+
+ set_bit(num, iommu->domain_ids);
+ iommu->domains[num] = domain;
+ id = num;
+ }
+
+ pgd = domain->pgd;
+
+ /* Skip top levels of page tables for
+ * iommu which has less agaw than default.
+ */
+ for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
+ pgd = phys_to_virt(dma_pte_addr(*pgd));
+ if (!dma_pte_present(*pgd)) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return -ENOMEM;
+ }
+ }
+
+ context_set_domain_id(*context, id);
+ context_set_address_width(*context, iommu->agaw);
+ context_set_address_root(*context, virt_to_phys(pgd));
+ context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
+ context_set_fault_enable(*context);
+ context_set_present(*context);
+ __iommu_flush_cache(iommu, context, sizeof(*context));
+
+ /* it's a non-present to present mapping */
+ if (iommu->flush.flush_context(iommu, id,
+ (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL, 1))
+ iommu_flush_write_buffer(iommu);
+ else
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
+
+ if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp))
+ domain->iommu_count++;
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return 0;
+}
+
+static int
+vm_domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
+{
+ int ret;
+ struct pci_dev *tmp, *parent;
+ struct intel_iommu *iommu;
+
+ iommu = device_find_matched_iommu(pdev->bus->number, pdev->devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ ret = vm_domain_context_mapping_one(domain, iommu,
+ pdev->bus->number, pdev->devfn);
+ if (ret)
+ return ret;
+
+ /* dependent device mapping */
+ tmp = pci_find_upstream_pcie_bridge(pdev);
+ if (!tmp)
+ return 0;
+ /* Secondary interface's bus number and devfn 0 */
+ parent = pdev->bus->self;
+ while (parent != tmp) {
+ ret = vm_domain_context_mapping_one(domain, iommu,
+ parent->bus->number, parent->devfn);
+ if (ret)
+ return ret;
+ parent = parent->bus->self;
+ }
+ if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
+ return vm_domain_context_mapping_one(domain, iommu,
+ tmp->subordinate->number, 0);
+ else /* this is a legacy PCI bridge */
+ return vm_domain_context_mapping_one(domain, iommu,
+ tmp->bus->number, tmp->devfn);
+}
+
+
+static int vm_domain_init(struct dmar_domain *domain, int guest_width)
+{
+ int adjust_width;
+
+ init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
+ spin_lock_init(&domain->mapping_lock);
+
+ domain_reserve_special_ranges(domain);
+
+ /* calculate AGAW */
+ domain->gaw = guest_width;
+ adjust_width = guestwidth_to_adjustwidth(guest_width);
+ domain->agaw = width_to_agaw(adjust_width);
+
+ INIT_LIST_HEAD(&domain->devices);
+
+ /* always allocate the top pgd */
+ domain->pgd = (struct dma_pte *)alloc_pgtable_page();
+ if (!domain->pgd)
+ return -ENOMEM;
+ domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
+ return 0;
+}
+
+static void iommu_free_vm_domain(struct dmar_domain *domain)
+{
+ unsigned long flags;
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ unsigned long i;
+ unsigned long ndomains;
+
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+ iommu = drhd->iommu;
+
+ ndomains = cap_ndoms(iommu->cap);
+ i = find_first_bit(iommu->domain_ids, ndomains);
+ for (; i < ndomains; ) {
+ if (iommu->domains[i] == domain) {
+ spin_lock_irqsave(&iommu->lock, flags);
+ clear_bit(i, iommu->domain_ids);
+ iommu->domains[i] = NULL;
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ break;
+ }
+ i = find_next_bit(iommu->domain_ids, ndomains, i+1);
+ }
+ }
+}
+
+static void vm_domain_exit(struct dmar_domain *domain)
{
u64 end;
@@ -2336,8 +2746,11 @@ void intel_iommu_domain_exit(struct dmar_domain *domain)
if (!domain)
return;
+ vm_domain_remove_all_dev_info(domain);
+ /* destroy iovas */
+ put_iova_domain(&domain->iovad);
end = DOMAIN_MAX_ADDR(domain->gaw);
- end = end & (~VTD_PAGE_MASK);
+ end &= VTD_PAGE_MASK;
/* clear ptes */
dma_pte_clear_range(domain, 0, end);
@@ -2345,76 +2758,149 @@ void intel_iommu_domain_exit(struct dmar_domain *domain)
/* free page tables */
dma_pte_free_pagetable(domain, 0, end);
- iommu_free_domain(domain);
+ iommu_free_vm_domain(domain);
free_domain_mem(domain);
}
-EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
-struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
+static struct dmar_domain *iommu_alloc_vm_domain(void)
{
- struct dmar_drhd_unit *drhd;
struct dmar_domain *domain;
- struct intel_iommu *iommu;
- drhd = dmar_find_matched_drhd_unit(pdev);
- if (!drhd) {
- printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
+ domain = alloc_domain_mem();
+ if (!domain)
return NULL;
- }
- iommu = drhd->iommu;
- if (!iommu) {
- printk(KERN_ERR
- "intel_iommu_domain_alloc: iommu == NULL\n");
- return NULL;
- }
- domain = iommu_alloc_domain(iommu);
+ domain->id = vm_domid++;
+ domain->iommu_count = 0;
+ domain->max_addr = 0;
+ memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+ domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
+
+ return domain;
+}
+
+struct dmar_domain *intel_iommu_alloc_domain(void)
+{
+ struct dmar_domain *domain;
+
+ domain = iommu_alloc_vm_domain();
if (!domain) {
printk(KERN_ERR
"intel_iommu_domain_alloc: domain == NULL\n");
return NULL;
}
- if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+ if (vm_domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
printk(KERN_ERR
"intel_iommu_domain_alloc: domain_init() failed\n");
- intel_iommu_domain_exit(domain);
+ vm_domain_exit(domain);
return NULL;
}
+
return domain;
}
-EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain);
-int intel_iommu_context_mapping(
- struct dmar_domain *domain, struct pci_dev *pdev)
+void intel_iommu_free_domain(struct dmar_domain *domain)
{
- int rc;
- rc = domain_context_mapping(domain, pdev);
- return rc;
+ vm_domain_exit(domain);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_free_domain);
+
+int intel_iommu_assign_device(struct dmar_domain *domain,
+ struct pci_dev *pdev)
+{
+ struct intel_iommu *iommu;
+ int addr_width;
+ u64 end;
+ int ret;
+
+ /* normally pdev is not mapped */
+ if (unlikely(domain_context_mapped(pdev))) {
+ struct dmar_domain *old_domain;
+
+ old_domain = find_domain(pdev);
+ if (old_domain) {
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+ vm_domain_remove_one_dev_info(old_domain, pdev);
+ else
+ domain_remove_dev_info(old_domain);
+ }
+ }
+
+ iommu = device_find_matched_iommu(pdev->bus->number, pdev->devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ /* check if this iommu agaw is sufficient for max mapped address */
+ addr_width = agaw_to_width(iommu->agaw);
+ end = DOMAIN_MAX_ADDR(addr_width);
+ end = end & VTD_PAGE_MASK;
+ if (end < domain->max_addr) {
+ printk(KERN_ERR "%s: iommu agaw (%d) is not "
+ "sufficient for the mapped address (%llx)\n",
+ __func__, iommu->agaw, domain->max_addr);
+ return -EFAULT;
+ }
+
+ ret = vm_domain_context_mapping(domain, pdev);
+ if (ret)
+ return ret;
+
+ ret = vm_domain_add_dev_info(domain, pdev);
+ return ret;
}
-EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+EXPORT_SYMBOL_GPL(intel_iommu_assign_device);
-int intel_iommu_page_mapping(
- struct dmar_domain *domain, dma_addr_t iova,
- u64 hpa, size_t size, int prot)
+
+void intel_iommu_deassign_device(struct dmar_domain *domain,
+ struct pci_dev *pdev)
{
- int rc;
- rc = domain_page_mapping(domain, iova, hpa, size, prot);
- return rc;
+ vm_domain_remove_one_dev_info(domain, pdev);
}
-EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+EXPORT_SYMBOL_GPL(intel_iommu_deassign_device);
-void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova,
+ u64 hpa, size_t size, int prot)
{
- detach_domain_for_dev(domain, bus, devfn);
+ u64 max_addr;
+ int addr_width;
+ int ret;
+
+ max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
+ if (domain->max_addr < max_addr) {
+ int min_agaw;
+ u64 end;
+
+ /* check if minimum agaw is sufficient for mapped address */
+ min_agaw = vm_domain_min_agaw(domain);
+ addr_width = agaw_to_width(min_agaw);
+ end = DOMAIN_MAX_ADDR(addr_width);
+ end = end & VTD_PAGE_MASK;
+ if (end < max_addr) {
+ printk(KERN_ERR "%s: iommu agaw (%d) is not "
+ "sufficient for the mapped address (%llx)\n",
+ __func__, min_agaw, max_addr);
+ return -EFAULT;
+ }
+ domain->max_addr = max_addr;
+ }
+
+ ret = domain_page_mapping(domain, iova, hpa, size, prot);
+ return ret;
}
-EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+EXPORT_SYMBOL_GPL(intel_iommu_map_pages);
-struct dmar_domain *
-intel_iommu_find_domain(struct pci_dev *pdev)
+void intel_iommu_unmap_pages(struct dmar_domain *domain,
+ dma_addr_t iova, size_t size)
{
- return find_domain(pdev);
+ dma_addr_t base;
+
+ /* The address might not be aligned */
+ base = iova & PAGE_MASK;
+ size = PAGE_ALIGN(size);
+ dma_pte_clear_range(domain, base, base + size);
}
-EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+EXPORT_SYMBOL_GPL(intel_iommu_unmap_pages);
int intel_iommu_found(void)
{
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 952df39..b4200bd 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -111,11 +111,21 @@ struct dma_pte {
(p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
#define dma_pte_present(p) (((p).val & 3) != 0)
+/* domain flags, one domain owns one device by default */
+
+/* devices under the same p2p bridge are owned in one domain */
+#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
+
+/* domain represents a virtual machine, more than one devices
+ * across iommus may be owned in one domain, e.g. kvm guest.
+ */
+#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
+
struct intel_iommu;
struct dmar_domain {
int id; /* domain id */
- struct intel_iommu *iommu; /* back pointer to owning iommu */
+ unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
struct list_head devices; /* all devices' list */
struct iova_domain iovad; /* iova's that belong to this domain */
@@ -123,12 +133,13 @@ struct dmar_domain {
struct dma_pte *pgd; /* virtual address */
spinlock_t mapping_lock; /* page table lock */
int gaw; /* max guest address width */
+ int agaw; /* adjusted guest address width */
- /* adjusted guest address width, 0 is level 2 30-bit */
- int agaw;
+ int flags; /* domain flag */
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
- int flags;
+ /* following fields are used in virtual machine case */
+ int iommu_count; /* reference count of iommu */
+ u64 max_addr; /* maximum mapped address */
};
/* PCI domain-device relationship */
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index f1984fc..157dcb7 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -53,6 +53,8 @@ extern void detect_intel_iommu(void);
extern int parse_ioapics_under_ir(void);
extern int alloc_iommu(struct dmar_drhd_unit *);
+extern int width_to_agaw(int width);
+
#else
static inline void detect_intel_iommu(void)
{
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3d017cf..c2f37b8 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -219,6 +219,8 @@ do { \
} \
} while (0)
+#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
+
#define QI_LENGTH 256 /* queue length */
enum {
@@ -299,6 +301,7 @@ struct intel_iommu {
struct dmar_domain **domains; /* ptr to domains */
spinlock_t lock; /* protect context, domain ids */
struct root_entry *root_entry; /* virtual address */
+ int agaw;
unsigned int irq;
unsigned char name[7]; /* Device Name */
@@ -334,14 +337,16 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
-void intel_iommu_domain_exit(struct dmar_domain *domain);
-struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
-int intel_iommu_context_mapping(struct dmar_domain *domain,
- struct pci_dev *pdev);
-int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
- u64 hpa, size_t size, int prot);
-void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
-struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
+struct dmar_domain *intel_iommu_alloc_domain(void);
+void intel_iommu_free_domain(struct dmar_domain *domain);
+int intel_iommu_assign_device(struct dmar_domain *domain,
+ struct pci_dev *pdev);
+void intel_iommu_deassign_device(struct dmar_domain *domain,
+ struct pci_dev *pdev);
+int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova,
+ u64 hpa, size_t size, int prot);
+void intel_iommu_unmap_pages(struct dmar_domain *domain,
+ dma_addr_t iova, size_t size);
u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
#ifdef CONFIG_DMAR
--
1.5.6.4
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 02/12] KVM: change to use new APIs for kvm vtd
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
2008-12-02 13:01 ` [PATCH 01/12] VT-d: Support multiple device assignment for KVM Joerg Roedel
@ 2008-12-02 13:01 ` Joerg Roedel
2008-12-02 13:01 ` [PATCH 03/12] KVM: rename vtd.c to iommu.c Joerg Roedel
` (11 subsequent siblings)
13 siblings, 0 replies; 19+ messages in thread
From: Joerg Roedel @ 2008-12-02 13:01 UTC (permalink / raw)
To: Ingo Molnar, Avi Kivity, David Woodhouse, Greg Kroah-Hartman,
Alexander Graf, Han Weidong
Cc: linux-kernel, kvm, iommu, Weidong Han, Joerg Roedel
From: Weidong Han <weidong.han@intel.com>
This patch changes KVM VT-d to use the new APIs, and adds device
deassignment for hotplug.
[Joerg: coding style cleanups]
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/kvm.h | 5 ++
include/linux/kvm_host.h | 23 ++++++++--
virt/kvm/kvm_main.c | 49 ++++++++++++++++++++-
virt/kvm/vtd.c | 107 +++++++++++++++++++++++++++++++---------------
4 files changed, 143 insertions(+), 41 deletions(-)
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 0997e6f..49432e9 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -395,6 +395,9 @@ struct kvm_trace_rec {
#if defined(CONFIG_X86)
#define KVM_CAP_DEVICE_MSI 20
#endif
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
+#define KVM_CAP_DEVICE_DEASSIGNMENT 21
+#endif
/*
* ioctls for VM fds
@@ -428,6 +431,8 @@ struct kvm_trace_rec {
struct kvm_assigned_pci_dev)
#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
struct kvm_assigned_irq)
+#define KVM_DEASSIGN_PCI_DEVICE _IOR(KVMIO, 0x71, \
+ struct kvm_assigned_pci_dev)
/*
* ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8091a4d..cb1d404 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -329,9 +329,12 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
#ifdef CONFIG_DMAR
int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
unsigned long npages);
-int kvm_iommu_map_guest(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_map_guest(struct kvm *kvm);
int kvm_iommu_unmap_guest(struct kvm *kvm);
+int kvm_assign_device(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_deassign_device(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *assigned_dev);
#else /* CONFIG_DMAR */
static inline int kvm_iommu_map_pages(struct kvm *kvm,
gfn_t base_gfn,
@@ -340,9 +343,7 @@ static inline int kvm_iommu_map_pages(struct kvm *kvm,
return 0;
}
-static inline int kvm_iommu_map_guest(struct kvm *kvm,
- struct kvm_assigned_dev_kernel
- *assigned_dev)
+static inline int kvm_iommu_map_guest(struct kvm *kvm)
{
return -ENODEV;
}
@@ -351,6 +352,18 @@ static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
{
return 0;
}
+
+static inline int kvm_assign_device(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *assigned_dev)
+{
+ return 0;
+}
+
+static inline int kvm_deassign_device(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *assigned_dev)
+{
+ return 0;
+}
#endif /* CONFIG_DMAR */
static inline void kvm_guest_enter(void)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8dab7ce..fe6aba0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -472,7 +472,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
list_add(&match->list, &kvm->arch.assigned_dev_head);
if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
- r = kvm_iommu_map_guest(kvm, match);
+ if (!kvm->arch.intel_iommu_domain) {
+ r = kvm_iommu_map_guest(kvm);
+ if (r)
+ goto out_list_del;
+ }
+ r = kvm_assign_device(kvm, match);
if (r)
goto out_list_del;
}
@@ -494,6 +499,35 @@ out_free:
}
#endif
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
+ struct kvm_assigned_pci_dev *assigned_dev)
+{
+ int r = 0;
+ struct kvm_assigned_dev_kernel *match;
+
+ mutex_lock(&kvm->lock);
+
+ match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+ assigned_dev->assigned_dev_id);
+ if (!match) {
+ printk(KERN_INFO "%s: device hasn't been assigned before, "
+ "so cannot be deassigned\n", __func__);
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
+ kvm_deassign_device(kvm, match);
+
+ kvm_free_assigned_device(kvm, match);
+
+out:
+ mutex_unlock(&kvm->lock);
+ return r;
+}
+#endif
+
static inline int valid_vcpu(int n)
{
return likely(n >= 0 && n < KVM_MAX_VCPUS);
@@ -1833,6 +1867,19 @@ static long kvm_vm_ioctl(struct file *filp,
break;
}
#endif
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+ case KVM_DEASSIGN_PCI_DEVICE: {
+ struct kvm_assigned_pci_dev assigned_dev;
+
+ r = -EFAULT;
+ if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+ goto out;
+ r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
+ if (r)
+ goto out;
+ break;
+ }
+#endif
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
}
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c
index a770874..832ee04 100644
--- a/virt/kvm/vtd.c
+++ b/virt/kvm/vtd.c
@@ -36,7 +36,8 @@ int kvm_iommu_map_pages(struct kvm *kvm,
{
gfn_t gfn = base_gfn;
pfn_t pfn;
- int i, r = 0;
+ int r = 0;
+ unsigned long i;
struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
/* check if iommu exists and in use */
@@ -48,15 +49,15 @@ int kvm_iommu_map_pages(struct kvm *kvm,
pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
gfn_to_gpa(gfn));
if (pfn)
- continue;
+ kvm_iommu_put_pages(kvm, gfn, 1);
pfn = gfn_to_pfn(kvm, gfn);
- r = intel_iommu_page_mapping(domain,
- gfn_to_gpa(gfn),
- pfn_to_hpa(pfn),
- PAGE_SIZE,
- DMA_PTE_READ |
- DMA_PTE_WRITE);
+ r = intel_iommu_map_pages(domain,
+ gfn_to_gpa(gfn),
+ pfn_to_hpa(pfn),
+ PAGE_SIZE,
+ DMA_PTE_READ |
+ DMA_PTE_WRITE);
if (r) {
printk(KERN_ERR "kvm_iommu_map_pages:"
"iommu failed to map pfn=%lx\n", pfn);
@@ -86,50 +87,79 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
return r;
}
-int kvm_iommu_map_guest(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
+int kvm_assign_device(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *assigned_dev)
{
struct pci_dev *pdev = NULL;
+ struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
int r;
- if (!intel_iommu_found()) {
- printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+ /* check if iommu exists and in use */
+ if (!domain)
+ return 0;
+
+ pdev = assigned_dev->dev;
+ if (pdev == NULL)
return -ENODEV;
+
+ r = intel_iommu_assign_device(domain, pdev);
+ if (r) {
+ printk(KERN_ERR "assign device %x:%x.%x failed",
+ pdev->bus->number,
+ PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn));
+ return r;
}
- printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+ printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
+ return 0;
+}
+
+int kvm_deassign_device(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *assigned_dev)
+{
+ struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+ struct pci_dev *pdev = NULL;
+
+ /* check if iommu exists and in use */
+ if (!domain)
+ return 0;
+
pdev = assigned_dev->dev;
+ if (pdev == NULL)
+ return -ENODEV;
- if (pdev == NULL) {
- if (kvm->arch.intel_iommu_domain) {
- intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
- kvm->arch.intel_iommu_domain = NULL;
- }
+ intel_iommu_deassign_device(domain, pdev);
+
+ printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
+ assigned_dev->host_busnr,
+ PCI_SLOT(assigned_dev->host_devfn),
+ PCI_FUNC(assigned_dev->host_devfn));
+
+ return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm)
+{
+ int r;
+
+ if (!intel_iommu_found()) {
+ printk(KERN_ERR "%s: intel iommu not found\n", __func__);
return -ENODEV;
}
- kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+ kvm->arch.intel_iommu_domain = intel_iommu_alloc_domain();
if (!kvm->arch.intel_iommu_domain)
- return -ENODEV;
+ return -ENOMEM;
r = kvm_iommu_map_memslots(kvm);
if (r)
goto out_unmap;
- intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
- pdev->bus->number, pdev->devfn);
-
- r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
- pdev);
- if (r) {
- printk(KERN_ERR "Domain context map for %s failed",
- pci_name(pdev));
- goto out_unmap;
- }
return 0;
out_unmap:
@@ -138,12 +168,16 @@ out_unmap:
}
static void kvm_iommu_put_pages(struct kvm *kvm,
- gfn_t base_gfn, unsigned long npages)
+ gfn_t base_gfn, unsigned long npages)
{
gfn_t gfn = base_gfn;
pfn_t pfn;
struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
- int i;
+ unsigned long i;
+
+ /* check if iommu exists and in use */
+ if (!domain)
+ return;
for (i = 0; i < npages; i++) {
pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
@@ -151,6 +185,10 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
kvm_release_pfn_clean(pfn);
gfn++;
}
+
+ intel_iommu_unmap_pages(domain,
+ gfn_to_gpa(base_gfn),
+ PAGE_SIZE * npages);
}
static int kvm_iommu_unmap_memslots(struct kvm *kvm)
@@ -182,10 +220,9 @@ int kvm_iommu_unmap_guest(struct kvm *kvm)
PCI_FUNC(entry->host_devfn));
/* detach kvm dmar domain */
- intel_iommu_detach_dev(domain, entry->host_busnr,
- entry->host_devfn);
+ intel_iommu_deassign_device(domain, entry->dev);
}
kvm_iommu_unmap_memslots(kvm);
- intel_iommu_domain_exit(domain);
+ intel_iommu_free_domain(domain);
return 0;
}
--
1.5.6.4
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 03/12] KVM: rename vtd.c to iommu.c
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
2008-12-02 13:01 ` [PATCH 01/12] VT-d: Support multiple device assignment for KVM Joerg Roedel
2008-12-02 13:01 ` [PATCH 02/12] KVM: change to use new APIs for kvm vtd Joerg Roedel
@ 2008-12-02 13:01 ` Joerg Roedel
2008-12-02 13:01 ` [PATCH 04/12] introduce linux/iommu.h for an iommu api Joerg Roedel
` (10 subsequent siblings)
13 siblings, 0 replies; 19+ messages in thread
From: Joerg Roedel @ 2008-12-02 13:01 UTC (permalink / raw)
To: Ingo Molnar, Avi Kivity, David Woodhouse, Greg Kroah-Hartman,
Alexander Graf, Han Weidong
Cc: linux-kernel, kvm, iommu, Joerg Roedel
Impact: file renamed
The code in the vtd.c file can be reused for other IOMMUs as well. So
rename it to make it clear that it can handle more than VT-d.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/ia64/kvm/Makefile | 2 +-
arch/x86/kvm/Makefile | 2 +-
virt/kvm/{vtd.c => iommu.c} | 0
3 files changed, 2 insertions(+), 2 deletions(-)
rename virt/kvm/{vtd.c => iommu.c} (100%)
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 76464dc..cb69dfc 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -52,7 +52,7 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o)
ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
endif
kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index c023435..00f46c2 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -8,7 +8,7 @@ ifeq ($(CONFIG_KVM_TRACE),y)
common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
endif
ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
endif
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
diff --git a/virt/kvm/vtd.c b/virt/kvm/iommu.c
similarity index 100%
rename from virt/kvm/vtd.c
rename to virt/kvm/iommu.c
--
1.5.6.4
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 04/12] introduce linux/iommu.h for an iommu api
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
` (2 preceding siblings ...)
2008-12-02 13:01 ` [PATCH 03/12] KVM: rename vtd.c to iommu.c Joerg Roedel
@ 2008-12-02 13:01 ` Joerg Roedel
2008-12-02 13:01 ` [PATCH 05/12] add frontend implementation for the IOMMU API Joerg Roedel
` (9 subsequent siblings)
13 siblings, 0 replies; 19+ messages in thread
From: Joerg Roedel @ 2008-12-02 13:01 UTC (permalink / raw)
To: Ingo Molnar, Avi Kivity, David Woodhouse, Greg Kroah-Hartman,
Alexander Graf, Han Weidong
Cc: linux-kernel, kvm, iommu, Joerg Roedel
This patch introduces the API to abstract the exported VT-d functions
for KVM into a generic API. This way the AMD IOMMU implementation can
plug into this API later.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/iommu.h | 109 +++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 109 insertions(+), 0 deletions(-)
create mode 100644 include/linux/iommu.h
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
new file mode 100644
index 0000000..47e9ec8
--- /dev/null
+++ b/include/linux/iommu.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __LINUX_IOMMU_H
+#define __LINUX_IOMMU_H
+
+struct device;
+
+struct iommu_domain {
+ void *priv;
+};
+
+struct iommu_ops {
+ int (*domain_init)(struct iommu_domain *domain);
+ void (*domain_destroy)(struct iommu_domain *domain);
+ int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
+ void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
+ int (*map)(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot);
+ void (*unmap)(struct iommu_domain *domain, unsigned long iova,
+ size_t size);
+ phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
+ unsigned long iova);
+};
+
+#ifdef CONFIG_IOMMU_API
+
+extern void register_iommu(struct iommu_ops *ops);
+extern bool iommu_found(void);
+extern struct iommu_domain *iommu_domain_alloc(void);
+extern void iommu_domain_free(struct iommu_domain *domain);
+extern int iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev);
+extern void iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev);
+extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot);
+extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
+ size_t size);
+extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+ unsigned long iova);
+
+#else /* CONFIG_IOMMU_API */
+
+static inline void register_iommu(struct iommu_ops *ops)
+{
+}
+
+static inline bool iommu_found(void)
+{
+ return false;
+}
+
+static inline struct iommu_domain *iommu_domain_alloc(void)
+{
+ return NULL;
+}
+
+static inline void iommu_domain_free(struct iommu_domain *domain)
+{
+}
+
+static inline int iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ return -ENODEV;
+}
+
+static inline void iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+}
+
+static inline int iommu_map_range(struct iommu_domain *domain,
+ unsigned long iova, phys_addr_t paddr,
+ size_t size, int prot)
+{
+ return -ENODEV;
+}
+
+static inline void iommu_unmap_range(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
+{
+}
+
+static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+ unsigned long iova)
+{
+ return 0;
+}
+
+#endif /* CONFIG_IOMMU_API */
+
+#endif /* __LINUX_IOMMU_H */
--
1.5.6.4
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 05/12] add frontend implementation for the IOMMU API
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
` (3 preceding siblings ...)
2008-12-02 13:01 ` [PATCH 04/12] introduce linux/iommu.h for an iommu api Joerg Roedel
@ 2008-12-02 13:01 ` Joerg Roedel
2008-12-02 13:01 ` [PATCH 06/12] select IOMMU_API when DMAR and/or AMD_IOMMU is selected Joerg Roedel
` (8 subsequent siblings)
13 siblings, 0 replies; 19+ messages in thread
From: Joerg Roedel @ 2008-12-02 13:01 UTC (permalink / raw)
To: Ingo Molnar, Avi Kivity, David Woodhouse, Greg Kroah-Hartman,
Alexander Graf, Han Weidong
Cc: linux-kernel, kvm, iommu, Joerg Roedel
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/base/iommu.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 100 insertions(+), 0 deletions(-)
create mode 100644 drivers/base/iommu.c
diff --git a/drivers/base/iommu.c b/drivers/base/iommu.c
new file mode 100644
index 0000000..5e039d4
--- /dev/null
+++ b/drivers/base/iommu.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/bug.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/iommu.h>
+
+static struct iommu_ops *iommu_ops;
+
+void register_iommu(struct iommu_ops *ops)
+{
+ if (iommu_ops)
+ BUG();
+
+ iommu_ops = ops;
+}
+
+bool iommu_found()
+{
+ return iommu_ops != NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_found);
+
+struct iommu_domain *iommu_domain_alloc(void)
+{
+ struct iommu_domain *domain;
+ int ret;
+
+ domain = kmalloc(sizeof(*domain), GFP_KERNEL);
+ if (!domain)
+ return NULL;
+
+ ret = iommu_ops->domain_init(domain);
+ if (ret)
+ goto out_free;
+
+ return domain;
+
+out_free:
+ kfree(domain);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_domain_alloc);
+
+void iommu_domain_free(struct iommu_domain *domain)
+{
+ iommu_ops->domain_destroy(domain);
+ kfree(domain);
+}
+EXPORT_SYMBOL_GPL(iommu_domain_free);
+
+int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
+{
+ return iommu_ops->attach_dev(domain, dev);
+}
+EXPORT_SYMBOL_GPL(iommu_attach_device);
+
+void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
+{
+ iommu_ops->detach_dev(domain, dev);
+}
+EXPORT_SYMBOL_GPL(iommu_detach_device);
+
+int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot)
+{
+ return iommu_ops->map(domain, iova, paddr, size, prot);
+}
+EXPORT_SYMBOL_GPL(iommu_map_range);
+
+void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
+{
+ iommu_ops->unmap(domain, iova, size);
+}
+EXPORT_SYMBOL_GPL(iommu_unmap_range);
+
+phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+ unsigned long iova)
+{
+ return iommu_ops->iova_to_phys(domain, iova);
+}
+EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
--
1.5.6.4
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 06/12] select IOMMU_API when DMAR and/or AMD_IOMMU is selected
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
` (4 preceding siblings ...)
2008-12-02 13:01 ` [PATCH 05/12] add frontend implementation for the IOMMU API Joerg Roedel
@ 2008-12-02 13:01 ` Joerg Roedel
2008-12-02 13:01 ` [PATCH 07/12] KVM: change KVM iommu.c to use IOMMU API Joerg Roedel
` (7 subsequent siblings)
13 siblings, 0 replies; 19+ messages in thread
From: Joerg Roedel @ 2008-12-02 13:01 UTC (permalink / raw)
To: Ingo Molnar, Avi Kivity, David Woodhouse, Greg Kroah-Hartman,
Alexander Graf, Han Weidong
Cc: linux-kernel, kvm, iommu, Joerg Roedel
These two IOMMUs can implement the current version of this API. So
select the API if one or both of these IOMMU drivers is selected.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/ia64/Kconfig | 3 +++
arch/x86/Kconfig | 3 +++
drivers/base/Makefile | 1 +
3 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 6bd91ed..6a7b0c9 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -687,3 +687,6 @@ config IRQ_PER_CPU
config IOMMU_HELPER
def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
+
+config IOMMU_API
+ def_bool (DMAR)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7..b9f7187 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -580,6 +580,9 @@ config SWIOTLB
config IOMMU_HELPER
def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+config IOMMU_API
+ def_bool (AMD_IOMMU || DMAR)
+
config MAXSMP
bool "Configure Maximum number of SMP Processors and NUMA Nodes"
depends on X86_64 && SMP && BROKEN
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index c666373..b5b8ba5 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_FW_LOADER) += firmware_class.o
obj-$(CONFIG_NUMA) += node.o
obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
obj-$(CONFIG_SMP) += topology.o
+obj-$(CONFIG_IOMMU_API) += iommu.o
ifeq ($(CONFIG_SYSFS),y)
obj-$(CONFIG_MODULES) += module.o
endif
--
1.5.6.4
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 07/12] KVM: change KVM iommu.c to use IOMMU API
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
` (5 preceding siblings ...)
2008-12-02 13:01 ` [PATCH 06/12] select IOMMU_API when DMAR and/or AMD_IOMMU is selected Joerg Roedel
@ 2008-12-02 13:01 ` Joerg Roedel
2008-12-02 13:01 ` [PATCH 08/12] VT-d: adapt domain init and destroy functions for " Joerg Roedel
` (6 subsequent siblings)
13 siblings, 0 replies; 19+ messages in thread
From: Joerg Roedel @ 2008-12-02 13:01 UTC (permalink / raw)
To: Ingo Molnar, Avi Kivity, David Woodhouse, Greg Kroah-Hartman,
Alexander Graf, Han Weidong
Cc: linux-kernel, kvm, iommu, Joerg Roedel
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/ia64/kvm/Makefile | 2 +-
arch/x86/include/asm/kvm_host.h | 3 +-
arch/x86/kvm/Makefile | 2 +-
virt/kvm/iommu.c | 68 ++++++++++++++++++++-------------------
virt/kvm/kvm_main.c | 2 +-
5 files changed, 40 insertions(+), 37 deletions(-)
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index cb69dfc..0bb99b7 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -51,7 +51,7 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o)
-ifeq ($(CONFIG_DMAR),y)
+ifeq ($(CONFIG_IOMMU_API),y)
common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f58f7eb..77f4afa 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
+#include <linux/iommu.h>
#include <linux/kvm.h>
#include <linux/kvm_para.h>
@@ -356,7 +357,7 @@ struct kvm_arch{
*/
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
- struct dmar_domain *intel_iommu_domain;
+ struct iommu_domain *iommu_domain;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 00f46c2..d3ec292 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,7 +7,7 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
ifeq ($(CONFIG_KVM_TRACE),y)
common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
endif
-ifeq ($(CONFIG_DMAR),y)
+ifeq ($(CONFIG_IOMMU_API),y)
common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
endif
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 832ee04..110c455 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -16,15 +16,18 @@
*
* Copyright (C) 2006-2008 Intel Corporation
* Copyright IBM Corporation, 2008
+ * Copyright (C) 2008 Advanced Micro Devices, Inc.
* Author: Allen M. Kay <allen.m.kay@intel.com>
* Author: Weidong Han <weidong.han@intel.com>
* Author: Ben-Ami Yassour <benami@il.ibm.com>
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
*/
#include <linux/list.h>
#include <linux/kvm_host.h>
#include <linux/pci.h>
#include <linux/dmar.h>
+#include <linux/iommu.h>
#include <linux/intel-iommu.h>
static int kvm_iommu_unmap_memslots(struct kvm *kvm);
@@ -36,9 +39,9 @@ int kvm_iommu_map_pages(struct kvm *kvm,
{
gfn_t gfn = base_gfn;
pfn_t pfn;
- int r = 0;
- unsigned long i;
- struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+ phys_addr_t paddr;
+ int i, r = 0;
+ struct iommu_domain *domain = kvm->arch.iommu_domain;
/* check if iommu exists and in use */
if (!domain)
@@ -46,18 +49,13 @@ int kvm_iommu_map_pages(struct kvm *kvm,
for (i = 0; i < npages; i++) {
/* check if already mapped */
- pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
- gfn_to_gpa(gfn));
- if (pfn)
+ paddr = (pfn_t)iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
+ if (paddr)
kvm_iommu_put_pages(kvm, gfn, 1);
pfn = gfn_to_pfn(kvm, gfn);
- r = intel_iommu_map_pages(domain,
- gfn_to_gpa(gfn),
- pfn_to_hpa(pfn),
- PAGE_SIZE,
- DMA_PTE_READ |
- DMA_PTE_WRITE);
+ r = iommu_map_range(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
+ PAGE_SIZE, DMA_PTE_READ | DMA_PTE_WRITE);
if (r) {
printk(KERN_ERR "kvm_iommu_map_pages:"
"iommu failed to map pfn=%lx\n", pfn);
@@ -91,7 +89,7 @@ int kvm_assign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
struct pci_dev *pdev = NULL;
- struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+ struct iommu_domain *domain = kvm->arch.iommu_domain;
int r;
/* check if iommu exists and in use */
@@ -102,7 +100,12 @@ int kvm_assign_device(struct kvm *kvm,
if (pdev == NULL)
return -ENODEV;
- r = intel_iommu_assign_device(domain, pdev);
+ if (!iommu_found()) {
+ printk(KERN_ERR "%s: No IOMMU found\n", __func__);
+ return -ENODEV;
+ }
+
+ r = iommu_attach_device(domain, &pdev->dev);
if (r) {
printk(KERN_ERR "assign device %x:%x.%x failed",
pdev->bus->number,
@@ -111,7 +114,7 @@ int kvm_assign_device(struct kvm *kvm,
return r;
}
- printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
+ printk(KERN_DEBUG "KVM IOMMU direct map: host bdf = %x:%x:%x\n",
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
@@ -122,7 +125,7 @@ int kvm_assign_device(struct kvm *kvm,
int kvm_deassign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
- struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+ struct iommu_domain *domain = kvm->arch.iommu_domain;
struct pci_dev *pdev = NULL;
/* check if iommu exists and in use */
@@ -133,7 +136,7 @@ int kvm_deassign_device(struct kvm *kvm,
if (pdev == NULL)
return -ENODEV;
- intel_iommu_deassign_device(domain, pdev);
+ iommu_detach_device(domain, &pdev->dev);
printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
assigned_dev->host_busnr,
@@ -147,13 +150,13 @@ int kvm_iommu_map_guest(struct kvm *kvm)
{
int r;
- if (!intel_iommu_found()) {
- printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+ if (!iommu_found()) {
+ printk(KERN_ERR "%s: iommu not found\n", __func__);
return -ENODEV;
}
- kvm->arch.intel_iommu_domain = intel_iommu_alloc_domain();
- if (!kvm->arch.intel_iommu_domain)
+ kvm->arch.iommu_domain = iommu_domain_alloc();
+ if (!kvm->arch.iommu_domain)
return -ENOMEM;
r = kvm_iommu_map_memslots(kvm);
@@ -171,8 +174,8 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
gfn_t base_gfn, unsigned long npages)
{
gfn_t gfn = base_gfn;
- pfn_t pfn;
- struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+ phys_addr_t paddr;
+ struct iommu_domain *domain = kvm->arch.iommu_domain;
unsigned long i;
/* check if iommu exists and in use */
@@ -180,15 +183,12 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
return;
for (i = 0; i < npages; i++) {
- pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
- gfn_to_gpa(gfn));
- kvm_release_pfn_clean(pfn);
+ paddr = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
+ kvm_release_pfn_clean(paddr >> PAGE_SHIFT);
gfn++;
}
- intel_iommu_unmap_pages(domain,
- gfn_to_gpa(base_gfn),
- PAGE_SIZE * npages);
+ iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages);
}
static int kvm_iommu_unmap_memslots(struct kvm *kvm)
@@ -207,22 +207,24 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
int kvm_iommu_unmap_guest(struct kvm *kvm)
{
struct kvm_assigned_dev_kernel *entry;
- struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+ struct iommu_domain *domain = kvm->arch.iommu_domain;
/* check if iommu exists and in use */
if (!domain)
return 0;
list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
- printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+ struct pci_dev *pdev = entry->dev;
+ printk(KERN_DEBUG "IOMMU unmap: host bdf = %x:%x:%x\n",
entry->host_busnr,
PCI_SLOT(entry->host_devfn),
PCI_FUNC(entry->host_devfn));
/* detach kvm dmar domain */
- intel_iommu_deassign_device(domain, entry->dev);
+ iommu_detach_device(domain, &pdev->dev);
}
kvm_iommu_unmap_memslots(kvm);
- intel_iommu_free_domain(domain);
+ iommu_domain_free(domain);
+
return 0;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fe6aba0..397ad0f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -472,7 +472,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
list_add(&match->list, &kvm->arch.assigned_dev_head);
if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
- if (!kvm->arch.intel_iommu_domain) {
+ if (!kvm->arch.iommu_domain) {
r = kvm_iommu_map_guest(kvm);
if (r)
goto out_list_del;
--
1.5.6.4
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 08/12] VT-d: adapt domain init and destroy functions for IOMMU API
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
` (6 preceding siblings ...)
2008-12-02 13:01 ` [PATCH 07/12] KVM: change KVM iommu.c to use IOMMU API Joerg Roedel
@ 2008-12-02 13:01 ` Joerg Roedel
2008-12-02 13:01 ` [PATCH 09/12] VT-d: adapt device attach and detach " Joerg Roedel
` (5 subsequent siblings)
13 siblings, 0 replies; 19+ messages in thread
From: Joerg Roedel @ 2008-12-02 13:01 UTC (permalink / raw)
To: Ingo Molnar, Avi Kivity, David Woodhouse, Greg Kroah-Hartman,
Alexander Graf, Han Weidong
Cc: linux-kernel, kvm, iommu, Joerg Roedel
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/pci/intel-iommu.c | 30 +++++++++++++++++-------------
include/linux/intel-iommu.h | 2 --
2 files changed, 17 insertions(+), 15 deletions(-)
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 7f12852..59b9cdb 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -35,6 +35,7 @@
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
+#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
@@ -2779,32 +2780,34 @@ static struct dmar_domain *iommu_alloc_vm_domain(void)
return domain;
}
-struct dmar_domain *intel_iommu_alloc_domain(void)
+static int intel_iommu_domain_init(struct iommu_domain *domain)
{
- struct dmar_domain *domain;
+ struct dmar_domain *dmar_domain;
- domain = iommu_alloc_vm_domain();
- if (!domain) {
+ dmar_domain = iommu_alloc_vm_domain();
+ if (!dmar_domain) {
printk(KERN_ERR
"intel_iommu_domain_alloc: domain == NULL\n");
- return NULL;
+ return -ENOMEM;
}
- if (vm_domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+ if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
printk(KERN_ERR
"intel_iommu_domain_alloc: domain_init() failed\n");
- vm_domain_exit(domain);
- return NULL;
+ vm_domain_exit(dmar_domain);
+ return -ENOMEM;
}
+ domain->priv = dmar_domain;
- return domain;
+ return 0;
}
-EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain);
-void intel_iommu_free_domain(struct dmar_domain *domain)
+static void intel_iommu_domain_destroy(struct iommu_domain *domain)
{
- vm_domain_exit(domain);
+ struct dmar_domain *dmar_domain = domain->priv;
+
+ domain->priv = NULL;
+ vm_domain_exit(dmar_domain);
}
-EXPORT_SYMBOL_GPL(intel_iommu_free_domain);
int intel_iommu_assign_device(struct dmar_domain *domain,
struct pci_dev *pdev)
@@ -2922,3 +2925,4 @@ u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
return pfn >> VTD_PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
+
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index c2f37b8..5a4ce23 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -337,8 +337,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
-struct dmar_domain *intel_iommu_alloc_domain(void);
-void intel_iommu_free_domain(struct dmar_domain *domain);
int intel_iommu_assign_device(struct dmar_domain *domain,
struct pci_dev *pdev);
void intel_iommu_deassign_device(struct dmar_domain *domain,
--
1.5.6.4
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 09/12] VT-d: adapt device attach and detach functions for IOMMU API
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
` (7 preceding siblings ...)
2008-12-02 13:01 ` [PATCH 08/12] VT-d: adapt domain init and destroy functions for " Joerg Roedel
@ 2008-12-02 13:01 ` Joerg Roedel
2008-12-02 13:01 ` [PATCH 10/12] VT-d: adapt domain map and unmap " Joerg Roedel
` (4 subsequent siblings)
13 siblings, 0 replies; 19+ messages in thread
From: Joerg Roedel @ 2008-12-02 13:01 UTC (permalink / raw)
To: Ingo Molnar, Avi Kivity, David Woodhouse, Greg Kroah-Hartman,
Alexander Graf, Han Weidong
Cc: linux-kernel, kvm, iommu, Joerg Roedel
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/pci/intel-iommu.c | 28 +++++++++++++++-------------
include/linux/intel-iommu.h | 4 ----
2 files changed, 15 insertions(+), 17 deletions(-)
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 59b9cdb..62ae6b1 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2809,9 +2809,11 @@ static void intel_iommu_domain_destroy(struct iommu_domain *domain)
vm_domain_exit(dmar_domain);
}
-int intel_iommu_assign_device(struct dmar_domain *domain,
- struct pci_dev *pdev)
+static int intel_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
{
+ struct dmar_domain *dmar_domain = domain->priv;
+ struct pci_dev *pdev = to_pci_dev(dev);
struct intel_iommu *iommu;
int addr_width;
u64 end;
@@ -2823,7 +2825,7 @@ int intel_iommu_assign_device(struct dmar_domain *domain,
old_domain = find_domain(pdev);
if (old_domain) {
- if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+ if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
vm_domain_remove_one_dev_info(old_domain, pdev);
else
domain_remove_dev_info(old_domain);
@@ -2838,29 +2840,29 @@ int intel_iommu_assign_device(struct dmar_domain *domain,
addr_width = agaw_to_width(iommu->agaw);
end = DOMAIN_MAX_ADDR(addr_width);
end = end & VTD_PAGE_MASK;
- if (end < domain->max_addr) {
+ if (end < dmar_domain->max_addr) {
printk(KERN_ERR "%s: iommu agaw (%d) is not "
"sufficient for the mapped address (%llx)\n",
- __func__, iommu->agaw, domain->max_addr);
+ __func__, iommu->agaw, dmar_domain->max_addr);
return -EFAULT;
}
- ret = vm_domain_context_mapping(domain, pdev);
+ ret = vm_domain_context_mapping(dmar_domain, pdev);
if (ret)
return ret;
- ret = vm_domain_add_dev_info(domain, pdev);
+ ret = vm_domain_add_dev_info(dmar_domain, pdev);
return ret;
}
-EXPORT_SYMBOL_GPL(intel_iommu_assign_device);
-
-void intel_iommu_deassign_device(struct dmar_domain *domain,
- struct pci_dev *pdev)
+static void intel_iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev)
{
- vm_domain_remove_one_dev_info(domain, pdev);
+ struct dmar_domain *dmar_domain = domain->priv;
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ vm_domain_remove_one_dev_info(dmar_domain, pdev);
}
-EXPORT_SYMBOL_GPL(intel_iommu_deassign_device);
int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova,
u64 hpa, size_t size, int prot)
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 5a4ce23..41d2a3b 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -337,10 +337,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
-int intel_iommu_assign_device(struct dmar_domain *domain,
- struct pci_dev *pdev);
-void intel_iommu_deassign_device(struct dmar_domain *domain,
- struct pci_dev *pdev);
int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova,
u64 hpa, size_t size, int prot);
void intel_iommu_unmap_pages(struct dmar_domain *domain,
--
1.5.6.4
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 10/12] VT-d: adapt domain map and unmap functions for IOMMU API
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
` (8 preceding siblings ...)
2008-12-02 13:01 ` [PATCH 09/12] VT-d: adapt device attach and detach " Joerg Roedel
@ 2008-12-02 13:01 ` Joerg Roedel
2008-12-02 13:01 ` [PATCH 11/12] VT-d: adapt domain iova_to_phys function " Joerg Roedel
` (3 subsequent siblings)
13 siblings, 0 replies; 19+ messages in thread
From: Joerg Roedel @ 2008-12-02 13:01 UTC (permalink / raw)
To: Ingo Molnar, Avi Kivity, David Woodhouse, Greg Kroah-Hartman,
Alexander Graf, Han Weidong
Cc: linux-kernel, kvm, iommu, Joerg Roedel
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/pci/intel-iommu.c | 22 +++++++++++-----------
include/linux/intel-iommu.h | 4 ----
2 files changed, 11 insertions(+), 15 deletions(-)
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 62ae6b1..ac22973 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2864,20 +2864,21 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
vm_domain_remove_one_dev_info(dmar_domain, pdev);
}
-int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova,
- u64 hpa, size_t size, int prot)
+static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t hpa, size_t size, int prot)
{
+ struct dmar_domain *dmar_domain = domain->priv;
u64 max_addr;
int addr_width;
int ret;
max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
- if (domain->max_addr < max_addr) {
+ if (dmar_domain->max_addr < max_addr) {
int min_agaw;
u64 end;
/* check if minimum agaw is sufficient for mapped address */
- min_agaw = vm_domain_min_agaw(domain);
+ min_agaw = vm_domain_min_agaw(dmar_domain);
addr_width = agaw_to_width(min_agaw);
end = DOMAIN_MAX_ADDR(addr_width);
end = end & VTD_PAGE_MASK;
@@ -2887,25 +2888,24 @@ int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova,
__func__, min_agaw, max_addr);
return -EFAULT;
}
- domain->max_addr = max_addr;
+ dmar_domain->max_addr = max_addr;
}
- ret = domain_page_mapping(domain, iova, hpa, size, prot);
+ ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
return ret;
}
-EXPORT_SYMBOL_GPL(intel_iommu_map_pages);
-void intel_iommu_unmap_pages(struct dmar_domain *domain,
- dma_addr_t iova, size_t size)
+static void intel_iommu_unmap(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
+ struct dmar_domain *dmar_domain = domain->priv;
dma_addr_t base;
/* The address might not be aligned */
base = iova & PAGE_MASK;
size = PAGE_ALIGN(size);
- dma_pte_clear_range(domain, base, base + size);
+ dma_pte_clear_range(dmar_domain, base, base + size);
}
-EXPORT_SYMBOL_GPL(intel_iommu_unmap_pages);
int intel_iommu_found(void)
{
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 41d2a3b..ac79a1c 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -337,10 +337,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
-int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova,
- u64 hpa, size_t size, int prot);
-void intel_iommu_unmap_pages(struct dmar_domain *domain,
- dma_addr_t iova, size_t size);
u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
#ifdef CONFIG_DMAR
--
1.5.6.4
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 11/12] VT-d: adapt domain iova_to_phys function for IOMMU API
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
` (9 preceding siblings ...)
2008-12-02 13:01 ` [PATCH 10/12] VT-d: adapt domain map and unmap " Joerg Roedel
@ 2008-12-02 13:01 ` Joerg Roedel
2008-12-02 13:01 ` [PATCH 12/12] VT-d: register functions for the " Joerg Roedel
` (2 subsequent siblings)
13 siblings, 0 replies; 19+ messages in thread
From: Joerg Roedel @ 2008-12-02 13:01 UTC (permalink / raw)
To: Ingo Molnar, Avi Kivity, David Woodhouse, Greg Kroah-Hartman,
Alexander Graf, Han Weidong
Cc: linux-kernel, kvm, iommu, Joerg Roedel
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/pci/intel-iommu.c | 15 ++++++++-------
include/linux/intel-iommu.h | 2 --
2 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index ac22973..2e8b102 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2913,18 +2913,19 @@ int intel_iommu_found(void)
}
EXPORT_SYMBOL_GPL(intel_iommu_found);
-u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
+static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
+ unsigned long iova)
{
+ struct dmar_domain *dmar_domain = domain->priv;
struct dma_pte *pte;
- u64 pfn;
+ phys_addr_t paddr;
- pfn = 0;
- pte = addr_to_dma_pte(domain, iova);
+ paddr = 0;
+ pte = addr_to_dma_pte(dmar_domain, iova);
if (pte)
- pfn = dma_pte_addr(*pte);
+ paddr = dma_pte_addr(*pte);
- return pfn >> VTD_PAGE_SHIFT;
+ return paddr;
}
-EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index ac79a1c..469508f 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -337,8 +337,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
-u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
-
#ifdef CONFIG_DMAR
int intel_iommu_found(void);
#else /* CONFIG_DMAR */
--
1.5.6.4
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 12/12] VT-d: register functions for the IOMMU API
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
` (10 preceding siblings ...)
2008-12-02 13:01 ` [PATCH 11/12] VT-d: adapt domain iova_to_phys function " Joerg Roedel
@ 2008-12-02 13:01 ` Joerg Roedel
2008-12-03 3:44 ` [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Greg KH
2008-12-03 10:17 ` Han, Weidong
13 siblings, 0 replies; 19+ messages in thread
From: Joerg Roedel @ 2008-12-02 13:01 UTC (permalink / raw)
To: Ingo Molnar, Avi Kivity, David Woodhouse, Greg Kroah-Hartman,
Alexander Graf, Han Weidong
Cc: linux-kernel, kvm, iommu, Joerg Roedel
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/pci/intel-iommu.c | 13 +++++++++++++
1 files changed, 13 insertions(+), 0 deletions(-)
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 2e8b102..bb6f771 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -55,6 +55,7 @@
static void flush_unmaps_timeout(unsigned long data);
+static struct iommu_ops intel_iommu_ops;
DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
@@ -2429,6 +2430,9 @@ int __init intel_iommu_init(void)
init_timer(&unmap_timer);
force_iommu = 1;
dma_ops = &intel_dma_ops;
+
+ register_iommu(&intel_iommu_ops);
+
return 0;
}
@@ -2929,3 +2933,12 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
return paddr;
}
+static struct iommu_ops intel_iommu_ops = {
+ .domain_init = intel_iommu_domain_init,
+ .domain_destroy = intel_iommu_domain_destroy,
+ .attach_dev = intel_iommu_attach_device,
+ .detach_dev = intel_iommu_detach_device,
+ .map = intel_iommu_map,
+ .unmap = intel_iommu_unmap,
+ .iova_to_phys = intel_iommu_iova_to_phys,
+};
--
1.5.6.4
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support)
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
` (11 preceding siblings ...)
2008-12-02 13:01 ` [PATCH 12/12] VT-d: register functions for the " Joerg Roedel
@ 2008-12-03 3:44 ` Greg KH
2008-12-03 7:50 ` Joerg Roedel
2008-12-03 10:17 ` Han, Weidong
13 siblings, 1 reply; 19+ messages in thread
From: Greg KH @ 2008-12-03 3:44 UTC (permalink / raw)
To: Joerg Roedel
Cc: Ingo Molnar, Avi Kivity, David Woodhouse, Alexander Graf,
Han Weidong, linux-kernel, kvm, iommu
On Tue, Dec 02, 2008 at 02:01:11PM +0100, Joerg Roedel wrote:
> Hi,
>
> this patch series makes the current KVM device passthrough code generic
> enough so that other IOMMU implementation can also plug into this code.
> It works by factoring the functions Vt-d code exports to KVM into a
> generic interface which allows different backends.
>
> This is the second version of the patchset. The most important change to
> the previous version is that this patchset was rebased to the improved
> API from Han Weidong which supports multiple devices per IOMMU domain.
>
> For completeness, this series also includes the patches from Han with
> some cleanups. So this patchset can be applied on current avi/master
> tree.
Have you tried porting any of the current iommu controllers to this new
framework to see if it works properly for them?
thanks,
greg k-h
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support)
2008-12-03 3:44 ` [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Greg KH
@ 2008-12-03 7:50 ` Joerg Roedel
2008-12-03 7:53 ` Greg KH
0 siblings, 1 reply; 19+ messages in thread
From: Joerg Roedel @ 2008-12-03 7:50 UTC (permalink / raw)
To: Greg KH
Cc: Joerg Roedel, kvm, Han Weidong, linux-kernel, Alexander Graf,
iommu, Ingo Molnar, Avi Kivity
On Tue, Dec 02, 2008 at 07:44:05PM -0800, Greg KH wrote:
> On Tue, Dec 02, 2008 at 02:01:11PM +0100, Joerg Roedel wrote:
> > Hi,
> >
> > this patch series makes the current KVM device passthrough code generic
> > enough so that other IOMMU implementation can also plug into this code.
> > It works by factoring the functions Vt-d code exports to KVM into a
> > generic interface which allows different backends.
> >
> > This is the second version of the patchset. The most important change to
> > the previous version is that this patchset was rebased to the improved
> > API from Han Weidong which supports multiple devices per IOMMU domain.
> >
> > For completeness, this series also includes the patches from Han with
> > some cleanups. So this patchset can be applied on current avi/master
> > tree.
>
> Have you tried porting any of the current iommu controllers to this new
> framework to see if it works properly for them?
It currently works for VT-d. I am also porting it to the AMD IOMMU at the moment. With
some extensions (offset for start address, flags and size limitation) it
is also suitable for IOMMUs like GART or similar ones.
Joerg
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support)
2008-12-03 7:50 ` Joerg Roedel
@ 2008-12-03 7:53 ` Greg KH
2008-12-03 9:03 ` Joerg Roedel
0 siblings, 1 reply; 19+ messages in thread
From: Greg KH @ 2008-12-03 7:53 UTC (permalink / raw)
To: Joerg Roedel
Cc: Joerg Roedel, kvm, Han Weidong, linux-kernel, Alexander Graf,
iommu, Ingo Molnar, Avi Kivity
On Wed, Dec 03, 2008 at 08:50:49AM +0100, Joerg Roedel wrote:
> On Tue, Dec 02, 2008 at 07:44:05PM -0800, Greg KH wrote:
> > On Tue, Dec 02, 2008 at 02:01:11PM +0100, Joerg Roedel wrote:
> > > Hi,
> > >
> > > this patch series makes the current KVM device passthrough code generic
> > > enough so that other IOMMU implementation can also plug into this code.
> > > It works by factoring the functions Vt-d code exports to KVM into a
> > > generic interface which allows different backends.
> > >
> > > This is the second version of the patchset. The most important change to
> > > the previous version is that this patchset was rebased to the improved
> > > API from Han Weidong which supports multiple devices per IOMMU domain.
> > >
> > > For completeness, this series also includes the patches from Han with
> > > some cleanups. So this patchset can be applied on current avi/master
> > > tree.
> >
> > Have you tried porting any of the current iommu controllers to this new
> > framework to see if it works properly for them?
>
> It works currently for VT-d. I also port it to AMD IOMMU currently. With
> some extensions (offset for start address, flags and size limitation) it
> is also suitable for IOMMUs like GART or similar ones.
What about the Calgary chipset?
thanks,
greg k-h
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support)
2008-12-03 7:53 ` Greg KH
@ 2008-12-03 9:03 ` Joerg Roedel
2008-12-04 10:47 ` Muli Ben-Yehuda
0 siblings, 1 reply; 19+ messages in thread
From: Joerg Roedel @ 2008-12-03 9:03 UTC (permalink / raw)
To: Greg KH
Cc: Joerg Roedel, kvm, Han Weidong, linux-kernel, Alexander Graf,
iommu, Ingo Molnar, Avi Kivity
On Tue, Dec 02, 2008 at 11:53:02PM -0800, Greg KH wrote:
> On Wed, Dec 03, 2008 at 08:50:49AM +0100, Joerg Roedel wrote:
> > On Tue, Dec 02, 2008 at 07:44:05PM -0800, Greg KH wrote:
> > > On Tue, Dec 02, 2008 at 02:01:11PM +0100, Joerg Roedel wrote:
> > > > Hi,
> > > >
> > > > this patch series makes the current KVM device passthrough code generic
> > > > enough so that other IOMMU implementation can also plug into this code.
> > > > It works by factoring the functions Vt-d code exports to KVM into a
> > > > generic interface which allows different backends.
> > > >
> > > > This is the second version of the patchset. The most important change to
> > > > the previous version is that this patchset was rebased to the improved
> > > > API from Han Weidong which supports multiple devices per IOMMU domain.
> > > >
> > > > For completeness, this series also includes the patches from Han with
> > > > some cleanups. So this patchset can be applied on current avi/master
> > > > tree.
> > >
> > > Have you tried porting any of the current iommu controllers to this new
> > > framework to see if it works properly for them?
> >
> > It works currently for VT-d. I also port it to AMD IOMMU currently. With
> > some extensions (offset for start address, flags and size limitation) it
> > is also suitable for IOMMUs like GART or similar ones.
>
> What about the Calgary chipset?
Calgary is quite similar to GART (there is something like the aperture
and a linear single-level pagetable). So with the before mentioned
extensions it is also suitable for Calgary.
Joerg
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support)
2008-12-03 9:03 ` Joerg Roedel
@ 2008-12-04 10:47 ` Muli Ben-Yehuda
0 siblings, 0 replies; 19+ messages in thread
From: Muli Ben-Yehuda @ 2008-12-04 10:47 UTC (permalink / raw)
To: Joerg Roedel
Cc: Greg KH, Joerg Roedel, kvm, Han Weidong, linux-kernel,
Alexander Graf, iommu, Ingo Molnar, Avi Kivity
On Wed, Dec 03, 2008 at 10:03:04AM +0100, Joerg Roedel wrote:
> > > > Have you tried porting any of the current iommu controllers to
> > > > this new framework to see if it works properly for them?
> > >
> > > It works currently for VT-d. I also port it to AMD IOMMU
> > > currently. With some extensions (offset for start address, flags
> > > and size limitation) it is also suitable for IOMMUs like GART or
> > > similar ones.
> >
> > What about the Calgary chipset?
>
> Calgary is quite similar to GART (there is something like the
> aperture and a linear single-level pagetable).
Actually, Calgary has multiple per-bus address spaces (each of which
is a single-level linear pagetable limited to 4GB of addressable
memory), so I think it should work with your current approach pretty
much as is, once we take into account these two (per-bus and 32-bit
addressability) limitations.
Cheers,
Muli
--
The First Workshop on I/O Virtualization (WIOV '08)
Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/
<->
SYSTOR 2009---The Israeli Experimental Systems Conference
http://www.haifa.il.ibm.com/conferences/systor2009/
^ permalink raw reply [flat|nested] 19+ messages in thread
* RE: [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support)
2008-12-02 13:01 [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Joerg Roedel
` (12 preceding siblings ...)
2008-12-03 3:44 ` [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support) Greg KH
@ 2008-12-03 10:17 ` Han, Weidong
13 siblings, 0 replies; 19+ messages in thread
From: Han, Weidong @ 2008-12-03 10:17 UTC (permalink / raw)
To: 'Joerg Roedel', 'Ingo Molnar',
'Avi Kivity', 'David Woodhouse',
'Greg Kroah-Hartman', 'Alexander Graf'
Cc: 'linux-kernel@vger.kernel.org',
'kvm@vger.kernel.org',
'iommu@lists.linux-foundation.org'
Joerg Roedel wrote:
> Hi,
>
> this patch series makes the current KVM device passthrough code
> generic
> enough so that other IOMMU implementation can also plug into this
> code.
> It works by factoring the functions Vt-d code exports to KVM into a
> generic interface which allows different backends.
>
> This is the second version of the patchset. The most important change
> to
> the previous version is that this patchset was rebased to the improved
> API from Han Weidong which supports multiple devices per IOMMU domain.
>
> For completeness, this series also includes the patches from Han with
> some cleanups. So this patchset can be applied on current avi/master
> tree.
>
> For testing this code can also be pulled against avi/master tree from
>
> git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu.git
> iommu-api
>
> This a basic implementation of a generic interface. It can and should
> be improved later to support more types of hardware IOMMUs then VT-d
> and
> AMD IOMMU.
>
> Since I have no VT-d hardware available these patches are only compile
> tested for now.
>
> Please review, comment and test these patches.
Tried this patchset on avi/master tree, VT-d works. Your generic API approach is ok for me. But the multiple devices assignment patches (here patch 01 and 02) need some improvement, and they are too big to review. I have split them into a series of small patches, and sent them out for review in another mail thread. After my multiple device assignment patchset is accepted, I think you can easily rebase your generic patchset on it.
Regards,
Weidong
^ permalink raw reply [flat|nested] 19+ messages in thread