* [PATCH 1/2] VT-d: Support multiple device assignment for KVM
@ 2008-11-26 3:18 Han, Weidong
2008-11-26 10:53 ` Avi Kivity
0 siblings, 1 reply; 5+ messages in thread
From: Han, Weidong @ 2008-11-26 3:18 UTC (permalink / raw)
To: Woodhouse, David, Jesse Barnes, 'Avi Kivity'
Cc: Kay, Allen M, Yu, Fenghua, kvm@vger.kernel.org,
iommu@lists.linux-foundation.org
[-- Attachment #1: Type: text/plain, Size: 45273 bytes --]
In order to support multiple device assignment for KVM, this patch makes the following main changes:
- extend dmar_domain to own multiple devices from different iommus, and use a bitmap of iommus to replace the iommu pointer in dmar_domain.
- add a flag DOMAIN_FLAG_VIRTUAL_MACHINE to represent KVM VT-d usage. Many functions (e.g. intel_map_single() and intel_unmap_single()) won't be used by KVM VT-d. Let them return directly when this flag is set.
- "SAGAW" capability may be different across iommus, that is to say the VT-d page table levels may be different among iommus. This patch uses a default agaw, and skips the top levels of the page tables for iommus which have a smaller agaw than the default.
- rename the APIs for KVM VT-d to make them more readable.
Signed-off-by: Weidong Han <weidong.han@intel.com>
---
drivers/pci/dmar.c | 15 +
drivers/pci/intel-iommu.c | 601 +++++++++++++++++++++++++++++++----------
include/linux/dma_remapping.h | 13 +-
include/linux/intel-iommu.h | 21 +-
4 files changed, 503 insertions(+), 147 deletions(-)
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 691b3ad..d6bdced 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -484,6 +484,7 @@ void __init detect_intel_iommu(void)
dmar_tbl = NULL;
}
+extern int width_to_agaw(int width);
int alloc_iommu(struct dmar_drhd_unit *drhd)
{
@@ -491,6 +492,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
int map_size;
u32 ver;
static int iommu_allocated = 0;
+ unsigned long sagaw;
+ int agaw;
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
if (!iommu)
@@ -506,6 +509,18 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+ /* set agaw, "SAGAW" may be different across iommus */
+ sagaw = cap_sagaw(iommu->cap);
+ for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+ agaw >= 0; agaw--)
+ if (test_bit(agaw, &sagaw))
+ break;
+ if (agaw < 0) {
+ printk(KERN_ERR "IOMMU: unsupported sagaw %lx\n", sagaw);
+ goto error;
+ }
+ iommu->agaw = agaw;
+
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
cap_max_fault_reg_offset(iommu->cap));
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 5c8baa4..3576ebe 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -50,8 +50,6 @@
#define IOAPIC_RANGE_END (0xfeefffff)
#define IOVA_START_ADDR (0x1000)
-#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
-
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
@@ -64,6 +62,7 @@ struct deferred_flush_tables {
int next;
struct iova *iova[HIGH_WATER_MARK];
struct dmar_domain *domain[HIGH_WATER_MARK];
+ struct intel_iommu *iommu;
};
static struct deferred_flush_tables *deferred_flush;
@@ -77,7 +76,7 @@ static LIST_HEAD(unmaps_to_do);
static int timer_on;
static long list_size;
-static void domain_remove_dev_info(struct dmar_domain *domain);
+static void domain_remove_all_dev_info(struct dmar_domain *domain);
int dmar_disabled;
static int __initdata dmar_map_gfx = 1;
@@ -184,6 +183,72 @@ void free_iova_mem(struct iova *iova)
kmem_cache_free(iommu_iova_cache, iova);
}
+/*
+ * In the native case each domain is related to only one iommu.  Return that
+ * iommu: the first non-ignored DRHD unit whose seq_id is set in
+ * domain->iommu_bmp.  Returns NULL for virtual machine domains and when no
+ * unit matches.
+ */
+static struct intel_iommu *domain_get_only_iommu(struct dmar_domain *domain)
+{
+	struct dmar_drhd_unit *unit;
+
+	if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)) {
+		for_each_drhd_unit(unit) {
+			if (!unit->ignored &&
+			    test_bit(unit->iommu->seq_id, &domain->iommu_bmp))
+				return unit->iommu;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Flush the CPU cache lines covering [addr, addr + size) when the domain is
+ * attached to an iommu for which ecap_coherent() is false.
+ *
+ * For a virtual machine domain every non-ignored iommu set in
+ * domain->iommu_bmp is considered; a single flush of the range serves all
+ * of them, so the walk stops at the first non-coherent iommu.  A native
+ * domain only consults the iommu returned by domain_get_only_iommu(), and
+ * nothing is flushed when that returns NULL.
+ */
+static void domain_flush_cache(struct dmar_domain *domain,
+			       void *addr, int size)
+{
+	struct intel_iommu *iommu;
+	struct dmar_drhd_unit *drhd;
+
+	if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)) {
+		iommu = domain_get_only_iommu(domain);
+		if (iommu && !ecap_coherent(iommu->ecap))
+			clflush_cache_range(addr, size);
+		return;
+	}
+
+	for_each_drhd_unit(drhd) {
+		if (drhd->ignored)
+			continue;
+		iommu = drhd->iommu;
+
+		if (!test_bit(iommu->seq_id, &domain->iommu_bmp))
+			continue;
+
+		if (!ecap_coherent(iommu->ecap)) {
+			/* flushing the same range again would be redundant */
+			clflush_cache_range(addr, size);
+			return;
+		}
+	}
+}
+
+/*
+ * Find the iommu responsible for the PCI function bus:devfn.
+ *
+ * Non-ignored DRHD units are walked in list order; a unit matches when one
+ * of its scope devices has the same bus number and devfn, or when it is an
+ * include_all unit.  Returns NULL when no unit matches.
+ */
+static struct intel_iommu *iommu_find_matched_iommu(u8 bus, u8 devfn)
+{
+	struct dmar_drhd_unit *drhd;
+	int i;
+
+	for_each_drhd_unit(drhd) {
+		if (drhd->ignored)
+			continue;
+
+		for (i = 0; i < drhd->devices_cnt; i++) {
+			struct pci_dev *dev = drhd->devices[i];
+
+			/*
+			 * NOTE(review): a scope slot is assumed to possibly
+			 * be NULL (e.g. device not present); skip it rather
+			 * than dereference it -- confirm against the device
+			 * scope parser.
+			 */
+			if (!dev)
+				continue;
+			if (dev->bus->number == bus && dev->devfn == devfn)
+				return drhd->iommu;
+		}
+
+		if (drhd->include_all)
+			return drhd->iommu;
+	}
+
+	return NULL;
+}
+
/* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
u8 bus, u8 devfn)
@@ -287,7 +352,7 @@ static inline int agaw_to_width(int agaw)
}
-static inline int width_to_agaw(int width)
+/*
+ * Convert an address width in bits to an AGAW index: a width of 30 maps to
+ * 0 and every further LEVEL_STRIDE bits add one (integer division).
+ * No longer static inline because dmar.c declares it extern and calls it
+ * from alloc_iommu().
+ */
+int width_to_agaw(int width)
{
return (width - 30) / LEVEL_STRIDE;
}
@@ -347,8 +412,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
flags);
return NULL;
}
- __iommu_flush_cache(domain->iommu, tmp_page,
- PAGE_SIZE);
+ domain_flush_cache(domain, tmp_page, PAGE_SIZE);
dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
/*
* high level table always sets r/w, last level page
@@ -356,7 +420,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
*/
dma_set_pte_readable(*pte);
dma_set_pte_writable(*pte);
- __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ domain_flush_cache(domain, pte, sizeof(*pte));
}
parent = phys_to_virt(dma_pte_addr(*pte));
level--;
@@ -399,7 +463,7 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
if (pte) {
dma_clear_pte(*pte);
- __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ domain_flush_cache(domain, pte, sizeof(*pte));
}
}
@@ -447,8 +511,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
free_pgtable_page(
phys_to_virt(dma_pte_addr(*pte)));
dma_clear_pte(*pte);
- __iommu_flush_cache(domain->iommu,
- pte, sizeof(*pte));
+ domain_flush_cache(domain, pte, sizeof(*pte));
}
tmp += level_size(level);
}
@@ -951,6 +1014,37 @@ static int iommu_init_domains(struct intel_iommu *iommu)
static void domain_exit(struct dmar_domain *domain);
+/*
+ * Tell whether a virtual machine domain still occupies a domain-id slot on
+ * any iommu other than @iommu.
+ *
+ * Returns 1 if some other non-ignored iommu has @domain in its domains[]
+ * array under an allocated id, 0 otherwise (also for a NULL domain or a
+ * domain without DOMAIN_FLAG_VIRTUAL_MACHINE).
+ */
+static int domain_in_other_iommus(struct dmar_domain *domain,
+				  struct intel_iommu *iommu)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *other_iommu;
+	unsigned long ndomains;
+	unsigned long i;
+
+	if (!domain || !(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE))
+		return 0;
+
+	for_each_drhd_unit(drhd) {
+		/* skip unused units and the iommu being asked about */
+		if (drhd->ignored || drhd->iommu == iommu)
+			continue;
+		other_iommu = drhd->iommu;
+		ndomains = cap_ndoms(other_iommu->cap);
+
+		/* only visit ids that are marked allocated in domain_ids */
+		for (i = find_first_bit(other_iommu->domain_ids, ndomains);
+		     i < ndomains;
+		     i = find_next_bit(other_iommu->domain_ids,
+				       ndomains, i + 1)) {
+			if (other_iommu->domains[i] == domain)
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
void free_dmar_iommu(struct intel_iommu *iommu)
{
struct dmar_domain *domain;
@@ -960,7 +1054,14 @@ void free_dmar_iommu(struct intel_iommu *iommu)
for (; i < cap_ndoms(iommu->cap); ) {
domain = iommu->domains[i];
clear_bit(i, iommu->domain_ids);
- domain_exit(domain);
+
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+ /* domain may be referenced by other iommus */
+ if (domain_in_other_iommus(domain, iommu) == 0)
+ domain_exit(domain);
+ }
+ else
+ domain_exit(domain);
i = find_next_bit(iommu->domain_ids,
cap_ndoms(iommu->cap), i+1);
}
@@ -1006,7 +1107,7 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
set_bit(num, iommu->domain_ids);
domain->id = num;
- domain->iommu = iommu;
+ set_bit(iommu->seq_id, &domain->iommu_bmp);
iommu->domains[num] = domain;
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -1016,10 +1117,39 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
+/*
+ * Release the domain id(s) held by @domain.
+ *
+ * For a virtual machine domain every non-ignored iommu is searched for the
+ * domain among its allocated ids; the matching id bit and domains[] slot
+ * are cleared under that iommu's lock.  For a native domain, domain->id is
+ * cleared on the iommu returned by domain_get_only_iommu().
+ */
static void iommu_free_domain(struct dmar_domain *domain)
{
unsigned long flags;
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu;
+	unsigned long i;
+	unsigned long ndomains;
- spin_lock_irqsave(&domain->iommu->lock, flags);
- clear_bit(domain->id, domain->iommu->domain_ids);
- spin_unlock_irqrestore(&domain->iommu->lock, flags);
+	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+		for_each_drhd_unit(drhd) {
+			if (drhd->ignored)
+				continue;
+			iommu = drhd->iommu;
+
+			ndomains = cap_ndoms(iommu->cap);
+			i = find_first_bit(iommu->domain_ids, ndomains);
+			for (; i < ndomains; ) {
+				if (iommu->domains[i] == domain) {
+					spin_lock_irqsave(&iommu->lock, flags);
+					clear_bit(i, iommu->domain_ids);
+					iommu->domains[i] = NULL;
+					spin_unlock_irqrestore(&iommu->lock,
+							       flags);
+					break;
+				}
+				i = find_next_bit(iommu->domain_ids,
+						  ndomains, i+1);
+			}
+		}
+	} else {
+		iommu = domain_get_only_iommu(domain);
+		/*
+		 * domain_get_only_iommu() returns NULL when no iommu bit is
+		 * set in iommu_bmp; there is no lock or id bitmap to touch
+		 * then, so bail out instead of dereferencing NULL.
+		 */
+		if (!iommu)
+			return;
+		spin_lock_irqsave(&iommu->lock, flags);
+		clear_bit(domain->id, iommu->domain_ids);
+		spin_unlock_irqrestore(&iommu->lock, flags);
+	}
}
static struct iova_domain reserved_iova_list;
@@ -1097,29 +1227,37 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
domain_reserve_special_ranges(domain);
- /* calculate AGAW */
- iommu = domain->iommu;
- if (guest_width > cap_mgaw(iommu->cap))
- guest_width = cap_mgaw(iommu->cap);
- domain->gaw = guest_width;
- adjust_width = guestwidth_to_adjustwidth(guest_width);
- agaw = width_to_agaw(adjust_width);
- sagaw = cap_sagaw(iommu->cap);
- if (!test_bit(agaw, &sagaw)) {
- /* hardware doesn't support it, choose a bigger one */
- pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
- agaw = find_next_bit(&sagaw, 5, agaw);
- if (agaw >= 5)
- return -ENODEV;
- }
- domain->agaw = agaw;
+ /* set AGAW */
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+ domain->gaw = DEFAULT_DOMAIN_ADDRESS_WIDTH;
+ domain->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+ }
+ else {
+ iommu = domain_get_only_iommu(domain);
+ if (guest_width > cap_mgaw(iommu->cap))
+ guest_width = cap_mgaw(iommu->cap);
+ domain->gaw = guest_width;
+ adjust_width = guestwidth_to_adjustwidth(guest_width);
+ agaw = width_to_agaw(adjust_width);
+ sagaw = cap_sagaw(iommu->cap);
+ if (!test_bit(agaw, &sagaw)) {
+ /* hardware doesn't support it, choose a bigger one */
+ pr_debug("IOMMU: hardware doesn't support agaw %d\n",
+ agaw);
+ agaw = find_next_bit(&sagaw, 5, agaw);
+ if (agaw >= 5)
+ return -ENODEV;
+ }
+ domain->agaw = agaw;
+ }
+
INIT_LIST_HEAD(&domain->devices);
/* always allocate the top pgd */
domain->pgd = (struct dma_pte *)alloc_pgtable_page();
if (!domain->pgd)
return -ENOMEM;
- __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
+ domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
return 0;
}
@@ -1131,7 +1269,7 @@ static void domain_exit(struct dmar_domain *domain)
if (!domain)
return;
- domain_remove_dev_info(domain);
+ domain_remove_all_dev_info(domain);
/* destroy iovas */
put_iova_domain(&domain->iovad);
end = DOMAIN_MAX_ADDR(domain->gaw);
@@ -1148,11 +1286,15 @@ static void domain_exit(struct dmar_domain *domain)
}
static int domain_context_mapping_one(struct dmar_domain *domain,
- u8 bus, u8 devfn)
+ struct intel_iommu *iommu, u8 bus, u8 devfn)
{
struct context_entry *context;
- struct intel_iommu *iommu = domain->iommu;
unsigned long flags;
+ struct dma_pte *pgd;
+ unsigned long num;
+ unsigned long ndomains;
+ int id;
+ int agaw;
pr_debug("Set context mapping for %02x:%02x.%d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1166,22 +1308,67 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
return 0;
}
- context_set_domain_id(*context, domain->id);
- context_set_address_width(*context, domain->agaw);
- context_set_address_root(*context, virt_to_phys(domain->pgd));
+ id = domain->id;
+ pgd = domain->pgd;
+
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+ int found = 0;
+
+ /* find an available domain id for this device in iommu */
+ ndomains = cap_ndoms(iommu->cap);
+ num = find_first_bit(iommu->domain_ids, ndomains);
+ for (; num < ndomains; ) {
+ if (iommu->domains[num] == domain) {
+ id = num;
+ found = 1;
+ break;
+ }
+ num = find_next_bit(iommu->domain_ids,
+ cap_ndoms(iommu->cap), num+1);
+ }
+
+ if (found == 0) {
+ num = find_first_zero_bit(iommu->domain_ids, ndomains);
+ if (num >= ndomains) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ printk(KERN_ERR "IOMMU: no free domain ids\n");
+ return -EFAULT;
+ }
+
+ set_bit(num, iommu->domain_ids);
+ iommu->domains[num] = domain;
+ id = num;
+ }
+
+ /* Skip top levels of page tables for
+ * iommu which has less agaw than default.
+ */
+ for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
+ pgd = phys_to_virt(dma_pte_addr(*pgd));
+ if (!dma_pte_present(*pgd)) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return -ENOMEM;
+ }
+ }
+ }
+
+ context_set_domain_id(*context, id);
+ context_set_address_width(*context, iommu->agaw);
+ context_set_address_root(*context, virt_to_phys(pgd));
context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
context_set_fault_enable(*context);
context_set_present(*context);
__iommu_flush_cache(iommu, context, sizeof(*context));
/* it's a non-present to present mapping */
- if (iommu->flush.flush_context(iommu, domain->id,
+ if (iommu->flush.flush_context(iommu, id,
(((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL, 1))
iommu_flush_write_buffer(iommu);
else
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
+ set_bit(iommu->seq_id, &domain->iommu_bmp);
spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
}
@@ -1191,9 +1378,15 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
{
int ret;
struct pci_dev *tmp, *parent;
+ struct intel_iommu *iommu;
- ret = domain_context_mapping_one(domain, pdev->bus->number,
- pdev->devfn);
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ ret = domain_context_mapping_one(domain, iommu,
+ pdev->bus->number, pdev->devfn);
if (ret)
return ret;
@@ -1204,27 +1397,32 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
/* Secondary interface's bus number and devfn 0 */
parent = pdev->bus->self;
while (parent != tmp) {
- ret = domain_context_mapping_one(domain, parent->bus->number,
- parent->devfn);
+ ret = domain_context_mapping_one(domain, iommu,
+ parent->bus->number, parent->devfn);
if (ret)
return ret;
parent = parent->bus->self;
}
if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
- return domain_context_mapping_one(domain,
+ return domain_context_mapping_one(domain, iommu,
tmp->subordinate->number, 0);
else /* this is a legacy PCI bridge */
- return domain_context_mapping_one(domain,
+ return domain_context_mapping_one(domain, iommu,
tmp->bus->number, tmp->devfn);
}
-static int domain_context_mapped(struct dmar_domain *domain,
- struct pci_dev *pdev)
+static int domain_context_mapped(struct pci_dev *pdev)
{
int ret;
struct pci_dev *tmp, *parent;
+ struct intel_iommu *iommu;
+
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return 0;
- ret = device_context_mapped(domain->iommu,
+ ret = device_context_mapped(iommu,
pdev->bus->number, pdev->devfn);
if (!ret)
return ret;
@@ -1235,17 +1433,17 @@ static int domain_context_mapped(struct dmar_domain *domain,
/* Secondary interface's bus number and devfn 0 */
parent = pdev->bus->self;
while (parent != tmp) {
- ret = device_context_mapped(domain->iommu, parent->bus->number,
+ ret = device_context_mapped(iommu, parent->bus->number,
parent->devfn);
if (!ret)
return ret;
parent = parent->bus->self;
}
if (tmp->is_pcie)
- return device_context_mapped(domain->iommu,
+ return device_context_mapped(iommu,
tmp->subordinate->number, 0);
else
- return device_context_mapped(domain->iommu,
+ return device_context_mapped(iommu,
tmp->bus->number, tmp->devfn);
}
@@ -1276,23 +1474,60 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
BUG_ON(dma_pte_addr(*pte));
dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
dma_set_pte_prot(*pte, prot);
- __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ domain_flush_cache(domain, pte, sizeof(*pte));
start_pfn++;
index++;
}
return 0;
}
-static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+/*
+ * Detach the PCI function bus:devfn from @domain.
+ *
+ * The context entry is cleared on the iommu that iommu_find_matched_iommu()
+ * reports for the device, followed by a global context-cache and IOTLB
+ * invalidation on that iommu.  Nothing is done when no iommu matches.
+ * Afterwards the iommu's bit is dropped from domain->iommu_bmp, unless the
+ * domain is a virtual machine domain that still lists another device behind
+ * the same iommu.
+ */
+static void detach_domain_for_dev(struct dmar_domain *domain,
+				  u8 bus, u8 devfn)
{
- clear_context_table(domain->iommu, bus, devfn);
- domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
- DMA_CCMD_GLOBAL_INVL, 0);
- domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
- DMA_TLB_GLOBAL_FLUSH, 0);
+	struct device_domain_info *info;
+	unsigned long flags;
+	struct intel_iommu *iommu;
+	int found = 0;
+
+	iommu = iommu_find_matched_iommu(bus, devfn);
+	if (!iommu)
+		return;
+
+	clear_context_table(iommu, bus, devfn);
+	iommu->flush.flush_context(iommu, 0, 0, 0,
+				   DMA_CCMD_GLOBAL_INVL, 0);
+	iommu->flush.flush_iotlb(iommu, 0, 0, 0,
+				 DMA_TLB_GLOBAL_FLUSH, 0);
+
+	/*
+	 * Keep this iommu in iommu_bmp while the virtual machine domain
+	 * still owns a device behind it.
+	 */
+	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+		/*
+		 * Walk every entry: re-reading the list head in a
+		 * while (!list_empty()) loop never advances and spins forever
+		 * when the first device sits behind a different iommu.
+		 */
+		list_for_each_entry(info, &domain->devices, link) {
+			if (iommu_find_matched_iommu(info->bus,
+						     info->devfn) == iommu) {
+				found = 1;
+				break;
+			}
+		}
+	}
+
+	if (!found) {
+		spin_lock_irqsave(&iommu->lock, flags);
+		clear_bit(iommu->seq_id, &domain->iommu_bmp);
+		spin_unlock_irqrestore(&iommu->lock, flags);
+	}
}
-static void domain_remove_dev_info(struct dmar_domain *domain)
+static void domain_remove_all_dev_info(struct dmar_domain *domain)
{
struct device_domain_info *info;
unsigned long flags;
@@ -1336,7 +1571,6 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
struct dmar_domain *domain, *found = NULL;
struct intel_iommu *iommu;
- struct dmar_drhd_unit *drhd;
struct device_domain_info *info, *tmp;
struct pci_dev *dev_tmp;
unsigned long flags;
@@ -1371,13 +1605,10 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
}
/* Allocate new domain for the device */
- drhd = dmar_find_matched_drhd_unit(pdev);
- if (!drhd) {
- printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
- pci_name(pdev));
- return NULL;
- }
- iommu = drhd->iommu;
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return NULL;
domain = iommu_alloc_domain(iommu);
if (!domain)
@@ -1400,7 +1631,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
info->dev = NULL;
info->domain = domain;
/* This domain is shared by devices under p2p bridge */
- domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
+ domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
/* pcie-to-pci bridge already has a domain, uses it */
found = NULL;
@@ -1805,7 +2036,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
}
/* make sure context mapping is ok */
- if (unlikely(!domain_context_mapped(domain, pdev))) {
+ if (unlikely(!domain_context_mapped(pdev))) {
ret = domain_context_mapping(domain, pdev);
if (ret) {
printk(KERN_ERR
@@ -1823,6 +2054,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
{
struct pci_dev *pdev = to_pci_dev(hwdev);
struct dmar_domain *domain;
+ struct intel_iommu *iommu;
phys_addr_t start_paddr;
struct iova *iova;
int prot = 0;
@@ -1836,6 +2068,14 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
if (!domain)
return 0;
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+ return 0;
+
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return 0;
+
size = aligned_size((u64)paddr, size);
iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
@@ -1849,7 +2089,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
* mappings..
*/
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
- !cap_zlr(domain->iommu->cap))
+ !cap_zlr(iommu->cap))
prot |= DMA_PTE_READ;
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;
@@ -1865,10 +2105,10 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
goto error;
/* it's a non-present to present mapping */
- ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
+ ret = iommu_flush_iotlb_psi(iommu, domain->id,
start_paddr, size >> VTD_PAGE_SHIFT, 1);
if (ret)
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
return start_paddr + ((u64)paddr & (~PAGE_MASK));
@@ -1896,8 +2136,7 @@ static void flush_unmaps(void)
/* just flush them all */
for (i = 0; i < g_num_of_iommus; i++) {
if (deferred_flush[i].next) {
- struct intel_iommu *iommu =
- deferred_flush[i].domain[0]->iommu;
+ struct intel_iommu *iommu = deferred_flush[i].iommu;
iommu->flush.flush_iotlb(iommu, 0, 0, 0,
DMA_TLB_GLOBAL_FLUSH, 0);
@@ -1921,7 +2160,8 @@ static void flush_unmaps_timeout(unsigned long data)
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
-static void add_unmap(struct dmar_domain *dom, struct iova *iova)
+static void add_unmap(struct dmar_domain *dom,
+ struct intel_iommu *iommu, struct iova *iova)
{
unsigned long flags;
int next, iommu_id;
@@ -1930,11 +2170,12 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
if (list_size == HIGH_WATER_MARK)
flush_unmaps();
- iommu_id = dom->iommu->seq_id;
+ iommu_id = iommu->seq_id;
next = deferred_flush[iommu_id].next;
deferred_flush[iommu_id].domain[next] = dom;
deferred_flush[iommu_id].iova[next] = iova;
+ deferred_flush[iommu_id].iommu = iommu;
deferred_flush[iommu_id].next++;
if (!timer_on) {
@@ -1952,12 +2193,21 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
struct dmar_domain *domain;
unsigned long start_addr;
struct iova *iova;
+ struct intel_iommu *iommu;
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return;
domain = find_domain(pdev);
BUG_ON(!domain);
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+ return;
+
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return;
+
iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
if (!iova)
return;
@@ -1973,13 +2223,13 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
if (intel_iommu_strict) {
- if (iommu_flush_iotlb_psi(domain->iommu,
+ if (iommu_flush_iotlb_psi(iommu,
domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
/* free iova */
__free_iova(&domain->iovad, iova);
} else {
- add_unmap(domain, iova);
+ add_unmap(domain, iommu, iova);
/*
* queue up the release of the unmap to save the 1/6th of the
* cpu used up by the iotlb flush operation...
@@ -2036,12 +2286,21 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
size_t size = 0;
void *addr;
struct scatterlist *sg;
+ struct intel_iommu *iommu;
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return;
domain = find_domain(pdev);
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+ return;
+
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return;
+
iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
if (!iova)
return;
@@ -2057,9 +2316,9 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
- if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
+ if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
size >> VTD_PAGE_SHIFT, 0))
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
/* free iova */
__free_iova(&domain->iovad, iova);
@@ -2093,6 +2352,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
int ret;
struct scatterlist *sg;
unsigned long start_addr;
+ struct intel_iommu *iommu;
BUG_ON(dir == DMA_NONE);
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
@@ -2102,6 +2362,14 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
if (!domain)
return 0;
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+ return 0;
+
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return 0;
+
for_each_sg(sglist, sg, nelems, i) {
addr = SG_ENT_VIRT_ADDRESS(sg);
addr = (void *)virt_to_phys(addr);
@@ -2119,7 +2387,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
* mappings..
*/
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
- !cap_zlr(domain->iommu->cap))
+ !cap_zlr(iommu->cap))
prot |= DMA_PTE_READ;
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;
@@ -2151,9 +2419,9 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
}
/* it's a non-present to present mapping */
- if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
+ if (iommu_flush_iotlb_psi(iommu, domain->id,
start_addr, offset >> VTD_PAGE_SHIFT, 1))
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
return nelems;
}
@@ -2328,47 +2596,28 @@ int __init intel_iommu_init(void)
return 0;
}
-void intel_iommu_domain_exit(struct dmar_domain *domain)
+/* domain id for virtual machine, it won't be used to set in context */
+static unsigned long vm_domid;
+
+/*
+ * Allocate a domain object for a virtual machine.  The domain takes the
+ * next vm_domid as its id, starts with an empty iommu bitmap and carries
+ * only DOMAIN_FLAG_VIRTUAL_MACHINE.  Returns NULL when the allocation
+ * fails.
+ */
+static struct dmar_domain *iommu_alloc_vm_domain(void)
{
- u64 end;
+	struct dmar_domain *domain;
- /* Domain 0 is reserved, so dont process it */
+	domain = alloc_domain_mem();
if (!domain)
- return;
-
- end = DOMAIN_MAX_ADDR(domain->gaw);
- end = end & (~VTD_PAGE_MASK);
-
- /* clear ptes */
- dma_pte_clear_range(domain, 0, end);
+		return NULL;
- /* free page tables */
- dma_pte_free_pagetable(domain, 0, end);
+	/*
+	 * NOTE(review): alloc_domain_mem() is not shown to zero the object,
+	 * so assign flags and iommu_bmp outright instead of OR-ing into or
+	 * relying on whatever the memory held before -- confirm.
+	 */
+	domain->id = vm_domid++;
+	domain->iommu_bmp = 0;
+	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
- iommu_free_domain(domain);
- free_domain_mem(domain);
+	return domain;
}
-EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
-struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
+struct dmar_domain *intel_iommu_alloc_domain(void)
{
- struct dmar_drhd_unit *drhd;
struct dmar_domain *domain;
- struct intel_iommu *iommu;
- drhd = dmar_find_matched_drhd_unit(pdev);
- if (!drhd) {
- printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
- return NULL;
- }
-
- iommu = drhd->iommu;
- if (!iommu) {
- printk(KERN_ERR
- "intel_iommu_domain_alloc: iommu == NULL\n");
- return NULL;
- }
- domain = iommu_alloc_domain(iommu);
+ domain = iommu_alloc_vm_domain();
if (!domain) {
printk(KERN_ERR
"intel_iommu_domain_alloc: domain == NULL\n");
@@ -2377,44 +2626,122 @@ struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
printk(KERN_ERR
"intel_iommu_domain_alloc: domain_init() failed\n");
- intel_iommu_domain_exit(domain);
+ domain_exit(domain);
return NULL;
}
+
return domain;
}
-EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain);
-int intel_iommu_context_mapping(
- struct dmar_domain *domain, struct pci_dev *pdev)
+/* Exported wrapper: tear down @domain by passing it to domain_exit(). */
+void intel_iommu_free_domain(struct dmar_domain *domain)
{
- int rc;
- rc = domain_context_mapping(domain, pdev);
- return rc;
+ domain_exit(domain);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_free_domain);
+
+/*
+ * Record that @pdev belongs to @domain: allocate a device_domain_info, fill
+ * in bus/devfn/dev/domain, link it on the domain's device list and on the
+ * global device_domain_list, and point pdev's archdata.iommu at it (all
+ * list updates under device_domain_lock).
+ *
+ * Returns 0 on success, -ENOMEM when the info cannot be allocated.
+ */
+static int domain_add_dev_info(struct dmar_domain *domain,
+			       struct pci_dev *pdev)
+{
+	struct device_domain_info *devinfo = alloc_devinfo_mem();
+	unsigned long irqflags;
+
+	if (devinfo == NULL)
+		return -ENOMEM;
+
+	devinfo->domain = domain;
+	devinfo->dev = pdev;
+	devinfo->devfn = pdev->devfn;
+	devinfo->bus = pdev->bus->number;
+
+	spin_lock_irqsave(&device_domain_lock, irqflags);
+	list_add(&devinfo->link, &domain->devices);
+	list_add(&devinfo->global, &device_domain_list);
+	pdev->dev.archdata.iommu = devinfo;
+	spin_unlock_irqrestore(&device_domain_lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * Remove @pdev from @domain: unlink the device_domain_info whose bus/devfn
+ * match @pdev from the domain's device list and the global list, clear the
+ * device's archdata.iommu pointer, detach the device from the domain and
+ * free the info.  Does nothing when the domain has no entry for @pdev.
+ */
+static void domain_remove_dev_info(struct dmar_domain *domain,
+				   struct pci_dev *pdev)
+{
+	struct device_domain_info *info, *found = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	/*
+	 * Walk every entry: re-reading the list head in a
+	 * while (!list_empty()) loop never advances and spins forever, with
+	 * the lock held, when the first entry is not @pdev.
+	 */
+	list_for_each_entry(info, &domain->devices, link) {
+		if (info->bus == pdev->bus->number &&
+		    info->devfn == pdev->devfn) {
+			list_del(&info->link);
+			list_del(&info->global);
+			if (info->dev)
+				info->dev->dev.archdata.iommu = NULL;
+			found = info;
+			/* entry is unlinked; must not keep iterating from it */
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+
+	if (!found)
+		return;
+
+	/* detach and free outside device_domain_lock, as before */
+	detach_domain_for_dev(found->domain, found->bus, found->devfn);
+	free_devinfo_mem(found);
+}
+
+/*
+ * Assign @pdev to @domain.  If the device already has a context mapping and
+ * find_domain() reports a domain for it, the device is first removed from
+ * that domain.  Then the context entries are set up and the device is added
+ * to @domain's device list.
+ *
+ * Returns 0 on success, otherwise the error from domain_context_mapping()
+ * or domain_add_dev_info().
+ */
+int intel_iommu_assign_device(struct dmar_domain *domain,
+			      struct pci_dev *pdev)
+{
+	struct dmar_domain *prev;
+	int err;
+
+	if (domain_context_mapped(pdev)) {
+		prev = find_domain(pdev);
+		if (prev)
+			domain_remove_dev_info(prev, pdev);
+	}
+
+	err = domain_context_mapping(domain, pdev);
+	if (err)
+		return err;
+
+	return domain_add_dev_info(domain, pdev);
+}
-EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+EXPORT_SYMBOL_GPL(intel_iommu_assign_device);
-int intel_iommu_page_mapping(
- struct dmar_domain *domain, dma_addr_t iova,
- u64 hpa, size_t size, int prot)
+
+/* Exported wrapper: remove @pdev from @domain via domain_remove_dev_info(). */
+void intel_iommu_deassign_device(struct dmar_domain *domain,
+ struct pci_dev *pdev)
{
- int rc;
- rc = domain_page_mapping(domain, iova, hpa, size, prot);
- return rc;
+ domain_remove_dev_info(domain, pdev);
}
-EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+EXPORT_SYMBOL_GPL(intel_iommu_deassign_device);
-void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+/*
+ * Exported wrapper around domain_page_mapping(): all arguments are passed
+ * through unchanged and its return value is returned as is.
+ */
+int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova,
+			  u64 hpa, size_t size, int prot)
{
- detach_domain_for_dev(domain, bus, devfn);
+	return domain_page_mapping(domain, iova, hpa, size, prot);
}
-EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+EXPORT_SYMBOL_GPL(intel_iommu_map_pages);
-struct dmar_domain *
-intel_iommu_find_domain(struct pci_dev *pdev)
+/*
+ * Clear the page table entries of @domain that cover [iova, iova + size).
+ * Neither @iova nor @size needs to be page aligned; the range is widened to
+ * whole pages before it is handed to dma_pte_clear_range().  A zero @size
+ * clears nothing.
+ */
+void intel_iommu_unmap_pages(struct dmar_domain *domain,
+			     dma_addr_t iova, size_t size)
{
- return find_domain(pdev);
+	dma_addr_t base, end;
+
+	if (!size)
+		return;
+
+	/*
+	 * The address might not be aligned: round the start down and the
+	 * END of the range up.  Rounding only @size up would miss the last
+	 * page whenever iova's page offset pushes the range across a page
+	 * boundary (e.g. iova 0x1800, size 0x1000 touches two pages).
+	 */
+	base = iova & PAGE_MASK;
+	end = PAGE_ALIGN(iova + size);
+	dma_pte_clear_range(domain, base, end);
}
-EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+EXPORT_SYMBOL_GPL(intel_iommu_unmap_pages);
int intel_iommu_found(void)
{
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 952df39..28ef89d 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -111,11 +111,21 @@ struct dma_pte {
(p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
#define dma_pte_present(p) (((p).val & 3) != 0)
+/* domain flags, one domain owns one device by default */
+
+/* devices under the same p2p bridge are owned in one domain */
+#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
+
+/* domain represents a virtual machine; more than one device
+ * may be owned by one domain, e.g. a KVM guest.
+ */
+#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
+
struct intel_iommu;
struct dmar_domain {
int id; /* domain id */
- struct intel_iommu *iommu; /* back pointer to owning iommu */
+ unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
struct list_head devices; /* all devices' list */
struct iova_domain iovad; /* iova's that belong to this domain */
@@ -127,7 +137,6 @@ struct dmar_domain {
/* adjusted guest address width, 0 is level 2 30-bit */
int agaw;
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
int flags;
};
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3d017cf..c2f37b8 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -219,6 +219,8 @@ do { \
} \
} while (0)
+#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
+
#define QI_LENGTH 256 /* queue length */
enum {
@@ -299,6 +301,7 @@ struct intel_iommu {
struct dmar_domain **domains; /* ptr to domains */
spinlock_t lock; /* protect context, domain ids */
struct root_entry *root_entry; /* virtual address */
+ int agaw;
unsigned int irq;
unsigned char name[7]; /* Device Name */
@@ -334,14 +337,16 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
-void intel_iommu_domain_exit(struct dmar_domain *domain);
-struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
-int intel_iommu_context_mapping(struct dmar_domain *domain,
- struct pci_dev *pdev);
-int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
- u64 hpa, size_t size, int prot);
-void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
-struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
+struct dmar_domain *intel_iommu_alloc_domain(void);
+void intel_iommu_free_domain(struct dmar_domain *domain);
+int intel_iommu_assign_device(struct dmar_domain *domain,
+ struct pci_dev *pdev);
+void intel_iommu_deassign_device(struct dmar_domain *domain,
+ struct pci_dev *pdev);
+int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova,
+ u64 hpa, size_t size, int prot);
+void intel_iommu_unmap_pages(struct dmar_domain *domain,
+ dma_addr_t iova, size_t size);
u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
#ifdef CONFIG_DMAR
--
1.5.1
[-- Attachment #2: 0001-VT-d-multiple-device-assignment-for-KVM.patch --]
[-- Type: application/octet-stream, Size: 34464 bytes --]
From 8202dd6dbe68fe2cde052a14071af7109e7b4b2b Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Wed, 26 Nov 2008 09:22:05 +0800
Subject: [PATCH] VT-d multiple device assignment for KVM
Signed-off-by: Weidong Han <weidong.han@intel.com>
---
drivers/pci/dmar.c | 15 +
drivers/pci/intel-iommu.c | 601 +++++++++++++++++++++++++++++++----------
include/linux/dma_remapping.h | 13 +-
include/linux/intel-iommu.h | 21 +-
4 files changed, 503 insertions(+), 147 deletions(-)
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 691b3ad..d6bdced 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -484,6 +484,7 @@ void __init detect_intel_iommu(void)
dmar_tbl = NULL;
}
+extern int width_to_agaw(int width);
int alloc_iommu(struct dmar_drhd_unit *drhd)
{
@@ -491,6 +492,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
int map_size;
u32 ver;
static int iommu_allocated = 0;
+ unsigned long sagaw;
+ int agaw;
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
if (!iommu)
@@ -506,6 +509,18 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+ /* set agaw, "SAGAW" may be different across iommus */
+ sagaw = cap_sagaw(iommu->cap);
+ for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+ agaw >= 0; agaw--)
+ if (test_bit(agaw, &sagaw))
+ break;
+ if (agaw < 0) {
+ printk(KERN_ERR "IOMMU: unsupported sagaw %lx\n", sagaw);
+ goto error;
+ }
+ iommu->agaw = agaw;
+
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
cap_max_fault_reg_offset(iommu->cap));
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 5c8baa4..3576ebe 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -50,8 +50,6 @@
#define IOAPIC_RANGE_END (0xfeefffff)
#define IOVA_START_ADDR (0x1000)
-#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
-
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
@@ -64,6 +62,7 @@ struct deferred_flush_tables {
int next;
struct iova *iova[HIGH_WATER_MARK];
struct dmar_domain *domain[HIGH_WATER_MARK];
+ struct intel_iommu *iommu;
};
static struct deferred_flush_tables *deferred_flush;
@@ -77,7 +76,7 @@ static LIST_HEAD(unmaps_to_do);
static int timer_on;
static long list_size;
-static void domain_remove_dev_info(struct dmar_domain *domain);
+static void domain_remove_all_dev_info(struct dmar_domain *domain);
int dmar_disabled;
static int __initdata dmar_map_gfx = 1;
@@ -184,6 +183,72 @@ void free_iova_mem(struct iova *iova)
kmem_cache_free(iommu_iova_cache, iova);
}
+/*
+ * Native (non virtual machine) domains are tied to a single iommu.
+ * Return the first non-ignored iommu whose seq_id bit is set in
+ * domain->iommu_bmp, or NULL when none is set or when @domain carries
+ * DOMAIN_FLAG_VIRTUAL_MACHINE.
+ */
+static struct intel_iommu *domain_get_only_iommu(struct dmar_domain *domain)
+{
+	struct dmar_drhd_unit *unit;
+	struct intel_iommu *only = NULL;
+
+	if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)) {
+		for_each_drhd_unit(unit) {
+			if (!unit->ignored &&
+			    test_bit(unit->iommu->seq_id, &domain->iommu_bmp)) {
+				only = unit->iommu;
+				break;
+			}
+		}
+	}
+
+	return only;
+}
+
+/*
+ * Flush the CPU cache lines covering @size bytes at @addr when an iommu
+ * used by @domain reports, through ecap_coherent(), that it is not
+ * coherent.
+ *
+ * A virtual machine domain may use several iommus; the range is flushed
+ * as soon as the first non-coherent one is found.  Any other domain is
+ * checked against the single iommu returned by domain_get_only_iommu().
+ */
+static void domain_flush_cache(struct dmar_domain *domain,
+				void *addr, int size)
+{
+	struct intel_iommu *iommu;
+	struct dmar_drhd_unit *drhd;
+
+	if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)) {
+		iommu = domain_get_only_iommu(domain);
+		if (iommu && !ecap_coherent(iommu->ecap))
+			clflush_cache_range(addr, size);
+		return;
+	}
+
+	for_each_drhd_unit(drhd) {
+		if (drhd->ignored)
+			continue;
+		iommu = drhd->iommu;
+
+		if (!test_bit(iommu->seq_id, &domain->iommu_bmp))
+			continue;
+
+		if (!ecap_coherent(iommu->ecap)) {
+			/*
+			 * The flush acts on the memory range, not on one
+			 * iommu, so repeating it for further non-coherent
+			 * units would only cost time.
+			 */
+			clflush_cache_range(addr, size);
+			return;
+		}
+	}
+}
+
+/*
+ * Find the iommu responsible for the device at @bus/@devfn.
+ *
+ * Every non-ignored DRHD unit is examined in list order: a unit matches
+ * when one of its devices[] entries has the same bus number and devfn, or
+ * when the unit is flagged include_all.  Returns the matching unit's
+ * iommu, or NULL when no unit matches.
+ */
+static struct intel_iommu *iommu_find_matched_iommu(u8 bus, u8 devfn)
+{
+	struct dmar_drhd_unit *drhd = NULL;
+	int i;
+
+	for_each_drhd_unit(drhd) {
+		if (drhd->ignored)
+			continue;
+
+		for (i = 0; i < drhd->devices_cnt; i++) {
+			/*
+			 * NOTE(review): devices[] presumably keeps a NULL
+			 * slot for a scope entry whose device is absent -
+			 * confirm against the DMAR scope parser.  The guard
+			 * costs nothing when every slot is populated.
+			 */
+			if (drhd->devices[i] &&
+			    drhd->devices[i]->bus->number == bus &&
+			    drhd->devices[i]->devfn == devfn)
+				return drhd->iommu;
+		}
+
+		if (drhd->include_all)
+			return drhd->iommu;
+	}
+
+	return NULL;
+}
+
/* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
u8 bus, u8 devfn)
@@ -287,7 +352,7 @@ static inline int agaw_to_width(int agaw)
}
-static inline int width_to_agaw(int width)
+int width_to_agaw(int width)
{
return (width - 30) / LEVEL_STRIDE;
}
@@ -347,8 +412,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
flags);
return NULL;
}
- __iommu_flush_cache(domain->iommu, tmp_page,
- PAGE_SIZE);
+ domain_flush_cache(domain, tmp_page, PAGE_SIZE);
dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
/*
* high level table always sets r/w, last level page
@@ -356,7 +420,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
*/
dma_set_pte_readable(*pte);
dma_set_pte_writable(*pte);
- __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ domain_flush_cache(domain, pte, sizeof(*pte));
}
parent = phys_to_virt(dma_pte_addr(*pte));
level--;
@@ -399,7 +463,7 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
if (pte) {
dma_clear_pte(*pte);
- __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ domain_flush_cache(domain, pte, sizeof(*pte));
}
}
@@ -447,8 +511,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
free_pgtable_page(
phys_to_virt(dma_pte_addr(*pte)));
dma_clear_pte(*pte);
- __iommu_flush_cache(domain->iommu,
- pte, sizeof(*pte));
+ domain_flush_cache(domain, pte, sizeof(*pte));
}
tmp += level_size(level);
}
@@ -951,6 +1014,37 @@ static int iommu_init_domains(struct intel_iommu *iommu)
static void domain_exit(struct dmar_domain *domain);
+/*
+ * Tell whether @domain still occupies a domain-id slot on an iommu other
+ * than @iommu.
+ *
+ * Only DOMAIN_FLAG_VIRTUAL_MACHINE domains are searched; a NULL @domain or
+ * any other kind of domain yields 0.  Returns 1 as soon as a slot pointing
+ * at @domain is found on another non-ignored unit, 0 otherwise.
+ */
+static int domain_in_other_iommus(struct dmar_domain *domain,
+				struct intel_iommu *iommu)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *other_iommu;
+	unsigned long ndomains;
+	unsigned long i;
+
+	if (!domain)
+		return 0;
+
+	if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE))
+		return 0;
+
+	for_each_drhd_unit(drhd) {
+		/*
+		 * Skip ignored units as well as the iommu being excluded.
+		 * Either condition alone is enough to skip, hence "||".
+		 */
+		if (drhd->ignored || drhd->iommu == iommu)
+			continue;
+		other_iommu = drhd->iommu;
+		ndomains = cap_ndoms(other_iommu->cap);
+
+		/* visit only the slots whose domain id is allocated */
+		for (i = find_first_bit(other_iommu->domain_ids, ndomains);
+		     i < ndomains;
+		     i = find_next_bit(other_iommu->domain_ids,
+				       ndomains, i + 1)) {
+			if (other_iommu->domains[i] == domain)
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
void free_dmar_iommu(struct intel_iommu *iommu)
{
struct dmar_domain *domain;
@@ -960,7 +1054,14 @@ void free_dmar_iommu(struct intel_iommu *iommu)
for (; i < cap_ndoms(iommu->cap); ) {
domain = iommu->domains[i];
clear_bit(i, iommu->domain_ids);
- domain_exit(domain);
+
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+ /* domain may be referenced by other iommus */
+ if (domain_in_other_iommus(domain, iommu) == 0)
+ domain_exit(domain);
+ }
+ else
+ domain_exit(domain);
i = find_next_bit(iommu->domain_ids,
cap_ndoms(iommu->cap), i+1);
}
@@ -1006,7 +1107,7 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
set_bit(num, iommu->domain_ids);
domain->id = num;
- domain->iommu = iommu;
+ set_bit(iommu->seq_id, &domain->iommu_bmp);
iommu->domains[num] = domain;
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -1016,10 +1117,39 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
+/*
+ * Release the domain id(s) held by @domain.
+ *
+ * A DOMAIN_FLAG_VIRTUAL_MACHINE domain may own a slot on several iommus:
+ * every non-ignored unit is searched and the slot pointing at @domain is
+ * cleared.  Any other domain gives back domain->id on the single iommu
+ * returned by domain_get_only_iommu().
+ */
static void iommu_free_domain(struct dmar_domain *domain)
{
unsigned long flags;
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu;
+	unsigned long i;
+	unsigned long ndomains;
- spin_lock_irqsave(&domain->iommu->lock, flags);
- clear_bit(domain->id, domain->iommu->domain_ids);
- spin_unlock_irqrestore(&domain->iommu->lock, flags);
+	if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)) {
+		iommu = domain_get_only_iommu(domain);
+		/*
+		 * domain_get_only_iommu() returns NULL once no bit is left
+		 * in iommu_bmp; there is then no iommu to release the id on.
+		 * NOTE(review): detach_domain_for_dev() clears that bit, so
+		 * check whether the id can leak when it runs before this.
+		 */
+		if (!iommu)
+			return;
+		spin_lock_irqsave(&iommu->lock, flags);
+		clear_bit(domain->id, iommu->domain_ids);
+		spin_unlock_irqrestore(&iommu->lock, flags);
+		return;
+	}
+
+	for_each_drhd_unit(drhd) {
+		if (drhd->ignored)
+			continue;
+		iommu = drhd->iommu;
+		ndomains = cap_ndoms(iommu->cap);
+
+		/*
+		 * domain_ids and domains[] are modified under iommu->lock
+		 * elsewhere, so the search runs under the lock too.
+		 */
+		spin_lock_irqsave(&iommu->lock, flags);
+		for (i = find_first_bit(iommu->domain_ids, ndomains);
+		     i < ndomains;
+		     i = find_next_bit(iommu->domain_ids, ndomains, i + 1)) {
+			if (iommu->domains[i] != domain)
+				continue;
+			clear_bit(i, iommu->domain_ids);
+			iommu->domains[i] = NULL;
+			break;
+		}
+		spin_unlock_irqrestore(&iommu->lock, flags);
+	}
}
static struct iova_domain reserved_iova_list;
@@ -1097,29 +1227,37 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
domain_reserve_special_ranges(domain);
- /* calculate AGAW */
- iommu = domain->iommu;
- if (guest_width > cap_mgaw(iommu->cap))
- guest_width = cap_mgaw(iommu->cap);
- domain->gaw = guest_width;
- adjust_width = guestwidth_to_adjustwidth(guest_width);
- agaw = width_to_agaw(adjust_width);
- sagaw = cap_sagaw(iommu->cap);
- if (!test_bit(agaw, &sagaw)) {
- /* hardware doesn't support it, choose a bigger one */
- pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
- agaw = find_next_bit(&sagaw, 5, agaw);
- if (agaw >= 5)
- return -ENODEV;
- }
- domain->agaw = agaw;
+ /* set AGAW */
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+ domain->gaw = DEFAULT_DOMAIN_ADDRESS_WIDTH;
+ domain->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+ }
+ else {
+ iommu = domain_get_only_iommu(domain);
+ if (guest_width > cap_mgaw(iommu->cap))
+ guest_width = cap_mgaw(iommu->cap);
+ domain->gaw = guest_width;
+ adjust_width = guestwidth_to_adjustwidth(guest_width);
+ agaw = width_to_agaw(adjust_width);
+ sagaw = cap_sagaw(iommu->cap);
+ if (!test_bit(agaw, &sagaw)) {
+ /* hardware doesn't support it, choose a bigger one */
+ pr_debug("IOMMU: hardware doesn't support agaw %d\n",
+ agaw);
+ agaw = find_next_bit(&sagaw, 5, agaw);
+ if (agaw >= 5)
+ return -ENODEV;
+ }
+ domain->agaw = agaw;
+ }
+
INIT_LIST_HEAD(&domain->devices);
/* always allocate the top pgd */
domain->pgd = (struct dma_pte *)alloc_pgtable_page();
if (!domain->pgd)
return -ENOMEM;
- __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
+ domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
return 0;
}
@@ -1131,7 +1269,7 @@ static void domain_exit(struct dmar_domain *domain)
if (!domain)
return;
- domain_remove_dev_info(domain);
+ domain_remove_all_dev_info(domain);
/* destroy iovas */
put_iova_domain(&domain->iovad);
end = DOMAIN_MAX_ADDR(domain->gaw);
@@ -1148,11 +1286,15 @@ static void domain_exit(struct dmar_domain *domain)
}
static int domain_context_mapping_one(struct dmar_domain *domain,
- u8 bus, u8 devfn)
+ struct intel_iommu *iommu, u8 bus, u8 devfn)
{
struct context_entry *context;
- struct intel_iommu *iommu = domain->iommu;
unsigned long flags;
+ struct dma_pte *pgd;
+ unsigned long num;
+ unsigned long ndomains;
+ int id;
+ int agaw;
pr_debug("Set context mapping for %02x:%02x.%d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1166,22 +1308,67 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
return 0;
}
- context_set_domain_id(*context, domain->id);
- context_set_address_width(*context, domain->agaw);
- context_set_address_root(*context, virt_to_phys(domain->pgd));
+ id = domain->id;
+ pgd = domain->pgd;
+
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+ int found = 0;
+
+ /* find an available domain id for this device in iommu */
+ ndomains = cap_ndoms(iommu->cap);
+ num = find_first_bit(iommu->domain_ids, ndomains);
+ for (; num < ndomains; ) {
+ if (iommu->domains[num] == domain) {
+ id = num;
+ found = 1;
+ break;
+ }
+ num = find_next_bit(iommu->domain_ids,
+ cap_ndoms(iommu->cap), num+1);
+ }
+
+ if (found == 0) {
+ num = find_first_zero_bit(iommu->domain_ids, ndomains);
+ if (num >= ndomains) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ printk(KERN_ERR "IOMMU: no free domain ids\n");
+ return -EFAULT;
+ }
+
+ set_bit(num, iommu->domain_ids);
+ iommu->domains[num] = domain;
+ id = num;
+ }
+
+ /* Skip top levels of page tables for
+ * iommu which has less agaw than default.
+ */
+ for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
+ pgd = phys_to_virt(dma_pte_addr(*pgd));
+ if (!dma_pte_present(*pgd)) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return -ENOMEM;
+ }
+ }
+ }
+
+ context_set_domain_id(*context, id);
+ context_set_address_width(*context, iommu->agaw);
+ context_set_address_root(*context, virt_to_phys(pgd));
context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
context_set_fault_enable(*context);
context_set_present(*context);
__iommu_flush_cache(iommu, context, sizeof(*context));
/* it's a non-present to present mapping */
- if (iommu->flush.flush_context(iommu, domain->id,
+ if (iommu->flush.flush_context(iommu, id,
(((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL, 1))
iommu_flush_write_buffer(iommu);
else
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
+ set_bit(iommu->seq_id, &domain->iommu_bmp);
spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
}
@@ -1191,9 +1378,15 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
{
int ret;
struct pci_dev *tmp, *parent;
+ struct intel_iommu *iommu;
- ret = domain_context_mapping_one(domain, pdev->bus->number,
- pdev->devfn);
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ ret = domain_context_mapping_one(domain, iommu,
+ pdev->bus->number, pdev->devfn);
if (ret)
return ret;
@@ -1204,27 +1397,32 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
/* Secondary interface's bus number and devfn 0 */
parent = pdev->bus->self;
while (parent != tmp) {
- ret = domain_context_mapping_one(domain, parent->bus->number,
- parent->devfn);
+ ret = domain_context_mapping_one(domain, iommu,
+ parent->bus->number, parent->devfn);
if (ret)
return ret;
parent = parent->bus->self;
}
if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
- return domain_context_mapping_one(domain,
+ return domain_context_mapping_one(domain, iommu,
tmp->subordinate->number, 0);
else /* this is a legacy PCI bridge */
- return domain_context_mapping_one(domain,
+ return domain_context_mapping_one(domain, iommu,
tmp->bus->number, tmp->devfn);
}
-static int domain_context_mapped(struct dmar_domain *domain,
- struct pci_dev *pdev)
+static int domain_context_mapped(struct pci_dev *pdev)
{
int ret;
struct pci_dev *tmp, *parent;
+ struct intel_iommu *iommu;
+
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return 0;
- ret = device_context_mapped(domain->iommu,
+ ret = device_context_mapped(iommu,
pdev->bus->number, pdev->devfn);
if (!ret)
return ret;
@@ -1235,17 +1433,17 @@ static int domain_context_mapped(struct dmar_domain *domain,
/* Secondary interface's bus number and devfn 0 */
parent = pdev->bus->self;
while (parent != tmp) {
- ret = device_context_mapped(domain->iommu, parent->bus->number,
+ ret = device_context_mapped(iommu, parent->bus->number,
parent->devfn);
if (!ret)
return ret;
parent = parent->bus->self;
}
if (tmp->is_pcie)
- return device_context_mapped(domain->iommu,
+ return device_context_mapped(iommu,
tmp->subordinate->number, 0);
else
- return device_context_mapped(domain->iommu,
+ return device_context_mapped(iommu,
tmp->bus->number, tmp->devfn);
}
@@ -1276,23 +1474,60 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
BUG_ON(dma_pte_addr(*pte));
dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
dma_set_pte_prot(*pte, prot);
- __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ domain_flush_cache(domain, pte, sizeof(*pte));
start_pfn++;
index++;
}
return 0;
}
-static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+/*
+ * Detach the device at @bus/@devfn from @domain: clear its context entry
+ * on the iommu matched by iommu_find_matched_iommu(), invalidate that
+ * iommu's context cache and IOTLB globally, then drop the iommu from
+ * domain->iommu_bmp.  Does nothing when no iommu matches the device.
+ *
+ * For a DOMAIN_FLAG_VIRTUAL_MACHINE domain the iommu bit is kept while
+ * another device on domain->devices is still served by the same iommu.
+ */
+static void detach_domain_for_dev(struct dmar_domain *domain,
+				u8 bus, u8 devfn)
{
- clear_context_table(domain->iommu, bus, devfn);
- domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
- DMA_CCMD_GLOBAL_INVL, 0);
- domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
- DMA_TLB_GLOBAL_FLUSH, 0);
+	struct device_domain_info *info;
+	unsigned long flags;
+	struct intel_iommu *iommu;
+
+	iommu = iommu_find_matched_iommu(bus, devfn);
+	if (!iommu)
+		return;
+
+	clear_context_table(iommu, bus, devfn);
+	iommu->flush.flush_context(iommu, 0, 0, 0,
+					DMA_CCMD_GLOBAL_INVL, 0);
+	iommu->flush.flush_iotlb(iommu, 0, 0, 0,
+					DMA_TLB_GLOBAL_FLUSH, 0);
+
+	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+		int found = 0;
+
+		/*
+		 * Walk the whole device list.  Testing only the head entry
+		 * in a "while (!list_empty())" loop never terminates when
+		 * that entry belongs to a different iommu.
+		 * NOTE(review): the list is read without device_domain_lock
+		 * here, as before - confirm callers serialise against
+		 * concurrent list changes.
+		 */
+		list_for_each_entry(info, &domain->devices, link) {
+			if (iommu_find_matched_iommu(info->bus,
+						info->devfn) == iommu) {
+				found = 1;
+				break;
+			}
+		}
+		/* another device of this domain still uses the iommu */
+		if (found)
+			return;
+	}
+
+	/* no device of this domain is left behind this iommu */
+	spin_lock_irqsave(&iommu->lock, flags);
+	clear_bit(iommu->seq_id, &domain->iommu_bmp);
+	spin_unlock_irqrestore(&iommu->lock, flags);
}
-static void domain_remove_dev_info(struct dmar_domain *domain)
+static void domain_remove_all_dev_info(struct dmar_domain *domain)
{
struct device_domain_info *info;
unsigned long flags;
@@ -1336,7 +1571,6 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
struct dmar_domain *domain, *found = NULL;
struct intel_iommu *iommu;
- struct dmar_drhd_unit *drhd;
struct device_domain_info *info, *tmp;
struct pci_dev *dev_tmp;
unsigned long flags;
@@ -1371,13 +1605,10 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
}
/* Allocate new domain for the device */
- drhd = dmar_find_matched_drhd_unit(pdev);
- if (!drhd) {
- printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
- pci_name(pdev));
- return NULL;
- }
- iommu = drhd->iommu;
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return NULL;
domain = iommu_alloc_domain(iommu);
if (!domain)
@@ -1400,7 +1631,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
info->dev = NULL;
info->domain = domain;
/* This domain is shared by devices under p2p bridge */
- domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
+ domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
/* pcie-to-pci bridge already has a domain, uses it */
found = NULL;
@@ -1805,7 +2036,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
}
/* make sure context mapping is ok */
- if (unlikely(!domain_context_mapped(domain, pdev))) {
+ if (unlikely(!domain_context_mapped(pdev))) {
ret = domain_context_mapping(domain, pdev);
if (ret) {
printk(KERN_ERR
@@ -1823,6 +2054,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
{
struct pci_dev *pdev = to_pci_dev(hwdev);
struct dmar_domain *domain;
+ struct intel_iommu *iommu;
phys_addr_t start_paddr;
struct iova *iova;
int prot = 0;
@@ -1836,6 +2068,14 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
if (!domain)
return 0;
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+ return 0;
+
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return 0;
+
size = aligned_size((u64)paddr, size);
iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
@@ -1849,7 +2089,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
* mappings..
*/
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
- !cap_zlr(domain->iommu->cap))
+ !cap_zlr(iommu->cap))
prot |= DMA_PTE_READ;
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;
@@ -1865,10 +2105,10 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
goto error;
/* it's a non-present to present mapping */
- ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
+ ret = iommu_flush_iotlb_psi(iommu, domain->id,
start_paddr, size >> VTD_PAGE_SHIFT, 1);
if (ret)
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
return start_paddr + ((u64)paddr & (~PAGE_MASK));
@@ -1896,8 +2136,7 @@ static void flush_unmaps(void)
/* just flush them all */
for (i = 0; i < g_num_of_iommus; i++) {
if (deferred_flush[i].next) {
- struct intel_iommu *iommu =
- deferred_flush[i].domain[0]->iommu;
+ struct intel_iommu *iommu = deferred_flush[i].iommu;
iommu->flush.flush_iotlb(iommu, 0, 0, 0,
DMA_TLB_GLOBAL_FLUSH, 0);
@@ -1921,7 +2160,8 @@ static void flush_unmaps_timeout(unsigned long data)
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
-static void add_unmap(struct dmar_domain *dom, struct iova *iova)
+static void add_unmap(struct dmar_domain *dom,
+ struct intel_iommu *iommu, struct iova *iova)
{
unsigned long flags;
int next, iommu_id;
@@ -1930,11 +2170,12 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
if (list_size == HIGH_WATER_MARK)
flush_unmaps();
- iommu_id = dom->iommu->seq_id;
+ iommu_id = iommu->seq_id;
next = deferred_flush[iommu_id].next;
deferred_flush[iommu_id].domain[next] = dom;
deferred_flush[iommu_id].iova[next] = iova;
+ deferred_flush[iommu_id].iommu = iommu;
deferred_flush[iommu_id].next++;
if (!timer_on) {
@@ -1952,12 +2193,21 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
struct dmar_domain *domain;
unsigned long start_addr;
struct iova *iova;
+ struct intel_iommu *iommu;
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return;
domain = find_domain(pdev);
BUG_ON(!domain);
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+ return;
+
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return;
+
iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
if (!iova)
return;
@@ -1973,13 +2223,13 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
if (intel_iommu_strict) {
- if (iommu_flush_iotlb_psi(domain->iommu,
+ if (iommu_flush_iotlb_psi(iommu,
domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
/* free iova */
__free_iova(&domain->iovad, iova);
} else {
- add_unmap(domain, iova);
+ add_unmap(domain, iommu, iova);
/*
* queue up the release of the unmap to save the 1/6th of the
* cpu used up by the iotlb flush operation...
@@ -2036,12 +2286,21 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
size_t size = 0;
void *addr;
struct scatterlist *sg;
+ struct intel_iommu *iommu;
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return;
domain = find_domain(pdev);
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+ return;
+
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return;
+
iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
if (!iova)
return;
@@ -2057,9 +2316,9 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
- if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
+ if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
size >> VTD_PAGE_SHIFT, 0))
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
/* free iova */
__free_iova(&domain->iovad, iova);
@@ -2093,6 +2352,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
int ret;
struct scatterlist *sg;
unsigned long start_addr;
+ struct intel_iommu *iommu;
BUG_ON(dir == DMA_NONE);
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
@@ -2102,6 +2362,14 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
if (!domain)
return 0;
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+ return 0;
+
+ iommu = iommu_find_matched_iommu(pdev->bus->number,
+ pdev->devfn);
+ if (!iommu)
+ return 0;
+
for_each_sg(sglist, sg, nelems, i) {
addr = SG_ENT_VIRT_ADDRESS(sg);
addr = (void *)virt_to_phys(addr);
@@ -2119,7 +2387,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
* mappings..
*/
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
- !cap_zlr(domain->iommu->cap))
+ !cap_zlr(iommu->cap))
prot |= DMA_PTE_READ;
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;
@@ -2151,9 +2419,9 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
}
/* it's a non-present to present mapping */
- if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
+ if (iommu_flush_iotlb_psi(iommu, domain->id,
start_addr, offset >> VTD_PAGE_SHIFT, 1))
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
return nelems;
}
@@ -2328,47 +2596,28 @@ int __init intel_iommu_init(void)
return 0;
}
-void intel_iommu_domain_exit(struct dmar_domain *domain)
+/* domain id for virtual machine, it won't be used to set in context */
+static unsigned long vm_domid;
+
+/*
+ * Allocate a domain object for virtual machine use.  The domain gets the
+ * next value of the vm_domid counter as its id and is flagged
+ * DOMAIN_FLAG_VIRTUAL_MACHINE.  Returns NULL when the allocation fails.
+ */
+static struct dmar_domain *iommu_alloc_vm_domain(void)
{
- u64 end;
+	struct dmar_domain *domain;
- /* Domain 0 is reserved, so dont process it */
+	domain = alloc_domain_mem();
if (!domain)
- return;
-
- end = DOMAIN_MAX_ADDR(domain->gaw);
- end = end & (~VTD_PAGE_MASK);
-
- /* clear ptes */
- dma_pte_clear_range(domain, 0, end);
+		return NULL;
- /* free page tables */
- dma_pte_free_pagetable(domain, 0, end);
+	domain->id = vm_domid++;
+	/*
+	 * Start with no iommu attached and exactly one flag set.
+	 * NOTE(review): alloc_domain_mem() is not shown to return zeroed
+	 * memory, so neither field is assumed to be 0 here.
+	 */
+	domain->iommu_bmp = 0;
+	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
- iommu_free_domain(domain);
- free_domain_mem(domain);
+	return domain;
}
-EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
-struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
+struct dmar_domain *intel_iommu_alloc_domain(void)
{
- struct dmar_drhd_unit *drhd;
struct dmar_domain *domain;
- struct intel_iommu *iommu;
- drhd = dmar_find_matched_drhd_unit(pdev);
- if (!drhd) {
- printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
- return NULL;
- }
-
- iommu = drhd->iommu;
- if (!iommu) {
- printk(KERN_ERR
- "intel_iommu_domain_alloc: iommu == NULL\n");
- return NULL;
- }
- domain = iommu_alloc_domain(iommu);
+ domain = iommu_alloc_vm_domain();
if (!domain) {
printk(KERN_ERR
"intel_iommu_domain_alloc: domain == NULL\n");
@@ -2377,44 +2626,122 @@ struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
printk(KERN_ERR
"intel_iommu_domain_alloc: domain_init() failed\n");
- intel_iommu_domain_exit(domain);
+ domain_exit(domain);
return NULL;
}
+
return domain;
}
-EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain);
-int intel_iommu_context_mapping(
- struct dmar_domain *domain, struct pci_dev *pdev)
+void intel_iommu_free_domain(struct dmar_domain *domain)
{
- int rc;
- rc = domain_context_mapping(domain, pdev);
- return rc;
+ domain_exit(domain);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_free_domain);
+
+/*
+ * Record @pdev as a member of @domain.
+ *
+ * A device_domain_info entry is allocated, filled from @pdev and, under
+ * device_domain_lock, linked on both domain->devices and the global
+ * device_domain_list; pdev->dev.archdata.iommu is pointed at it.
+ * Returns 0 on success or -ENOMEM when the entry cannot be allocated.
+ */
+static int domain_add_dev_info(struct dmar_domain *domain,
+				struct pci_dev *pdev)
+{
+	struct device_domain_info *entry = alloc_devinfo_mem();
+	unsigned long irqflags;
+
+	if (entry == NULL)
+		return -ENOMEM;
+
+	entry->domain = domain;
+	entry->dev = pdev;
+	entry->bus = pdev->bus->number;
+	entry->devfn = pdev->devfn;
+
+	spin_lock_irqsave(&device_domain_lock, irqflags);
+	list_add(&entry->link, &domain->devices);
+	list_add(&entry->global, &device_domain_list);
+	pdev->dev.archdata.iommu = entry;
+	spin_unlock_irqrestore(&device_domain_lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * Remove @pdev from @domain.
+ *
+ * The entry on domain->devices whose bus/devfn equal those of @pdev is
+ * unlinked from both lists under device_domain_lock.  With the lock
+ * released again, the device is detached via detach_domain_for_dev() and
+ * the entry is freed.  Nothing happens when @pdev is not on the list.
+ */
+static void domain_remove_dev_info(struct dmar_domain *domain,
+				struct pci_dev *pdev)
+{
+	struct device_domain_info *info;
+	struct device_domain_info *match = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	/*
+	 * Walk the whole list.  Re-testing only the head entry in a
+	 * "while (!list_empty())" loop spins forever, with the lock held
+	 * and interrupts off, when the head is a different device.
+	 */
+	list_for_each_entry(info, &domain->devices, link) {
+		if (info->bus == pdev->bus->number &&
+		    info->devfn == pdev->devfn) {
+			match = info;
+			break;
+		}
+	}
+	if (match) {
+		list_del(&match->link);
+		list_del(&match->global);
+		/* entries created for bridges carry a NULL dev */
+		if (match->dev)
+			match->dev->dev.archdata.iommu = NULL;
+	}
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+
+	if (!match)
+		return;
+
+	/* called with device_domain_lock released, as before */
+	detach_domain_for_dev(match->domain, match->bus, match->devfn);
+	free_devinfo_mem(match);
+}
+
+/*
+ * Assign @pdev to @domain.
+ *
+ * When @pdev already has a context mapping and find_domain() reports a
+ * domain for it, the device is first removed from that domain.  The
+ * context mapping for @domain is then set up and the device is recorded
+ * on the domain's device list.  Returns 0 on success, otherwise the error
+ * of domain_context_mapping() or domain_add_dev_info().
+ *
+ * NOTE(review): when domain_add_dev_info() fails, the context mapping
+ * set up just before is left in place - confirm whether callers expect a
+ * rollback.
+ */
+int intel_iommu_assign_device(struct dmar_domain *domain,
+				struct pci_dev *pdev)
+{
+	struct dmar_domain *prev;
+	int err;
+
+	if (domain_context_mapped(pdev)) {
+		prev = find_domain(pdev);
+		if (prev)
+			domain_remove_dev_info(prev, pdev);
+	}
+
+	err = domain_context_mapping(domain, pdev);
+	if (err)
+		return err;
+
+	return domain_add_dev_info(domain, pdev);
+}
-EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+EXPORT_SYMBOL_GPL(intel_iommu_assign_device);
-int intel_iommu_page_mapping(
- struct dmar_domain *domain, dma_addr_t iova,
- u64 hpa, size_t size, int prot)
+
+void intel_iommu_deassign_device(struct dmar_domain *domain,
+ struct pci_dev *pdev)
{
- int rc;
- rc = domain_page_mapping(domain, iova, hpa, size, prot);
- return rc;
+ domain_remove_dev_info(domain, pdev);
}
-EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+EXPORT_SYMBOL_GPL(intel_iommu_deassign_device);
-void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova,
+ u64 hpa, size_t size, int prot)
{
- detach_domain_for_dev(domain, bus, devfn);
+ int ret;
+ ret = domain_page_mapping(domain, iova, hpa, size, prot);
+ return ret;
}
-EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+EXPORT_SYMBOL_GPL(intel_iommu_map_pages);
-struct dmar_domain *
-intel_iommu_find_domain(struct pci_dev *pdev)
+/*
+ * Clear the page-table entries of @domain covering the @size bytes that
+ * start at @iova.  Neither @iova nor @size has to be page aligned; the
+ * range is widened to whole pages before it is handed to
+ * dma_pte_clear_range().
+ */
+void intel_iommu_unmap_pages(struct dmar_domain *domain,
+				dma_addr_t iova, size_t size)
{
- return find_domain(pdev);
+	dma_addr_t base;
+
+	/*
+	 * The address might not be aligned: round the start down and grow
+	 * the length by the offset that was cut off.  Aligning @size alone
+	 * would leave the last page mapped when an unaligned range crosses
+	 * a page boundary.
+	 */
+	base = iova & PAGE_MASK;
+	size = PAGE_ALIGN(size + (iova - base));
+	dma_pte_clear_range(domain, base, base + size);
}
-EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+EXPORT_SYMBOL_GPL(intel_iommu_unmap_pages);
int intel_iommu_found(void)
{
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 952df39..28ef89d 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -111,11 +111,21 @@ struct dma_pte {
(p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
#define dma_pte_present(p) (((p).val & 3) != 0)
+/* domain flags, one domain owns one device by default */
+
+/* devices under the same p2p bridge are owned in one domain */
+#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
+
+/* domain represents a virtual domain; more than one device
+ * may be owned by one domain, e.g. a kvm guest.
+ */
+#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
+
struct intel_iommu;
struct dmar_domain {
int id; /* domain id */
- struct intel_iommu *iommu; /* back pointer to owning iommu */
+ unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
struct list_head devices; /* all devices' list */
struct iova_domain iovad; /* iova's that belong to this domain */
@@ -127,7 +137,6 @@ struct dmar_domain {
/* adjusted guest address width, 0 is level 2 30-bit */
int agaw;
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
int flags;
};
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3d017cf..c2f37b8 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -219,6 +219,8 @@ do { \
} \
} while (0)
+#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
+
#define QI_LENGTH 256 /* queue length */
enum {
@@ -299,6 +301,7 @@ struct intel_iommu {
struct dmar_domain **domains; /* ptr to domains */
spinlock_t lock; /* protect context, domain ids */
struct root_entry *root_entry; /* virtual address */
+ int agaw;
unsigned int irq;
unsigned char name[7]; /* Device Name */
@@ -334,14 +337,16 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
-void intel_iommu_domain_exit(struct dmar_domain *domain);
-struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
-int intel_iommu_context_mapping(struct dmar_domain *domain,
- struct pci_dev *pdev);
-int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
- u64 hpa, size_t size, int prot);
-void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
-struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
+struct dmar_domain *intel_iommu_alloc_domain(void);
+void intel_iommu_free_domain(struct dmar_domain *domain);
+int intel_iommu_assign_device(struct dmar_domain *domain,
+ struct pci_dev *pdev);
+void intel_iommu_deassign_device(struct dmar_domain *domain,
+ struct pci_dev *pdev);
+int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova,
+ u64 hpa, size_t size, int prot);
+void intel_iommu_unmap_pages(struct dmar_domain *domain,
+ dma_addr_t iova, size_t size);
u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
#ifdef CONFIG_DMAR
--
1.5.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] VT-d: Support multiple device assignment for KVM
2008-11-26 3:18 [PATCH 1/2] VT-d: Support multiple device assignment for KVM Han, Weidong
@ 2008-11-26 10:53 ` Avi Kivity
2008-11-26 14:04 ` Han, Weidong
0 siblings, 1 reply; 5+ messages in thread
From: Avi Kivity @ 2008-11-26 10:53 UTC (permalink / raw)
To: Han, Weidong
Cc: Woodhouse, David, Jesse Barnes, Kay, Allen M, Yu, Fenghua,
kvm@vger.kernel.org, iommu@lists.linux-foundation.org
Han, Weidong wrote:
> In order to support multiple device assignment for KVM, this patch does following main changes:
> - extend dmar_domain to own multiple devices from different iommus, use a bitmap of iommus to replace iommu pointer in dmar_domain.
> - add a flag DOMAIN_FLAG_VIRTUAL_MACHINE to represent KVM VT-d usage. Many functions (e.g. intel_map_single() and intel_unmap_single()) won't be used by KVM VT-d. Let them return directly when this flag is set.
>
This seems brittle. An API that has some functions shorted out
depending on some flag is hard to understand and use.
We should either implement the functions, or split the API into a basic
version that talks only to one device and an expanded version that
talks to multiple devices and is implemented using the lower-level
API. This may require more changes due to the need to share io
pagetables.
> - "SAGAW" capability may be different across iommus, that's to say the VT-d page table levels may be different among iommus. This patch uses a defaut agaw, and skip top levels of page tables for iommus which have smaller agaw than default.
>
Neat trick.
> void free_dmar_iommu(struct intel_iommu *iommu)
> {
> struct dmar_domain *domain;
> @@ -960,7 +1054,14 @@ void free_dmar_iommu(struct intel_iommu *iommu)
> for (; i < cap_ndoms(iommu->cap); ) {
> domain = iommu->domains[i];
> clear_bit(i, iommu->domain_ids);
> - domain_exit(domain);
> +
> + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
> + /* domain may be referenced by other iommus */
> + if (domain_in_other_iommus(domain, iommu) == 0)
> + domain_exit(domain);
> + }
> + else
> + domain_exit(domain);
>
Things like this are best expressed using reference counts, which
removes the need for the test as well.
> +
> + /* Skip top levels of page tables for
> + * iommu which has less agaw than default.
> + */
> + for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
> + pgd = phys_to_virt(dma_pte_addr(*pgd));
> + if (!dma_pte_present(*pgd)) {
> + spin_unlock_irqrestore(&iommu->lock, flags);
> + return -ENOMEM;
> + }
> + }
> + }
>
Need to check that the agaw is sufficient for mapped memory (and when
adding a device or mapping more memory, need a similar check).
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH 1/2] VT-d: Support multiple device assignment for KVM
2008-11-26 10:53 ` Avi Kivity
@ 2008-11-26 14:04 ` Han, Weidong
2008-11-26 19:07 ` Yu, Fenghua
0 siblings, 1 reply; 5+ messages in thread
From: Han, Weidong @ 2008-11-26 14:04 UTC (permalink / raw)
To: 'Avi Kivity'
Cc: Woodhouse, David, 'Jesse Barnes', Kay, Allen M,
Yu, Fenghua, 'kvm@vger.kernel.org',
'iommu@lists.linux-foundation.org'
Avi Kivity wrote:
> Han, Weidong wrote:
>> In order to support multiple device assignment for KVM, this patch
>> does following main changes:
>> - extend dmar_domain to own multiple devices from different
>> iommus, use a bitmap of iommus to replace iommu pointer in
>> dmar_domain.
>> - add a flag DOMAIN_FLAG_VIRTUAL_MACHINE to represent KVM VT-d
>> usage. Many functions (e.g. intel_map_single() and
>> intel_unmap_single()) won't be used by KVM VT-d. Let them return
>> directly when this flag is set.
>>
>
>
> This seems brittle. An API that has some functions shorted out
> depending on some flag is hard to understand and use.
This flag just helps identify kvm VT-d usage and lets the kvm VT-d APIs reuse the native VT-d code; there is no need to duplicate code for the kvm VT-d APIs, and it won't impact the existing native VT-d code.
>
> We should either implement the functions, or split the API into a
> basic version that talks only to one device, and an expanded versions
> that talks to multiple devices, and is implemented by the using the
> lower level API. This may require more changes due to the need to
> share io pagetables.
The expanded version that supports multiple devices would need to change dmar_domain; this would cause a lot of changes and almost duplicate the main functions.
>
>> - "SAGAW" capability may be different across iommus, that's to
>> say the VT-d page table levels may be different among iommus. This
>> patch uses a defaut agaw, and skip top levels of page tables for
>> iommus which have smaller agaw than default.
>>
>
> Neat trick.
>
>> void free_dmar_iommu(struct intel_iommu *iommu)
>> {
>> struct dmar_domain *domain;
>> @@ -960,7 +1054,14 @@ void free_dmar_iommu(struct intel_iommu *iommu)
>> for (; i < cap_ndoms(iommu->cap); ) {
>> domain = iommu->domains[i];
>> clear_bit(i, iommu->domain_ids);
>> - domain_exit(domain);
>> +
>> + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
>> + /* domain may be referenced by other iommus */
>> + if (domain_in_other_iommus(domain, iommu) == 0)
>> + domain_exit(domain);
>> + }
>> + else
>> + domain_exit(domain);
>>
>
> Things like this are best expressed using reference counts, which
> removes the need for the test as well.
will add a reference count for it.
>
>> +
>> + /* Skip top levels of page tables for
>> + * iommu which has less agaw than default.
>> + */
>> + for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
>> + pgd = phys_to_virt(dma_pte_addr(*pgd));
>> + if (!dma_pte_present(*pgd)) {
>> + spin_unlock_irqrestore(&iommu->lock, flags);
>> + return -ENOMEM;
>> + }
>> + }
>> + }
>>
>
> Need to check that the agaw is sufficient for mapped memory (and when
> adding a device or mapping more memory, need a similar check).
I think we can check the smallest agaw across iommus for mapped memory when mapping memory.
Regards,
Weidong
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH 1/2] VT-d: Support multiple device assignment for KVM
2008-11-26 14:04 ` Han, Weidong
@ 2008-11-26 19:07 ` Yu, Fenghua
2008-11-27 3:38 ` Han, Weidong
0 siblings, 1 reply; 5+ messages in thread
From: Yu, Fenghua @ 2008-11-26 19:07 UTC (permalink / raw)
To: Han, Weidong, 'Avi Kivity'
Cc: Woodhouse, David, 'Jesse Barnes', Kay, Allen M,
'kvm@vger.kernel.org',
'iommu@lists.linux-foundation.org'
Avi Kivity wrote:
>> Han, Weidong wrote:
>>> In order to support multiple device assignment for KVM, this patch
>>> does following main changes:
>>> - extend dmar_domain to own multiple devices from different
>>> iommus, use a bitmap of iommus to replace iommu pointer in
>>> dmar_domain.
>>> - add a flag DOMAIN_FLAG_VIRTUAL_MACHINE to represent KVM VT-d
>>> usage. Many functions (e.g. intel_map_single() and
>>> intel_unmap_single()) won't be used by KVM VT-d. Let them return
>>> directly when this flag is set.
>>>
>>
>>
>> This seems brittle. An API that has some functions shorted out
>> depending on some flag is hard to understand and use.
>This flag just helps identify kvm VT-d usage, and let kvm VT-d APIs reuse native VT-d code, it needn't duplicate code for kvm VT-d APIs, and it won't impact existed native VT-d code.
>>
>> We should either implement the functions, or split the API into a
>> basic version that talks only to one device, and an expanded versions
>> that talks to multiple devices, and is implemented by the using the
>> lower level API. This may require more changes due to the need to
>> share io pagetables.
>The expanded versions that supports multiple devices will need to change dmar_domain, this will cause lots of changes, almost duplicate the main functions.
I would agree with Avi. It's weird to merge the KVM API and the native API together while they are logically separate. If we have two sets of interfaces, one each for KVM and the native iommu, the code is clean, robust, and easy to extend in the future. If you organize the code well, we should have both a good interface and shared code. While it makes sense to share code at the beginning of KVM like this, it will get harder to maintain and evolve both KVM and native later, e.g. changing one part may break the other.
Some comments on the code in this patch:
1. You don't need to check DOMAIN_FLAG_VIRTUAL_MACHINE in the DMA functions. When the DMA functions are called, the domain should not have the DOMAIN_FLAG_VIRTUAL_MACHINE flag. Otherwise, the domain allocation code is wrong.
2. You need to initialize domain->flags=0 for the DMA domain. It's a random number after the domain is allocated by kmem_cache_alloc.
Thanks.
-Fenghua
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH 1/2] VT-d: Support multiple device assignment for KVM
2008-11-26 19:07 ` Yu, Fenghua
@ 2008-11-27 3:38 ` Han, Weidong
0 siblings, 0 replies; 5+ messages in thread
From: Han, Weidong @ 2008-11-27 3:38 UTC (permalink / raw)
To: Yu, Fenghua, 'Avi Kivity'
Cc: Woodhouse, David, 'Jesse Barnes', Kay, Allen M,
'kvm@vger.kernel.org',
'iommu@lists.linux-foundation.org'
Yu, Fenghua wrote:
> Avi Kivity wrote:
>>> Han, Weidong wrote:
>>>> In order to support multiple device assignment for KVM, this patch
>>>> does following main changes: - extend dmar_domain to own
>>>> multiple devices from different iommus, use a bitmap of iommus to
>>>> replace iommu pointer in dmar_domain. - add a flag
>>>> DOMAIN_FLAG_VIRTUAL_MACHINE to represent KVM VT-d usage. Many
>>>> functions (e.g. intel_map_single() and intel_unmap_single()) won't
>>>> be used by KVM VT-d. Let them return directly when this flag is
>>>> set.
>>>>
>>>
>>>
>>> This seems brittle. An API that has some functions shorted out
>>> depending on some flag is hard to understand and use.
>
>> This flag just helps identify kvm VT-d usage, and let kvm VT-d APIs
>> reuse native VT-d code, it needn't duplicate code for kvm VT-d
>> APIs, and it won't impact existed native VT-d code.
>
>>>
>>> We should either implement the functions, or split the API into a
>>> basic version that talks only to one device, and an expanded
>>> versions that talks to multiple devices, and is implemented by the
>>> using the lower level API. This may require more changes due to
>>> the need to share io pagetables.
>
>> The expanded versions that supports multiple devices will need to
>> change dmar_domain, this will cause lots of changes, almost
>> duplicate the main functions.
>
> I would agree with Avi. It's wired to merge the KVM api and native
> api together while they are logically separate. If we have two sets
> of interfaces for KVM and native iommu each, code is clean and robust
> and easy to extend in the future. If you well organize the code, we
> should have both good interface and shared code. While it makes sense
> to share code at beginning of KVM like this, it will get harder to
> maintain and evolve both KVM and native later, e.g. changing one part
> may break the other.
I will remove the flag checks and implement independent functions for KVM.
>
> Some comments on the code in this patch:
>
> 1. You don't need to check DOMAIN_FLAG_VIRTUAL_MACHINE in the DMA
> functions. When the DMA functions are called, the domain should not
> have the DOMAIN_FLAG_VIRTUAL_MACHINE flag. Otherwise, the domain
> allocation code is wrong.
Yes, I will remove these meaningless checks.
>
> 2. You need to initialize domain->flags=0 for DMA domain. It's random
> number after the domain is allocated by kmem_cache_alloc.
Right. Thanks.
Regards,
Weidong
>
> Thanks.
>
> -Fenghua
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2008-11-27 3:38 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-11-26 3:18 [PATCH 1/2] VT-d: Support multiple device assignment for KVM Han, Weidong
2008-11-26 10:53 ` Avi Kivity
2008-11-26 14:04 ` Han, Weidong
2008-11-26 19:07 ` Yu, Fenghua
2008-11-27 3:38 ` Han, Weidong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox