* [PATCH V2 1/4] IOMMU: add a percpu cache for iova allocation
[not found] ` <cover.1452297604.git.shli-b10kYP2dOMg@public.gmane.org>
@ 2016-01-09 0:44 ` Shaohua Li
2016-01-09 0:44 ` [PATCH V2 2/4] iommu: free_iova doesn't need lock twice Shaohua Li
` (3 subsequent siblings)
4 siblings, 0 replies; 14+ messages in thread
From: Shaohua Li @ 2016-01-09 0:44 UTC (permalink / raw)
To: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
Cc: Kernel-team-b10kYP2dOMg, Joerg Roedel, David Woodhouse
This adds a percpu cache for small size iova allocation. For allocation
size is bigger than 64k, the cache is bypassed. The cache allows iova
allocation batched, so dramatically reduce iovad lock contention. In
test, the the lock contention becomes very tiny. The cache has 5 classes
of size (4k, 8k, 16k, 32k, 64k), each will cache 512K dma address. Each
cpu will cache 512k * 5 dma address at most. For an IOMMU system, the
cached dma address is quite tiny, so we don't bother draining cache for
CPU hotplug.
Cc: Joerg Roedel <jroedel-l3A5Bk7waGM@public.gmane.org>
Cc: David Woodhouse <David.Woodhouse-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Shaohua Li <shli-b10kYP2dOMg@public.gmane.org>
---
drivers/iommu/iova.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++-----
include/linux/iova.h | 9 ++++++
2 files changed, 89 insertions(+), 8 deletions(-)
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index fa0adef..5c86c5c 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -25,6 +25,7 @@ void
init_iova_domain(struct iova_domain *iovad, unsigned long granule,
unsigned long start_pfn, unsigned long pfn_32bit)
{
+ int cpu;
/*
* IOVA granularity will normally be equal to the smallest
* supported IOMMU page size; both *must* be capable of
@@ -38,6 +39,13 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
iovad->granule = granule;
iovad->start_pfn = start_pfn;
iovad->dma_32bit_pfn = pfn_32bit;
+ iovad->percpu_cache = alloc_percpu(struct iova_cache);
+ for_each_possible_cpu(cpu) {
+ struct iova_cache *cache = per_cpu_ptr(iovad->percpu_cache, cpu);
+ int i;
+ for (i = 0; i <= ilog2(MAX_CACHE_SIZE); i++)
+ INIT_LIST_HEAD(&cache->cache_lists[i]);
+ }
}
EXPORT_SYMBOL_GPL(init_iova_domain);
@@ -103,12 +111,10 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
struct iova *new, bool size_aligned)
{
struct rb_node *prev, *curr = NULL;
- unsigned long flags;
unsigned long saved_pfn;
unsigned int pad_size = 0;
/* Walk the tree backwards */
- spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
saved_pfn = limit_pfn;
curr = __get_cached_rbnode(iovad, &limit_pfn);
prev = curr;
@@ -135,10 +141,8 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
if (!curr) {
if (size_aligned)
pad_size = iova_get_pad_size(size, limit_pfn);
- if ((iovad->start_pfn + size + pad_size) > limit_pfn) {
- spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ if ((iovad->start_pfn + size + pad_size) > limit_pfn)
return -ENOMEM;
- }
}
/* pfn_lo will point to size aligned address if size_aligned is set */
@@ -177,9 +181,6 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
}
__cached_rbnode_insert_update(iovad, saved_pfn, new);
- spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-
-
return 0;
}
@@ -256,6 +257,69 @@ void iova_cache_put(void)
}
EXPORT_SYMBOL_GPL(iova_cache_put);
+static void refill_percpu_iova(struct iova_domain *iovad, int index)
+{
+ struct iova_cache *cache;
+ struct iova *new_iova;
+ unsigned long flags;
+ int ret;
+ int count = 0;
+
+ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+ cache = this_cpu_ptr(iovad->percpu_cache);
+
+ if (!list_empty(&cache->cache_lists[index]))
+ goto out;
+
+ while (count < (MAX_CACHE_UNIT_SIZE >> index)) {
+ new_iova = alloc_iova_mem();
+ if (!new_iova)
+ goto out;
+ ret = __alloc_and_insert_iova_range(iovad, 1 << index,
+ iovad->dma_32bit_pfn, new_iova, true);
+ if (ret) {
+ free_iova_mem(new_iova);
+ goto out;
+ }
+ count++;
+ list_add(&new_iova->sibling, &cache->cache_lists[index]);
+ }
+out:
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+}
+
+static struct iova *alloc_percpu_iova(struct iova_domain *iovad,
+ unsigned long size, unsigned long limit_pfn)
+{
+ int cache_index;
+ struct iova_cache *cache;
+ struct iova *iova;
+ bool refilled = false;
+
+ if (size > MAX_CACHE_SIZE || limit_pfn < iovad->dma_32bit_pfn)
+ return NULL;
+ cache_index = order_base_2(size);
+
+again:
+ preempt_disable();
+ cache = this_cpu_ptr(iovad->percpu_cache);
+ iova = list_first_entry_or_null(&cache->cache_lists[cache_index],
+ struct iova, sibling);
+ if (iova)
+ list_del(&iova->sibling);
+ preempt_enable();
+ if (iova)
+ return iova;
+
+ if (!refilled) {
+ refill_percpu_iova(iovad, cache_index);
+ refilled = true;
+ goto again;
+ }
+
+ return NULL;
+}
+
/**
* alloc_iova - allocates an iova
* @iovad: - iova domain in question
@@ -274,13 +338,20 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
{
struct iova *new_iova;
int ret;
+ unsigned long flags;
+
+ new_iova = alloc_percpu_iova(iovad, size, limit_pfn);
+ if (new_iova)
+ return new_iova;
new_iova = alloc_iova_mem();
if (!new_iova)
return NULL;
+ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn,
new_iova, size_aligned);
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
if (ret) {
free_iova_mem(new_iova);
@@ -389,6 +460,7 @@ void put_iova_domain(struct iova_domain *iovad)
node = rb_first(&iovad->rbroot);
}
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ free_percpu(iovad->percpu_cache);
}
EXPORT_SYMBOL_GPL(put_iova_domain);
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 92f7177..1386a7b 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -21,6 +21,14 @@ struct iova {
struct rb_node node;
unsigned long pfn_hi; /* IOMMU dish out addr hi */
unsigned long pfn_lo; /* IOMMU dish out addr lo */
+ struct list_head sibling;
+};
+
+#define MAX_CACHE_SIZE 16 /* 16 * 4k, only cache <= 64K allocation */
+#define MAX_CACHE_UNIT_SIZE (512 / 4) /* 512k total cache size */
+
+struct iova_cache {
+ struct list_head cache_lists[ilog2(MAX_CACHE_SIZE) + 1];
};
/* holds all the iova translations for a domain */
@@ -31,6 +39,7 @@ struct iova_domain {
unsigned long granule; /* pfn granularity for this domain */
unsigned long start_pfn; /* Lower limit for this domain */
unsigned long dma_32bit_pfn;
+ struct iova_cache __percpu *percpu_cache;
};
static inline unsigned long iova_size(struct iova *iova)
--
2.4.6
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH V2 2/4] iommu: free_iova doesn't need lock twice
[not found] ` <cover.1452297604.git.shli-b10kYP2dOMg@public.gmane.org>
2016-01-09 0:44 ` [PATCH V2 1/4] IOMMU: add a percpu cache for " Shaohua Li
@ 2016-01-09 0:44 ` Shaohua Li
2016-01-09 0:44 ` [PATCH V2 3/4] intel-iommu: remove find_iova in unmap path Shaohua Li
` (2 subsequent siblings)
4 siblings, 0 replies; 14+ messages in thread
From: Shaohua Li @ 2016-01-09 0:44 UTC (permalink / raw)
To: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
Cc: Kernel-team-b10kYP2dOMg, Joerg Roedel, David Woodhouse
cleanup the code a bit. free_iova doesn't need lock twice.
Cc: Joerg Roedel <jroedel-l3A5Bk7waGM@public.gmane.org>
Cc: David Woodhouse <David.Woodhouse-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Shaohua Li <shli-b10kYP2dOMg@public.gmane.org>
---
drivers/iommu/iova.c | 62 +++++++++++++++++++++++++++++++---------------------
1 file changed, 37 insertions(+), 25 deletions(-)
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 5c86c5c..bb8a328 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -362,35 +362,17 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
}
EXPORT_SYMBOL_GPL(alloc_iova);
-/**
- * find_iova - find's an iova for a given pfn
- * @iovad: - iova domain in question.
- * @pfn: - page frame number
- * This function finds and returns an iova belonging to the
- * given doamin which matches the given pfn.
- */
-struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
+static struct iova *__find_iova(struct iova_domain *iovad, unsigned long pfn)
{
- unsigned long flags;
struct rb_node *node;
- /* Take the lock so that no other thread is manipulating the rbtree */
- spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
node = iovad->rbroot.rb_node;
while (node) {
struct iova *iova = container_of(node, struct iova, node);
/* If pfn falls within iova's range, return iova */
- if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
- spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
- /* We are not holding the lock while this iova
- * is referenced by the caller as the same thread
- * which called this function also calls __free_iova()
- * and it is by design that only one thread can possibly
- * reference a particular iova and hence no conflict.
- */
+ if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi))
return iova;
- }
if (pfn < iova->pfn_lo)
node = node->rb_left;
@@ -398,9 +380,33 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
node = node->rb_right;
}
- spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
return NULL;
}
+
+/**
+ * find_iova - find's an iova for a given pfn
+ * @iovad: - iova domain in question.
+ * @pfn: - page frame number
+ * This function finds and returns an iova belonging to the
+ * given doamin which matches the given pfn.
+ */
+struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
+{
+ unsigned long flags;
+ struct iova *iova;
+
+ /* Take the lock so that no other thread is manipulating the rbtree */
+ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+ iova = __find_iova(iovad, pfn);
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ /* We are not holding the lock while this iova
+ * is referenced by the caller as the same thread
+ * which called this function also calls __free_iova()
+ * and it is by design that only one thread can possibly
+ * reference a particular iova and hence no conflict.
+ */
+ return iova;
+}
EXPORT_SYMBOL_GPL(find_iova);
/**
@@ -432,11 +438,17 @@ EXPORT_SYMBOL_GPL(__free_iova);
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
- struct iova *iova = find_iova(iovad, pfn);
-
- if (iova)
- __free_iova(iovad, iova);
+ unsigned long flags;
+ struct iova *iova;
+ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+ iova = __find_iova(iovad, pfn);
+ if (iova) {
+ __cached_rbnode_delete_update(iovad, iova);
+ rb_erase(&iova->node, &iovad->rbroot);
+ }
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(free_iova);
--
2.4.6
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH V2 3/4] intel-iommu: remove find_iova in unmap path
[not found] ` <cover.1452297604.git.shli-b10kYP2dOMg@public.gmane.org>
2016-01-09 0:44 ` [PATCH V2 1/4] IOMMU: add a percpu cache for " Shaohua Li
2016-01-09 0:44 ` [PATCH V2 2/4] iommu: free_iova doesn't need lock twice Shaohua Li
@ 2016-01-09 0:44 ` Shaohua Li
2016-01-09 0:44 ` [PATCH V2 4/4] intel-iommu: do batch iova free Shaohua Li
2016-01-10 22:56 ` [PATCH V2 0/4]IOMMU: avoid lock contention in iova allocation Adam Morrison
4 siblings, 0 replies; 14+ messages in thread
From: Shaohua Li @ 2016-01-09 0:44 UTC (permalink / raw)
To: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
Cc: Kernel-team-b10kYP2dOMg, Joerg Roedel, David Woodhouse
unmap path does find_iova and follows __free_iova, which takes lock
twice. The find_iova isn't really required and so we can avoid the lock
overhead. Also next patch depends on this to reduce lock further.
Cc: Joerg Roedel <jroedel-l3A5Bk7waGM@public.gmane.org>
Cc: David Woodhouse <David.Woodhouse-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Shaohua Li <shli-b10kYP2dOMg@public.gmane.org>
---
drivers/iommu/intel-iommu.c | 62 ++++++++++++++++++++++++++++-----------------
1 file changed, 39 insertions(+), 23 deletions(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index ac73876..b06a901 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -81,6 +81,7 @@
#define IOVA_START_PFN (1)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+#define IOVA_PFN_ROUNDUP(addr) ((addr + PAGE_SIZE -1) >> PAGE_SHIFT)
#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
@@ -461,7 +462,8 @@ static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
int next;
- struct iova *iova[HIGH_WATER_MARK];
+ unsigned long iova_pfn[HIGH_WATER_MARK];
+ unsigned long pages[HIGH_WATER_MARK];
struct dmar_domain *domain[HIGH_WATER_MARK];
struct page *freelist[HIGH_WATER_MARK];
};
@@ -3518,20 +3520,24 @@ static void flush_unmaps(void)
DMA_TLB_GLOBAL_FLUSH);
for (j = 0; j < deferred_flush[i].next; j++) {
unsigned long mask;
- struct iova *iova = deferred_flush[i].iova[j];
+ unsigned long iova_pfn = deferred_flush[i].iova_pfn[j];
+ size_t pages = deferred_flush[i].pages[j];
struct dmar_domain *domain = deferred_flush[i].domain[j];
+ struct iova_domain *iovad = &domain->iovad;
/* On real hardware multiple invalidations are expensive */
if (cap_caching_mode(iommu->cap))
iommu_flush_iotlb_psi(iommu, domain,
- iova->pfn_lo, iova_size(iova),
+ mm_to_dma_pfn(iova_pfn),
+ mm_to_dma_pfn(pages),
!deferred_flush[i].freelist[j], 0);
else {
- mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
- iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
- (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
+ mask = ilog2(mm_to_dma_pfn(pages));
+ iommu_flush_dev_iotlb(domain,
+ (uint64_t)iova_pfn << PAGE_SHIFT, mask);
}
- __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
+
+ free_iova(iovad, iova_pfn);
if (deferred_flush[i].freelist[j])
dma_free_pagelist(deferred_flush[i].freelist[j]);
}
@@ -3550,7 +3556,8 @@ static void flush_unmaps_timeout(unsigned long data)
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
-static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
+static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
+ size_t pages, struct page *freelist)
{
unsigned long flags;
int next, iommu_id;
@@ -3565,7 +3572,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *f
next = deferred_flush[iommu_id].next;
deferred_flush[iommu_id].domain[next] = dom;
- deferred_flush[iommu_id].iova[next] = iova;
+ deferred_flush[iommu_id].iova_pfn[next] = iova_pfn;
+ deferred_flush[iommu_id].pages[next] = pages;
deferred_flush[iommu_id].freelist[next] = freelist;
deferred_flush[iommu_id].next++;
@@ -3577,11 +3585,10 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *f
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
-static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
+static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t pages)
{
struct dmar_domain *domain;
unsigned long start_pfn, last_pfn;
- struct iova *iova;
struct intel_iommu *iommu;
struct page *freelist;
@@ -3592,14 +3599,11 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
BUG_ON(!domain);
iommu = domain_get_iommu(domain);
+ /* intel_alloc_iova does the roundup */
+ pages = __roundup_pow_of_two(pages);
- iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
- if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
- (unsigned long long)dev_addr))
- return;
-
- start_pfn = mm_to_dma_pfn(iova->pfn_lo);
- last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
+ start_pfn = mm_to_dma_pfn(IOVA_PFN(dev_addr));
+ last_pfn = mm_to_dma_pfn(IOVA_PFN(dev_addr) + pages) - 1;
pr_debug("Device %s unmapping: pfn %lx-%lx\n",
dev_name(dev), start_pfn, last_pfn);
@@ -3610,10 +3614,10 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
iommu_flush_iotlb_psi(iommu, domain, start_pfn,
last_pfn - start_pfn + 1, !freelist, 0);
/* free iova */
- __free_iova(&domain->iovad, iova);
+ free_iova(&domain->iovad, IOVA_PFN(dev_addr));
dma_free_pagelist(freelist);
} else {
- add_unmap(domain, iova, freelist);
+ add_unmap(domain, IOVA_PFN(dev_addr), pages, freelist);
/*
* queue up the release of the unmap to save the 1/6th of the
* cpu used up by the iotlb flush operation...
@@ -3625,7 +3629,8 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- intel_unmap(dev, dev_addr);
+ intel_unmap(dev, dev_addr, IOVA_PFN_ROUNDUP(dev_addr + size) -
+ IOVA_PFN(dev_addr));
}
static void *intel_alloc_coherent(struct device *dev, size_t size,
@@ -3684,7 +3689,7 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
size = PAGE_ALIGN(size);
order = get_order(size);
- intel_unmap(dev, dma_handle);
+ intel_unmap(dev, dma_handle, size);
if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
__free_pages(page, order);
}
@@ -3693,7 +3698,18 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- intel_unmap(dev, sglist[0].dma_address);
+ struct scatterlist *sg;
+ size_t size = 0;
+ int i;
+
+ for_each_sg(sglist, sg, nelems, i) {
+ dma_addr_t dma_addr = sg_dma_address(sg);
+ unsigned int dma_len = sg_dma_len(sg);
+ size += IOVA_PFN_ROUNDUP(dma_addr + dma_len) -
+ IOVA_PFN(dma_addr);
+ }
+
+ intel_unmap(dev, sglist[0].dma_address, size);
}
static int intel_nontranslate_map_sg(struct device *hddev,
--
2.4.6
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH V2 4/4] intel-iommu: do batch iova free
[not found] ` <cover.1452297604.git.shli-b10kYP2dOMg@public.gmane.org>
` (2 preceding siblings ...)
2016-01-09 0:44 ` [PATCH V2 3/4] intel-iommu: remove find_iova in unmap path Shaohua Li
@ 2016-01-09 0:44 ` Shaohua Li
2016-01-10 22:56 ` [PATCH V2 0/4]IOMMU: avoid lock contention in iova allocation Adam Morrison
4 siblings, 0 replies; 14+ messages in thread
From: Shaohua Li @ 2016-01-09 0:44 UTC (permalink / raw)
To: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
Cc: Kernel-team-b10kYP2dOMg, Joerg Roedel, David Woodhouse
iova free can be easily batched and remove the lock contention in unmap
path.
Cc: Joerg Roedel <jroedel-l3A5Bk7waGM@public.gmane.org>
Cc: David Woodhouse <David.Woodhouse-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Shaohua Li <shli-b10kYP2dOMg@public.gmane.org>
---
drivers/iommu/intel-iommu.c | 17 ++++++++++++++++-
drivers/iommu/iova.c | 27 +++++++++++++++++++++++++++
include/linux/iova.h | 1 +
3 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index b06a901..c1e0702 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3499,9 +3499,13 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,
dir, *dev->dma_mask);
}
+#define MAX_FREE_IOVA 16
static void flush_unmaps(void)
{
int i, j;
+ unsigned long free_iovas[MAX_FREE_IOVA];
+ int iova_cnt;
+ struct iova_domain *free_iovad;
timer_on = 0;
@@ -3518,6 +3522,8 @@ static void flush_unmaps(void)
if (!cap_caching_mode(iommu->cap))
iommu->flush.flush_iotlb(iommu, 0, 0, 0,
DMA_TLB_GLOBAL_FLUSH);
+ iova_cnt = 0;
+ free_iovad = NULL;
for (j = 0; j < deferred_flush[i].next; j++) {
unsigned long mask;
unsigned long iova_pfn = deferred_flush[i].iova_pfn[j];
@@ -3537,10 +3543,19 @@ static void flush_unmaps(void)
(uint64_t)iova_pfn << PAGE_SHIFT, mask);
}
- free_iova(iovad, iova_pfn);
+ if (free_iovad && (free_iovad != iovad ||
+ iova_cnt >= MAX_FREE_IOVA)) {
+ free_iova_array(free_iovad, free_iovas, iova_cnt);
+ iova_cnt = 0;
+ }
+
+ free_iovad = iovad;
+ free_iovas[iova_cnt++] = iova_pfn;
if (deferred_flush[i].freelist[j])
dma_free_pagelist(deferred_flush[i].freelist[j]);
}
+ if (iova_cnt)
+ free_iova_array(free_iovad, free_iovas, iova_cnt);
deferred_flush[i].next = 0;
}
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index bb8a328..a4959e2 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -453,6 +453,33 @@ free_iova(struct iova_domain *iovad, unsigned long pfn)
EXPORT_SYMBOL_GPL(free_iova);
/**
+ * free_iova_array - finds and frees iovas for pfn array
+ * @iovad: - iova domain in question.
+ * @pfns: - pfn array
+ * @size - array size
+ * This function free iovas of givien pfn array
+ */
+void
+free_iova_array(struct iova_domain *iovad, unsigned long *pfns, int size)
+{
+ unsigned long flags;
+ struct iova *iova;
+ int i;
+
+ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+ for (i = 0; i < size; i++) {
+ iova = __find_iova(iovad, pfns[i]);
+ if (iova) {
+ __cached_rbnode_delete_update(iovad, iova);
+ rb_erase(&iova->node, &iovad->rbroot);
+ }
+ free_iova_mem(iova);
+ }
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+}
+EXPORT_SYMBOL_GPL(free_iova_array);
+
+/**
* put_iova_domain - destroys the iova doamin
* @iovad: - iova domain in question.
* All the iova's in that domain are destroyed.
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 1386a7b..5a11304 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -83,6 +83,7 @@ void iova_cache_put(void);
struct iova *alloc_iova_mem(void);
void free_iova_mem(struct iova *iova);
void free_iova(struct iova_domain *iovad, unsigned long pfn);
+void free_iova_array(struct iova_domain *iovad, unsigned long *pfns, int size);
void __free_iova(struct iova_domain *iovad, struct iova *iova);
struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
unsigned long limit_pfn,
--
2.4.6
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH V2 0/4]IOMMU: avoid lock contention in iova allocation
[not found] ` <cover.1452297604.git.shli-b10kYP2dOMg@public.gmane.org>
` (3 preceding siblings ...)
2016-01-09 0:44 ` [PATCH V2 4/4] intel-iommu: do batch iova free Shaohua Li
@ 2016-01-10 22:56 ` Adam Morrison
[not found] ` <20160110225610.GA16778-FrESSTt7Abv7r6psnUbsSmZHpeb/A1Y/@public.gmane.org>
4 siblings, 1 reply; 14+ messages in thread
From: Adam Morrison @ 2016-01-10 22:56 UTC (permalink / raw)
To: Shaohua Li
Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
Kernel-team-b10kYP2dOMg
Hi,
> iova alloc/free causes big lock contention, which could be easily demonstrated
> with iperf workload. Previously I posted a patchset:
> http://lists.linuxfoundation.org/pipermail/iommu/2015-November/014984.html
>
> the concern is it's not generic. This is another try for the issue. This
> version implements a per-cpu iova cache for small size DMA allocation (<= 64k),
> which should be generic enough and so we can do batch allocation. iova free
> could be easily batched too. With batch alloc/free, iova lock contention
> disappears. Performance result with this patchset is nearly the same as the
> previous one in the same test.
>
> After this patchset, async_umap_flush_lock becomes the hotest lock in
> intel-iommu, but not very bad. That will be something we need work on in the
> future.
There can still be significant spinlock contention with this patchset.
For example, here's the throughput obtained when accessing 16 memcached
instances running on a 16-core Sandy Bridge with an Intel XL710 NIC.
The client machine has iommu=off and runs memslap with the default
config (64-byte keys, 1024-byte values, and 10%/90% SET/GET ops):
stock (4.4.0-rc5) iommu=off:
1,088,996 memcached transactions/sec (=100%, median of 10 runs).
this patch, iommu=on:
313,161 memcached transactions/sec (=29%).
perf: 21.87% 0.57% memcached [kernel.kallsyms] [k] _raw_spin_lock_irqsave
|
---_raw_spin_lock_irqsave
|
|--67.84%-- intel_unmap (async_umap_flush_lock)
|--17.54%-- alloc_iova
|--12.85%-- free_iova_array
For reference, the patchset I posted two weeks ago gets almost the same
throughput as with iommu=off:
http://lists.linuxfoundation.org/pipermail/iommu/2015-December/015271.html:
1,067,586 memcached transactions/sec (=98%).
perf: 0.75% 0.75% memcached [kernel.kallsyms] [k] _raw_spin_lock_irqsave]
^ permalink raw reply [flat|nested] 14+ messages in thread