* [patch 1/2] xfs: remove old vmap cache
@ 2008-10-21 8:25 Nick Piggin
2008-10-21 8:27 ` [patch 2/2] xfs: use scalable vmap API Nick Piggin
2008-10-21 12:09 ` [patch 1/2] xfs: remove old vmap cache Christoph Hellwig
0 siblings, 2 replies; 17+ messages in thread
From: Nick Piggin @ 2008-10-21 8:25 UTC (permalink / raw)
To: xfs
XFS's vmap batching simply defers a number (up to 64) of vunmaps, and keeps
track of them in a list. To purge the batch, it just goes through the list and
calls vunmap on each one. This is pretty poor: a global TLB flush is generally
still performed on each vunmap, with the most expensive parts of the operation
being the broadcast IPIs and locking involved in the SMP callouts, and the
locking involved in the vmap management -- none of these are avoided by just
batching up the calls. I'm actually surprised it ever made much difference.
(Now that the lazy vmap allocator is upstream, this description is not quite
right, but the vunmap batching still doesn't seem to do much)
Rip all this logic out of XFS completely. I will improve vmap performance
and scalability directly in a subsequent patch.
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
Index: linux-2.6/fs/xfs/linux-2.6/xfs_buf.c
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_buf.c
+++ linux-2.6/fs/xfs/linux-2.6/xfs_buf.c
@@ -166,75 +166,6 @@ test_page_region(
}
/*
- * Mapping of multi-page buffers into contiguous virtual space
- */
-
-typedef struct a_list {
- void *vm_addr;
- struct a_list *next;
-} a_list_t;
-
-static a_list_t *as_free_head;
-static int as_list_len;
-static DEFINE_SPINLOCK(as_lock);
-
-/*
- * Try to batch vunmaps because they are costly.
- */
-STATIC void
-free_address(
- void *addr)
-{
- a_list_t *aentry;
-
-#ifdef CONFIG_XEN
- /*
- * Xen needs to be able to make sure it can get an exclusive
- * RO mapping of pages it wants to turn into a pagetable. If
- * a newly allocated page is also still being vmap()ed by xfs,
- * it will cause pagetable construction to fail. This is a
- * quick workaround to always eagerly unmap pages so that Xen
- * is happy.
- */
- vunmap(addr);
- return;
-#endif
-
- aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
- if (likely(aentry)) {
- spin_lock(&as_lock);
- aentry->next = as_free_head;
- aentry->vm_addr = addr;
- as_free_head = aentry;
- as_list_len++;
- spin_unlock(&as_lock);
- } else {
- vunmap(addr);
- }
-}
-
-STATIC void
-purge_addresses(void)
-{
- a_list_t *aentry, *old;
-
- if (as_free_head == NULL)
- return;
-
- spin_lock(&as_lock);
- aentry = as_free_head;
- as_free_head = NULL;
- as_list_len = 0;
- spin_unlock(&as_lock);
-
- while ((old = aentry) != NULL) {
- vunmap(aentry->vm_addr);
- aentry = aentry->next;
- kfree(old);
- }
-}
-
-/*
* Internal xfs_buf_t object manipulation
*/
@@ -333,7 +264,7 @@ xfs_buf_free(
uint i;
if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
- free_address(bp->b_addr - bp->b_offset);
+ vunmap(bp->b_addr - bp->b_offset);
for (i = 0; i < bp->b_page_count; i++) {
struct page *page = bp->b_pages[i];
@@ -455,8 +386,6 @@ _xfs_buf_map_pages(
bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
bp->b_flags |= XBF_MAPPED;
} else if (flags & XBF_MAPPED) {
- if (as_list_len > 64)
- purge_addresses();
bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
VM_MAP, PAGE_KERNEL);
if (unlikely(bp->b_addr == NULL))
@@ -1732,8 +1661,6 @@ xfsbufd(
count++;
}
- if (as_list_len > 0)
- purge_addresses();
if (count)
blk_run_address_space(target->bt_mapping);
^ permalink raw reply [flat|nested] 17+ messages in thread
* [patch 2/2] xfs: use scalable vmap API
2008-10-21 8:25 [patch 1/2] xfs: remove old vmap cache Nick Piggin
@ 2008-10-21 8:27 ` Nick Piggin
2008-10-21 12:09 ` Christoph Hellwig
2008-10-21 12:09 ` [patch 1/2] xfs: remove old vmap cache Christoph Hellwig
1 sibling, 1 reply; 17+ messages in thread
From: Nick Piggin @ 2008-10-21 8:27 UTC (permalink / raw)
To: xfs
Implement XFS's large buffer support with the new vmap APIs. See the vmap
rewrite (db64fe02) for some numbers. The biggest improvement that comes from
using the new APIs is avoiding the global KVA allocation lock on every call.
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
Index: linux-2.6/fs/xfs/linux-2.6/xfs_buf.c
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_buf.c
+++ linux-2.6/fs/xfs/linux-2.6/xfs_buf.c
@@ -264,7 +264,7 @@ xfs_buf_free(
uint i;
if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
- vunmap(bp->b_addr - bp->b_offset);
+ vm_unmap_ram(bp->b_addr - bp->b_offset, bp->b_page_count);
for (i = 0; i < bp->b_page_count; i++) {
struct page *page = bp->b_pages[i];
@@ -386,8 +386,8 @@ _xfs_buf_map_pages(
bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
bp->b_flags |= XBF_MAPPED;
} else if (flags & XBF_MAPPED) {
- bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
- VM_MAP, PAGE_KERNEL);
+ bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
+ -1, PAGE_KERNEL);
if (unlikely(bp->b_addr == NULL))
return -ENOMEM;
bp->b_addr += bp->b_offset;
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 1/2] xfs: remove old vmap cache
2008-10-21 8:25 [patch 1/2] xfs: remove old vmap cache Nick Piggin
2008-10-21 8:27 ` [patch 2/2] xfs: use scalable vmap API Nick Piggin
@ 2008-10-21 12:09 ` Christoph Hellwig
1 sibling, 0 replies; 17+ messages in thread
From: Christoph Hellwig @ 2008-10-21 12:09 UTC (permalink / raw)
To: Nick Piggin; +Cc: xfs
Looks good.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 2/2] xfs: use scalable vmap API
2008-10-21 8:27 ` [patch 2/2] xfs: use scalable vmap API Nick Piggin
@ 2008-10-21 12:09 ` Christoph Hellwig
2008-10-22 9:30 ` Nick Piggin
0 siblings, 1 reply; 17+ messages in thread
From: Christoph Hellwig @ 2008-10-21 12:09 UTC (permalink / raw)
To: Nick Piggin; +Cc: xfs
On Tue, Oct 21, 2008 at 10:27:35AM +0200, Nick Piggin wrote:
> + bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
> + -1, PAGE_KERNEL);
What does the -1 stand for?
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 2/2] xfs: use scalable vmap API
2008-10-21 12:09 ` Christoph Hellwig
@ 2008-10-22 9:30 ` Nick Piggin
2010-01-19 12:15 ` Christoph Hellwig
0 siblings, 1 reply; 17+ messages in thread
From: Nick Piggin @ 2008-10-22 9:30 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Tue, Oct 21, 2008 at 08:09:32AM -0400, Christoph Hellwig wrote:
> On Tue, Oct 21, 2008 at 10:27:35AM +0200, Nick Piggin wrote:
> > + bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
> > + -1, PAGE_KERNEL);
>
> What does the -1 stand for?
Default node.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 2/2] xfs: use scalable vmap API
2008-10-22 9:30 ` Nick Piggin
@ 2010-01-19 12:15 ` Christoph Hellwig
2010-01-25 7:54 ` Nick Piggin
0 siblings, 1 reply; 17+ messages in thread
From: Christoph Hellwig @ 2010-01-19 12:15 UTC (permalink / raw)
To: Nick Piggin; +Cc: Christoph Hellwig, linux-mm, xfs
Hi Nick,
I've looked into retesting and re-enabling the switch to your
scalable vmap API (original commit 95f8e302c04c0b0c6de35ab399a5551605eeb006).
The good thing is that I can't reproduce the original regressions in
xfstests I've seen. The bad news is that starting from the second
consecutive xfstests run we're not able to vmalloc the log buffers
anymore. It seems the use of this API introduces some leak of vmalloc
space. Any idea how to debug this further?
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 2/2] xfs: use scalable vmap API
2010-01-19 12:15 ` Christoph Hellwig
@ 2010-01-25 7:54 ` Nick Piggin
2010-01-25 8:17 ` Christoph Hellwig
2010-01-25 8:30 ` Nick Piggin
0 siblings, 2 replies; 17+ messages in thread
From: Nick Piggin @ 2010-01-25 7:54 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-mm, xfs
On Tue, Jan 19, 2010 at 07:15:05AM -0500, Christoph Hellwig wrote:
> Hi Nick,
>
> I've looked into retesting and re-enabling the swithc to your
> scalabale vmap API (original commit 95f8e302c04c0b0c6de35ab399a5551605eeb006).
>
> The good thing is that I can't reproduce the original regressions in
> xfstests I've seen. The bad news is that starting from the second
> consequitive xfstests run we're not able to vmalloc the log buffers
> anymore. It seems the use of this API introduces some leak of vmalloc
> space. Any idea how to debug this further?
Hi Christoph,
OK, that's worrying. It's silly to have all that vmap layer work in the
tree and no users, not surprised to have a bug there.
Is this on a 32-bit system with small vmalloc area?
Basically in the scalable API implementation, we allocate per-CPU vmap
chunks (struct vmap_block) from the normal global allocator, and then
subsequently do allocations from those chunks using the simple bitmap
allocator.
Now there is more room for fragmentation with this approach, and a few
problems that I should really fix: firstly, a chunk with say a 1 page
hole left in it that is never used will never get freed. So it will be
good to free those chunks. Secondly, one CPU should be able to steal
from others if it can't find more memory.
So if you have small vmalloc space, it could be just these issues making
vmalloc consumption worse. Otherwise yes there could be a real leak there
unrelated to fragmentation.
When the vmap allocation fails, it would be good to basically see the
alloc_map and dirty_map for each of the vmap_blocks. This is going to be
a lot of information. Basically, all blocks with
free+dirty == VMAP_BBMAP_BITS are ones that could be released, and you
could try the alloc again.
Thanks,
Nick
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 2/2] xfs: use scalable vmap API
2010-01-25 7:54 ` Nick Piggin
@ 2010-01-25 8:17 ` Christoph Hellwig
2010-01-25 8:33 ` Nick Piggin
2010-01-25 8:30 ` Nick Piggin
1 sibling, 1 reply; 17+ messages in thread
From: Christoph Hellwig @ 2010-01-25 8:17 UTC (permalink / raw)
To: Nick Piggin; +Cc: Christoph Hellwig, linux-mm, xfs
On Mon, Jan 25, 2010 at 06:54:45PM +1100, Nick Piggin wrote:
> Is this on a 32-bit system with small vmalloc area?
Yes.
> When the vmap allocation fails, it would be good to basically see the
> alloc_map and dirty_map for each of the vmap_blocks. This is going to be
> a lot of information. Basically for all blocks with
> free+dirty == VMAP_BBMAP_BITS are ones that could be released and you
> could try the alloc again.
Any easy way to get them? Sorry, not uptodate on your new vmalloc
implementation anymore.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 2/2] xfs: use scalable vmap API
2010-01-25 7:54 ` Nick Piggin
2010-01-25 8:17 ` Christoph Hellwig
@ 2010-01-25 8:30 ` Nick Piggin
1 sibling, 0 replies; 17+ messages in thread
From: Nick Piggin @ 2010-01-25 8:30 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-mm, xfs
On Mon, Jan 25, 2010 at 06:54:45PM +1100, Nick Piggin wrote:
> When the vmap allocation fails, it would be good to basically see the
> alloc_map and dirty_map for each of the vmap_blocks. This is going to be
> a lot of information. Basically for all blocks with
> free+dirty == VMAP_BBMAP_BITS are ones that could be released and you
> could try the alloc again.
Something like this (untested) is what I'm thinking of. I'll try the XFS
patch again and get something testable over here.
There are RCU bugs in the vmap block list I noticed too which I will
split out and submit separately.
--
Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c
+++ linux-2.6/mm/vmalloc.c
@@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void
static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+/* for per-CPU blocks */
+static void purge_fragmented_blocks(void);
+
/*
* Purges all lazily-freed vmap areas.
*
@@ -539,6 +542,8 @@ static void __purge_vmap_area_lazy(unsig
} else
spin_lock(&purge_lock);
+ purge_fragmented_blocks();
+
rcu_read_lock();
list_for_each_entry_rcu(va, &vmap_area_list, list) {
if (va->flags & VM_LAZY_FREE) {
@@ -669,8 +674,6 @@ static bool vmap_initialized __read_most
struct vmap_block_queue {
spinlock_t lock;
struct list_head free;
- struct list_head dirty;
- unsigned int nr_dirty;
};
struct vmap_block {
@@ -680,10 +683,9 @@ struct vmap_block {
unsigned long free, dirty;
DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
- union {
- struct list_head free_list;
- struct rcu_head rcu_head;
- };
+ struct list_head free_list;
+ struct rcu_head rcu_head;
+ struct list_head purge;
};
/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
@@ -759,7 +761,7 @@ static struct vmap_block *new_vmap_block
vbq = &get_cpu_var(vmap_block_queue);
vb->vbq = vbq;
spin_lock(&vbq->lock);
- list_add(&vb->free_list, &vbq->free);
+ list_add_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
put_cpu_var(vmap_block_queue);
@@ -808,23 +810,27 @@ again:
int i;
spin_lock(&vb->lock);
+ if (vb->free < 1UL << order)
+ goto next;
+
i = bitmap_find_free_region(vb->alloc_map,
VMAP_BBMAP_BITS, order);
- if (i >= 0) {
- addr = vb->va->va_start + (i << PAGE_SHIFT);
- BUG_ON(addr_to_vb_idx(addr) !=
- addr_to_vb_idx(vb->va->va_start));
- vb->free -= 1UL << order;
- if (vb->free == 0) {
- spin_lock(&vbq->lock);
- list_del_init(&vb->free_list);
- spin_unlock(&vbq->lock);
- }
- spin_unlock(&vb->lock);
- break;
+ if (i < 0)
+ goto next;
+ addr = vb->va->va_start + (i << PAGE_SHIFT);
+ BUG_ON(addr_to_vb_idx(addr) !=
+ addr_to_vb_idx(vb->va->va_start));
+ vb->free -= 1UL << order;
+ if (vb->free == 0) {
+ spin_lock(&vbq->lock);
+ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
}
spin_unlock(&vb->lock);
+ break;
+next:
+ spin_unlock(&vb->lock);
}
put_cpu_var(vmap_block_queue);
rcu_read_unlock();
@@ -873,6 +879,43 @@ static void vb_free(const void *addr, un
spin_unlock(&vb->lock);
}
+static void purge_fragmented_blocks(void)
+{
+ LIST_HEAD(purge);
+ int cpu;
+ struct vmap_block *vb;
+ struct vmap_block *n_vb;
+
+ for_each_possible_cpu(cpu) {
+ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+
+ if (vb->free + vb->dirty != VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS)
+ continue;
+
+ spin_lock(&vb->lock);
+ if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
+ vb->free = 0; /* prevent further allocs after releasing lock */
+ vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
+ spin_lock(&vbq->lock);
+ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
+ spin_unlock(&vb->lock);
+ list_add_tail(&vb->purge, &purge);
+ } else
+ spin_unlock(&vb->lock);
+ }
+ rcu_read_unlock();
+ }
+
+ list_for_each_entry_safe(vb, n_vb, &purge, purge) {
+ list_del(&vb->purge);
+ free_vmap_block(vb);
+ }
+}
+
/**
* vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
*
@@ -1035,8 +1078,6 @@ void __init vmalloc_init(void)
vbq = &per_cpu(vmap_block_queue, i);
spin_lock_init(&vbq->lock);
INIT_LIST_HEAD(&vbq->free);
- INIT_LIST_HEAD(&vbq->dirty);
- vbq->nr_dirty = 0;
}
/* Import existing vmlist entries. */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 2/2] xfs: use scalable vmap API
2010-01-25 8:17 ` Christoph Hellwig
@ 2010-01-25 8:33 ` Nick Piggin
2010-01-25 12:37 ` Nick Piggin
0 siblings, 1 reply; 17+ messages in thread
From: Nick Piggin @ 2010-01-25 8:33 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-mm, xfs
On Mon, Jan 25, 2010 at 03:17:50AM -0500, Christoph Hellwig wrote:
> On Mon, Jan 25, 2010 at 06:54:45PM +1100, Nick Piggin wrote:
> > Is this on a 32-bit system with small vmalloc area?
>
> Yes.
OK, I would say it could easily be just due to fragmentation then.
> > When the vmap allocation fails, it would be good to basically see the
> > alloc_map and dirty_map for each of the vmap_blocks. This is going to be
> > a lot of information. Basically for all blocks with
> > free+dirty == VMAP_BBMAP_BITS are ones that could be released and you
> > could try the alloc again.
>
> Any easy way to get them? Sorry, not uptodate on your new vmalloc
> implementation anymore.
Let me try writing a few (tested) patches here first that I can send you.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 2/2] xfs: use scalable vmap API
2010-01-25 8:33 ` Nick Piggin
@ 2010-01-25 12:37 ` Nick Piggin
2010-01-25 12:39 ` Christoph Hellwig
2010-01-25 21:34 ` Christoph Hellwig
0 siblings, 2 replies; 17+ messages in thread
From: Nick Piggin @ 2010-01-25 12:37 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-mm, xfs
On Mon, Jan 25, 2010 at 07:33:09PM +1100, Nick Piggin wrote:
> > Any easy way to get them? Sorry, not uptodate on your new vmalloc
> > implementation anymore.
>
> Let me try writing a few (tested) patches here first that I can send you.
Well is it easy to reproduce the vmap failure? Here is a better tested
patch if you can try it. It fixes a couple of bugs and does some purging
of fragmented blocks.
If it does not help, can you tell me how many CPUs are in your system?
Thanks,
Nick
--
Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c 2010-01-25 23:35:03.000000000 +1100
+++ linux-2.6/mm/vmalloc.c 2010-01-25 23:35:15.000000000 +1100
@@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void
static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+/* for per-CPU blocks */
+static void purge_fragmented_blocks_allcpus(void);
+
/*
* Purges all lazily-freed vmap areas.
*
@@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsig
} else
spin_lock(&purge_lock);
+ if (sync)
+ purge_fragmented_blocks_allcpus();
+
rcu_read_lock();
list_for_each_entry_rcu(va, &vmap_area_list, list) {
if (va->flags & VM_LAZY_FREE) {
@@ -667,8 +673,6 @@ static bool vmap_initialized __read_most
struct vmap_block_queue {
spinlock_t lock;
struct list_head free;
- struct list_head dirty;
- unsigned int nr_dirty;
};
struct vmap_block {
@@ -678,10 +682,9 @@ struct vmap_block {
unsigned long free, dirty;
DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
- union {
- struct list_head free_list;
- struct rcu_head rcu_head;
- };
+ struct list_head free_list;
+ struct rcu_head rcu_head;
+ struct list_head purge;
};
/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
@@ -757,7 +760,7 @@ static struct vmap_block *new_vmap_block
vbq = &get_cpu_var(vmap_block_queue);
vb->vbq = vbq;
spin_lock(&vbq->lock);
- list_add(&vb->free_list, &vbq->free);
+ list_add_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
put_cpu_var(vmap_block_queue);
@@ -776,8 +779,6 @@ static void free_vmap_block(struct vmap_
struct vmap_block *tmp;
unsigned long vb_idx;
- BUG_ON(!list_empty(&vb->free_list));
-
vb_idx = addr_to_vb_idx(vb->va->va_start);
spin_lock(&vmap_block_tree_lock);
tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
@@ -788,12 +789,61 @@ static void free_vmap_block(struct vmap_
call_rcu(&vb->rcu_head, rcu_free_vb);
}
+static void purge_fragmented_blocks(int cpu)
+{
+ LIST_HEAD(purge);
+ struct vmap_block *vb;
+ struct vmap_block *n_vb;
+ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+
+ if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
+ continue;
+
+ spin_lock(&vb->lock);
+ if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
+ vb->free = 0; /* prevent further allocs after releasing lock */
+ vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
+ bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
+ bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
+ spin_lock(&vbq->lock);
+ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
+ spin_unlock(&vb->lock);
+ list_add_tail(&vb->purge, &purge);
+ } else
+ spin_unlock(&vb->lock);
+ }
+ rcu_read_unlock();
+
+ list_for_each_entry_safe(vb, n_vb, &purge, purge) {
+ list_del(&vb->purge);
+ free_vmap_block(vb);
+ }
+}
+
+static void purge_fragmented_blocks_thiscpu(void)
+{
+ purge_fragmented_blocks(smp_processor_id());
+}
+
+static void purge_fragmented_blocks_allcpus(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ purge_fragmented_blocks(cpu);
+}
+
static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
{
struct vmap_block_queue *vbq;
struct vmap_block *vb;
unsigned long addr = 0;
unsigned int order;
+ int purge = 0;
BUG_ON(size & ~PAGE_MASK);
BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
@@ -806,24 +856,38 @@ again:
int i;
spin_lock(&vb->lock);
+ if (vb->free < 1UL << order)
+ goto next;
+
i = bitmap_find_free_region(vb->alloc_map,
VMAP_BBMAP_BITS, order);
- if (i >= 0) {
- addr = vb->va->va_start + (i << PAGE_SHIFT);
- BUG_ON(addr_to_vb_idx(addr) !=
- addr_to_vb_idx(vb->va->va_start));
- vb->free -= 1UL << order;
- if (vb->free == 0) {
- spin_lock(&vbq->lock);
- list_del_init(&vb->free_list);
- spin_unlock(&vbq->lock);
+ if (i < 0) {
+ if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
+ /* fragmented and no outstanding allocations */
+ BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
+ purge = 1;
}
- spin_unlock(&vb->lock);
- break;
+ goto next;
}
+ addr = vb->va->va_start + (i << PAGE_SHIFT);
+ BUG_ON(addr_to_vb_idx(addr) !=
+ addr_to_vb_idx(vb->va->va_start));
+ vb->free -= 1UL << order;
+ if (vb->free == 0) {
+ spin_lock(&vbq->lock);
+ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
+ }
+ spin_unlock(&vb->lock);
+ break;
+next:
spin_unlock(&vb->lock);
}
+
+ if (purge)
+ purge_fragmented_blocks_thiscpu();
+
put_cpu_var(vmap_block_queue);
rcu_read_unlock();
@@ -860,11 +924,11 @@ static void vb_free(const void *addr, un
BUG_ON(!vb);
spin_lock(&vb->lock);
- bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
+ BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
vb->dirty += 1UL << order;
if (vb->dirty == VMAP_BBMAP_BITS) {
- BUG_ON(vb->free || !list_empty(&vb->free_list));
+ BUG_ON(vb->free);
spin_unlock(&vb->lock);
free_vmap_block(vb);
} else
@@ -1033,8 +1097,6 @@ void __init vmalloc_init(void)
vbq = &per_cpu(vmap_block_queue, i);
spin_lock_init(&vbq->lock);
INIT_LIST_HEAD(&vbq->free);
- INIT_LIST_HEAD(&vbq->dirty);
- vbq->nr_dirty = 0;
}
/* Import existing vmlist entries. */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 2/2] xfs: use scalable vmap API
2010-01-25 12:37 ` Nick Piggin
@ 2010-01-25 12:39 ` Christoph Hellwig
2010-01-25 21:34 ` Christoph Hellwig
1 sibling, 0 replies; 17+ messages in thread
From: Christoph Hellwig @ 2010-01-25 12:39 UTC (permalink / raw)
To: Nick Piggin; +Cc: Christoph Hellwig, linux-mm, xfs
On Mon, Jan 25, 2010 at 11:37:46PM +1100, Nick Piggin wrote:
> On Mon, Jan 25, 2010 at 07:33:09PM +1100, Nick Piggin wrote:
> > > Any easy way to get them? Sorry, not uptodate on your new vmalloc
> > > implementation anymore.
> >
> > Let me try writing a few (tested) patches here first that I can send you.
>
> Well is it easy to reproduce the vmap failure? Here is a better tested
> patch if you can try it. It fixes a couple of bugs and does some purging
> of fragmented blocks.
>
> If it does not help, can you tell me how many CPUs in your system?
The simplest one to reproduce it is a 1 cpu kvm virtual machine. Will
give your patch a try ASAP - while it's easy to reproduce it takes some
time as it appears only when doing a second xfstests run after a first
finished fine, which makes it look like a leak to me.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 2/2] xfs: use scalable vmap API
2010-01-25 12:37 ` Nick Piggin
2010-01-25 12:39 ` Christoph Hellwig
@ 2010-01-25 21:34 ` Christoph Hellwig
2010-01-27 8:38 ` Nick Piggin
1 sibling, 1 reply; 17+ messages in thread
From: Christoph Hellwig @ 2010-01-25 21:34 UTC (permalink / raw)
To: Nick Piggin; +Cc: Christoph Hellwig, linux-mm, xfs
On Mon, Jan 25, 2010 at 11:37:46PM +1100, Nick Piggin wrote:
> On Mon, Jan 25, 2010 at 07:33:09PM +1100, Nick Piggin wrote:
> > > Any easy way to get them? Sorry, not uptodate on your new vmalloc
> > > implementation anymore.
> >
> > Let me try writing a few (tested) patches here first that I can send you.
>
> Well is it easy to reproduce the vmap failure? Here is a better tested
> patch if you can try it. It fixes a couple of bugs and does some purging
> of fragmented blocks.
So far I've not run out of vmalloc space yet with quite a few xfstests
iterations and not encountered any other problems either.
Thanks for looking into this!
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 2/2] xfs: use scalable vmap API
2010-01-25 21:34 ` Christoph Hellwig
@ 2010-01-27 8:38 ` Nick Piggin
2010-02-01 11:01 ` Christoph Hellwig
0 siblings, 1 reply; 17+ messages in thread
From: Nick Piggin @ 2010-01-27 8:38 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-mm, xfs
On Mon, Jan 25, 2010 at 04:34:03PM -0500, Christoph Hellwig wrote:
> On Mon, Jan 25, 2010 at 11:37:46PM +1100, Nick Piggin wrote:
> > On Mon, Jan 25, 2010 at 07:33:09PM +1100, Nick Piggin wrote:
> > > > Any easy way to get them? Sorry, not uptodate on your new vmalloc
> > > > implementation anymore.
> > >
> > > Let me try writing a few (tested) patches here first that I can send you.
> >
> > Well is it easy to reproduce the vmap failure? Here is a better tested
> > patch if you can try it. It fixes a couple of bugs and does some purging
> > of fragmented blocks.
>
> So far I've not run out of vmalloc space yet with quite a few xfstests
> iterations and not encountered any other problems either.
>
> Thanks for looking into this!
OK thanks for testing. I'll send it upstream if you haven't had any
problems so far.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 2/2] xfs: use scalable vmap API
2010-01-27 8:38 ` Nick Piggin
@ 2010-02-01 11:01 ` Christoph Hellwig
2010-02-01 11:28 ` Nick Piggin
0 siblings, 1 reply; 17+ messages in thread
From: Christoph Hellwig @ 2010-02-01 11:01 UTC (permalink / raw)
To: Nick Piggin; +Cc: Christoph Hellwig, linux-mm, xfs
On Wed, Jan 27, 2010 at 07:38:19PM +1100, Nick Piggin wrote:
> > So far I've not run out of vmalloc space yet with quite a few xfstests
> > iterations and not encountered any other problems either.
> >
> > Thanks for looking into this!
>
> OK thanks for testing. I'll send it upstream if you haven't had any
> problems so far.
Still working fine, so please send it upstream ASAP. That'll make
re-enabling the scalable vmap API in XFS much easier for 2.6.34.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [patch 2/2] xfs: use scalable vmap API
2010-02-01 11:01 ` Christoph Hellwig
@ 2010-02-01 11:28 ` Nick Piggin
0 siblings, 0 replies; 17+ messages in thread
From: Nick Piggin @ 2010-02-01 11:28 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-mm, xfs
On Mon, Feb 01, 2010 at 06:01:54AM -0500, Christoph Hellwig wrote:
> On Wed, Jan 27, 2010 at 07:38:19PM +1100, Nick Piggin wrote:
> > > So far I've not run out of vmalloc space yet with quite a few xfstests
> > > iterations and not encountered any other problems either.
> > >
> > > Thanks for looking into this!
> >
> > OK thanks for testing. I'll send it upstream if you haven't had any
> > problems so far.
>
> Still working fine, so please send it upstream ASAP. That'll make
> re-eabling the scalable vmap API in XFS much more easier for 2.6.34.
Done. Thanks for testing this.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 1/2] xfs: remove old vmap cache
@ 2010-03-16 18:55 Alex Elder
0 siblings, 0 replies; 17+ messages in thread
From: Alex Elder @ 2010-03-16 18:55 UTC (permalink / raw)
To: XFS Mailing List; +Cc: hch
Re-apply a commit that had been reverted due to regressions
that have since been fixed.
Original commit: d2859751cd0bf586941ffa7308635a293f943c17
Author: Nick Piggin <npiggin@suse.de>
Date: Tue, 6 Jan 2009 14:40:44 +1100
XFS's vmap batching simply defers a number (up to 64) of vunmaps,
and keeps track of them in a list. To purge the batch, it just goes
through the list and calls vunmap on each one. This is pretty poor:
a global TLB flush is generally still performed on each vunmap, with
the most expensive parts of the operation being the broadcast IPIs
and locking involved in the SMP callouts, and the locking involved
in the vmap management -- none of these are avoided by just batching
up the calls. I'm actually surprised it ever made much difference.
(Now that the lazy vmap allocator is upstream, this description is
not quite right, but the vunmap batching still doesn't seem to do
much).
Rip all this logic out of XFS completely. I will improve vmap
performance and scalability directly in a subsequent patch.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Reviewed-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
The only change I made was to use the "new" xfs_buf_is_vmapped()
function in a place it had been open-coded in the original.
Modified-by: Alex Elder <aelder@sgi.com>
---
fs/xfs/linux-2.6/xfs_buf.c | 76 ---------------------------------------------
1 file changed, 1 insertion(+), 75 deletions(-)
Index: b/fs/xfs/linux-2.6/xfs_buf.c
===================================================================
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -168,75 +168,6 @@ test_page_region(
}
/*
- * Mapping of multi-page buffers into contiguous virtual space
- */
-
-typedef struct a_list {
- void *vm_addr;
- struct a_list *next;
-} a_list_t;
-
-static a_list_t *as_free_head;
-static int as_list_len;
-static DEFINE_SPINLOCK(as_lock);
-
-/*
- * Try to batch vunmaps because they are costly.
- */
-STATIC void
-free_address(
- void *addr)
-{
- a_list_t *aentry;
-
-#ifdef CONFIG_XEN
- /*
- * Xen needs to be able to make sure it can get an exclusive
- * RO mapping of pages it wants to turn into a pagetable. If
- * a newly allocated page is also still being vmap()ed by xfs,
- * it will cause pagetable construction to fail. This is a
- * quick workaround to always eagerly unmap pages so that Xen
- * is happy.
- */
- vunmap(addr);
- return;
-#endif
-
- aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
- if (likely(aentry)) {
- spin_lock(&as_lock);
- aentry->next = as_free_head;
- aentry->vm_addr = addr;
- as_free_head = aentry;
- as_list_len++;
- spin_unlock(&as_lock);
- } else {
- vunmap(addr);
- }
-}
-
-STATIC void
-purge_addresses(void)
-{
- a_list_t *aentry, *old;
-
- if (as_free_head == NULL)
- return;
-
- spin_lock(&as_lock);
- aentry = as_free_head;
- as_free_head = NULL;
- as_list_len = 0;
- spin_unlock(&as_lock);
-
- while ((old = aentry) != NULL) {
- vunmap(aentry->vm_addr);
- aentry = aentry->next;
- kfree(old);
- }
-}
-
-/*
* Internal xfs_buf_t object manipulation
*/
@@ -337,7 +268,7 @@ xfs_buf_free(
uint i;
if (xfs_buf_is_vmapped(bp))
- free_address(bp->b_addr - bp->b_offset);
+ vunmap(bp->b_addr - bp->b_offset);
for (i = 0; i < bp->b_page_count; i++) {
struct page *page = bp->b_pages[i];
@@ -457,8 +388,6 @@ _xfs_buf_map_pages(
bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
bp->b_flags |= XBF_MAPPED;
} else if (flags & XBF_MAPPED) {
- if (as_list_len > 64)
- purge_addresses();
bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
VM_MAP, PAGE_KERNEL);
if (unlikely(bp->b_addr == NULL))
@@ -1955,9 +1884,6 @@ xfsbufd(
xfs_buf_iostrategy(bp);
count++;
}
-
- if (as_list_len > 0)
- purge_addresses();
if (count)
blk_run_address_space(target->bt_mapping);
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2010-03-16 18:54 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-21 8:25 [patch 1/2] xfs: remove old vmap cache Nick Piggin
2008-10-21 8:27 ` [patch 2/2] xfs: use scalable vmap API Nick Piggin
2008-10-21 12:09 ` Christoph Hellwig
2008-10-22 9:30 ` Nick Piggin
2010-01-19 12:15 ` Christoph Hellwig
2010-01-25 7:54 ` Nick Piggin
2010-01-25 8:17 ` Christoph Hellwig
2010-01-25 8:33 ` Nick Piggin
2010-01-25 12:37 ` Nick Piggin
2010-01-25 12:39 ` Christoph Hellwig
2010-01-25 21:34 ` Christoph Hellwig
2010-01-27 8:38 ` Nick Piggin
2010-02-01 11:01 ` Christoph Hellwig
2010-02-01 11:28 ` Nick Piggin
2010-01-25 8:30 ` Nick Piggin
2008-10-21 12:09 ` [patch 1/2] xfs: remove old vmap cache Christoph Hellwig
-- strict thread matches above, loose matches on Subject: below --
2010-03-16 18:55 [PATCH " Alex Elder
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox