From: Nick Piggin <npiggin@suse.de>
To: Christoph Hellwig <hch@infradead.org>
Cc: linux-mm@kvack.org, xfs@oss.sgi.com
Subject: Re: [patch 2/2] xfs: use scalable vmap API
Date: Mon, 25 Jan 2010 23:37:46 +1100 [thread overview]
Message-ID: <20100125123746.GA24406@laptop> (raw)
In-Reply-To: <20100125083309.GF19664@laptop>
On Mon, Jan 25, 2010 at 07:33:09PM +1100, Nick Piggin wrote:
> > Any easy way to get them? Sorry, not uptodate on your new vmalloc
> > implementation anymore.
>
> Let me try writing a few (tested) patches here first that I can send you.
Well is it easy to reproduce the vmap failure? Here is a better tested
patch if you can try it. It fixes a couple of bugs and does some purging
of fragmented blocks.
If it does not help, can you tell me how many CPUs in your system?
Thanks,
Nick
--
Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c 2010-01-25 23:35:03.000000000 +1100
+++ linux-2.6/mm/vmalloc.c 2010-01-25 23:35:15.000000000 +1100
@@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void
static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+/* for per-CPU blocks */
+static void purge_fragmented_blocks_allcpus(void);
+
/*
* Purges all lazily-freed vmap areas.
*
@@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsig
} else
spin_lock(&purge_lock);
+ if (sync)
+ purge_fragmented_blocks_allcpus();
+
rcu_read_lock();
list_for_each_entry_rcu(va, &vmap_area_list, list) {
if (va->flags & VM_LAZY_FREE) {
@@ -667,8 +673,6 @@ static bool vmap_initialized __read_most
struct vmap_block_queue {
spinlock_t lock;
struct list_head free;
- struct list_head dirty;
- unsigned int nr_dirty;
};
struct vmap_block {
@@ -678,10 +682,9 @@ struct vmap_block {
unsigned long free, dirty;
DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
- union {
- struct list_head free_list;
- struct rcu_head rcu_head;
- };
+ struct list_head free_list;
+ struct rcu_head rcu_head;
+ struct list_head purge;
};
/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
@@ -757,7 +760,7 @@ static struct vmap_block *new_vmap_block
vbq = &get_cpu_var(vmap_block_queue);
vb->vbq = vbq;
spin_lock(&vbq->lock);
- list_add(&vb->free_list, &vbq->free);
+ list_add_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
put_cpu_var(vmap_block_queue);
@@ -776,8 +779,6 @@ static void free_vmap_block(struct vmap_
struct vmap_block *tmp;
unsigned long vb_idx;
- BUG_ON(!list_empty(&vb->free_list));
-
vb_idx = addr_to_vb_idx(vb->va->va_start);
spin_lock(&vmap_block_tree_lock);
tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
@@ -788,12 +789,61 @@ static void free_vmap_block(struct vmap_
call_rcu(&vb->rcu_head, rcu_free_vb);
}
+static void purge_fragmented_blocks(int cpu)
+{
+ LIST_HEAD(purge);
+ struct vmap_block *vb;
+ struct vmap_block *n_vb;
+ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+
+ if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
+ continue;
+
+ spin_lock(&vb->lock);
+ if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
+ vb->free = 0; /* prevent further allocs after releasing lock */
+ vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
+ bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
+ bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
+ spin_lock(&vbq->lock);
+ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
+ spin_unlock(&vb->lock);
+ list_add_tail(&vb->purge, &purge);
+ } else
+ spin_unlock(&vb->lock);
+ }
+ rcu_read_unlock();
+
+ list_for_each_entry_safe(vb, n_vb, &purge, purge) {
+ list_del(&vb->purge);
+ free_vmap_block(vb);
+ }
+}
+
+static void purge_fragmented_blocks_thiscpu(void)
+{
+ purge_fragmented_blocks(smp_processor_id());
+}
+
+static void purge_fragmented_blocks_allcpus(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ purge_fragmented_blocks(cpu);
+}
+
static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
{
struct vmap_block_queue *vbq;
struct vmap_block *vb;
unsigned long addr = 0;
unsigned int order;
+ int purge = 0;
BUG_ON(size & ~PAGE_MASK);
BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
@@ -806,24 +856,38 @@ again:
int i;
spin_lock(&vb->lock);
+ if (vb->free < 1UL << order)
+ goto next;
+
i = bitmap_find_free_region(vb->alloc_map,
VMAP_BBMAP_BITS, order);
- if (i >= 0) {
- addr = vb->va->va_start + (i << PAGE_SHIFT);
- BUG_ON(addr_to_vb_idx(addr) !=
- addr_to_vb_idx(vb->va->va_start));
- vb->free -= 1UL << order;
- if (vb->free == 0) {
- spin_lock(&vbq->lock);
- list_del_init(&vb->free_list);
- spin_unlock(&vbq->lock);
+ if (i < 0) {
+ if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
+ /* fragmented and no outstanding allocations */
+ BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
+ purge = 1;
}
- spin_unlock(&vb->lock);
- break;
+ goto next;
}
+ addr = vb->va->va_start + (i << PAGE_SHIFT);
+ BUG_ON(addr_to_vb_idx(addr) !=
+ addr_to_vb_idx(vb->va->va_start));
+ vb->free -= 1UL << order;
+ if (vb->free == 0) {
+ spin_lock(&vbq->lock);
+ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
+ }
+ spin_unlock(&vb->lock);
+ break;
+next:
spin_unlock(&vb->lock);
}
+
+ if (purge)
+ purge_fragmented_blocks_thiscpu();
+
put_cpu_var(vmap_block_queue);
rcu_read_unlock();
@@ -860,11 +924,11 @@ static void vb_free(const void *addr, un
BUG_ON(!vb);
spin_lock(&vb->lock);
- bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
+ BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
vb->dirty += 1UL << order;
if (vb->dirty == VMAP_BBMAP_BITS) {
- BUG_ON(vb->free || !list_empty(&vb->free_list));
+ BUG_ON(vb->free);
spin_unlock(&vb->lock);
free_vmap_block(vb);
} else
@@ -1033,8 +1097,6 @@ void __init vmalloc_init(void)
vbq = &per_cpu(vmap_block_queue, i);
spin_lock_init(&vbq->lock);
INIT_LIST_HEAD(&vbq->free);
- INIT_LIST_HEAD(&vbq->dirty);
- vbq->nr_dirty = 0;
}
/* Import existing vmlist entries. */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
WARNING: multiple messages have this Message-ID (diff)
From: Nick Piggin <npiggin@suse.de>
To: Christoph Hellwig <hch@infradead.org>
Cc: xfs@oss.sgi.com, linux-mm@kvack.org
Subject: Re: [patch 2/2] xfs: use scalable vmap API
Date: Mon, 25 Jan 2010 23:37:46 +1100 [thread overview]
Message-ID: <20100125123746.GA24406@laptop> (raw)
In-Reply-To: <20100125083309.GF19664@laptop>
On Mon, Jan 25, 2010 at 07:33:09PM +1100, Nick Piggin wrote:
> > Any easy way to get them? Sorry, not uptodate on your new vmalloc
> > implementation anymore.
>
> Let me try writing a few (tested) patches here first that I can send you.
Well is it easy to reproduce the vmap failure? Here is a better tested
patch if you can try it. It fixes a couple of bugs and does some purging
of fragmented blocks.
If it does not help, can you tell me how many CPUs in your system?
Thanks,
Nick
--
Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c 2010-01-25 23:35:03.000000000 +1100
+++ linux-2.6/mm/vmalloc.c 2010-01-25 23:35:15.000000000 +1100
@@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void
static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+/* for per-CPU blocks */
+static void purge_fragmented_blocks_allcpus(void);
+
/*
* Purges all lazily-freed vmap areas.
*
@@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsig
} else
spin_lock(&purge_lock);
+ if (sync)
+ purge_fragmented_blocks_allcpus();
+
rcu_read_lock();
list_for_each_entry_rcu(va, &vmap_area_list, list) {
if (va->flags & VM_LAZY_FREE) {
@@ -667,8 +673,6 @@ static bool vmap_initialized __read_most
struct vmap_block_queue {
spinlock_t lock;
struct list_head free;
- struct list_head dirty;
- unsigned int nr_dirty;
};
struct vmap_block {
@@ -678,10 +682,9 @@ struct vmap_block {
unsigned long free, dirty;
DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
- union {
- struct list_head free_list;
- struct rcu_head rcu_head;
- };
+ struct list_head free_list;
+ struct rcu_head rcu_head;
+ struct list_head purge;
};
/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
@@ -757,7 +760,7 @@ static struct vmap_block *new_vmap_block
vbq = &get_cpu_var(vmap_block_queue);
vb->vbq = vbq;
spin_lock(&vbq->lock);
- list_add(&vb->free_list, &vbq->free);
+ list_add_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
put_cpu_var(vmap_block_queue);
@@ -776,8 +779,6 @@ static void free_vmap_block(struct vmap_
struct vmap_block *tmp;
unsigned long vb_idx;
- BUG_ON(!list_empty(&vb->free_list));
-
vb_idx = addr_to_vb_idx(vb->va->va_start);
spin_lock(&vmap_block_tree_lock);
tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
@@ -788,12 +789,61 @@ static void free_vmap_block(struct vmap_
call_rcu(&vb->rcu_head, rcu_free_vb);
}
+static void purge_fragmented_blocks(int cpu)
+{
+ LIST_HEAD(purge);
+ struct vmap_block *vb;
+ struct vmap_block *n_vb;
+ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+
+ if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
+ continue;
+
+ spin_lock(&vb->lock);
+ if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
+ vb->free = 0; /* prevent further allocs after releasing lock */
+ vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
+ bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
+ bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
+ spin_lock(&vbq->lock);
+ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
+ spin_unlock(&vb->lock);
+ list_add_tail(&vb->purge, &purge);
+ } else
+ spin_unlock(&vb->lock);
+ }
+ rcu_read_unlock();
+
+ list_for_each_entry_safe(vb, n_vb, &purge, purge) {
+ list_del(&vb->purge);
+ free_vmap_block(vb);
+ }
+}
+
+static void purge_fragmented_blocks_thiscpu(void)
+{
+ purge_fragmented_blocks(smp_processor_id());
+}
+
+static void purge_fragmented_blocks_allcpus(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ purge_fragmented_blocks(cpu);
+}
+
static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
{
struct vmap_block_queue *vbq;
struct vmap_block *vb;
unsigned long addr = 0;
unsigned int order;
+ int purge = 0;
BUG_ON(size & ~PAGE_MASK);
BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
@@ -806,24 +856,38 @@ again:
int i;
spin_lock(&vb->lock);
+ if (vb->free < 1UL << order)
+ goto next;
+
i = bitmap_find_free_region(vb->alloc_map,
VMAP_BBMAP_BITS, order);
- if (i >= 0) {
- addr = vb->va->va_start + (i << PAGE_SHIFT);
- BUG_ON(addr_to_vb_idx(addr) !=
- addr_to_vb_idx(vb->va->va_start));
- vb->free -= 1UL << order;
- if (vb->free == 0) {
- spin_lock(&vbq->lock);
- list_del_init(&vb->free_list);
- spin_unlock(&vbq->lock);
+ if (i < 0) {
+ if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
+ /* fragmented and no outstanding allocations */
+ BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
+ purge = 1;
}
- spin_unlock(&vb->lock);
- break;
+ goto next;
}
+ addr = vb->va->va_start + (i << PAGE_SHIFT);
+ BUG_ON(addr_to_vb_idx(addr) !=
+ addr_to_vb_idx(vb->va->va_start));
+ vb->free -= 1UL << order;
+ if (vb->free == 0) {
+ spin_lock(&vbq->lock);
+ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
+ }
+ spin_unlock(&vb->lock);
+ break;
+next:
spin_unlock(&vb->lock);
}
+
+ if (purge)
+ purge_fragmented_blocks_thiscpu();
+
put_cpu_var(vmap_block_queue);
rcu_read_unlock();
@@ -860,11 +924,11 @@ static void vb_free(const void *addr, un
BUG_ON(!vb);
spin_lock(&vb->lock);
- bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
+ BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
vb->dirty += 1UL << order;
if (vb->dirty == VMAP_BBMAP_BITS) {
- BUG_ON(vb->free || !list_empty(&vb->free_list));
+ BUG_ON(vb->free);
spin_unlock(&vb->lock);
free_vmap_block(vb);
} else
@@ -1033,8 +1097,6 @@ void __init vmalloc_init(void)
vbq = &per_cpu(vmap_block_queue, i);
spin_lock_init(&vbq->lock);
INIT_LIST_HEAD(&vbq->free);
- INIT_LIST_HEAD(&vbq->dirty);
- vbq->nr_dirty = 0;
}
/* Import existing vmlist entries. */
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2010-01-25 12:36 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-10-21 8:25 [patch 1/2] xfs: remove old vmap cache Nick Piggin
2008-10-21 8:27 ` [patch 2/2] xfs: use scalable vmap API Nick Piggin
2008-10-21 12:09 ` Christoph Hellwig
2008-10-22 9:30 ` Nick Piggin
2010-01-19 12:15 ` Christoph Hellwig
2010-01-19 12:15 ` Christoph Hellwig
2010-01-25 7:54 ` Nick Piggin
2010-01-25 7:54 ` Nick Piggin
2010-01-25 8:17 ` Christoph Hellwig
2010-01-25 8:17 ` Christoph Hellwig
2010-01-25 8:33 ` Nick Piggin
2010-01-25 8:33 ` Nick Piggin
2010-01-25 12:37 ` Nick Piggin [this message]
2010-01-25 12:37 ` Nick Piggin
2010-01-25 12:39 ` Christoph Hellwig
2010-01-25 12:39 ` Christoph Hellwig
2010-01-25 21:34 ` Christoph Hellwig
2010-01-25 21:34 ` Christoph Hellwig
2010-01-27 8:38 ` Nick Piggin
2010-01-27 8:38 ` Nick Piggin
2010-02-01 11:01 ` Christoph Hellwig
2010-02-01 11:01 ` Christoph Hellwig
2010-02-01 11:28 ` Nick Piggin
2010-02-01 11:28 ` Nick Piggin
2010-01-25 8:30 ` Nick Piggin
2010-01-25 8:30 ` Nick Piggin
2008-10-21 12:09 ` [patch 1/2] xfs: remove old vmap cache Christoph Hellwig
-- strict thread matches above, loose matches on Subject: below --
2010-03-16 18:55 [PATCH 2/2] xfs: use scalable vmap API Alex Elder
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100125123746.GA24406@laptop \
--to=npiggin@suse.de \
--cc=hch@infradead.org \
--cc=linux-mm@kvack.org \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.