* [PATCH 0/3] mm/vmalloc: Cache the /proc/meminfo vmalloc statistics

From: Ingo Molnar
Date: 2015-08-22 10:44 UTC
To: linux-kernel, linux-mm
Cc: Dave Hansen, Peter Zijlstra, David Rientjes, Rik van Riel,
    Rasmus Villemoes, Linus Torvalds

This series is a variant of Linus's jiffies-based caching approach posted
in the "get_vmalloc_info() and /proc/meminfo insanely expensive" thread
on lkml.

The idea is to track modifications to the vmalloc list by wrapping the
lock/unlock primitives, and to put a flag next to the spinlock. If the
spinlock is taken then it's cheap to modify this flag, and if it has not
been taken (the cached case) it is in essence a read-mostly variable for
every CPU.

It seems to work for me, but it's only very (very!) lightly tested.

Would something like this be acceptable (and is it correct)?

Thanks,

        Ingo

Ingo Molnar (3):
  mm/vmalloc: Abstract out vmap_area_lock lock/unlock operations
  mm/vmalloc: Track vmalloc info changes
  mm/vmalloc: Cache the vmalloc memory info

 mm/vmalloc.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 57 insertions(+), 25 deletions(-)

--
2.1.4
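In outline, the three patches combine into roughly the following (a
condensed sketch assembled here from the diffs below for readability; it
is not a literal hunk from any one patch):

        static DEFINE_SPINLOCK(vmap_area_lock);
        static int vmap_info_changed;   /* dirtied on every vmalloc list change */

        static inline void vmap_unlock(void)
        {
                vmap_info_changed = 1;  /* cheap: we already own this cacheline */
                spin_unlock(&vmap_area_lock);
        }

        void get_vmalloc_info(struct vmalloc_info *vmi)
        {
                static struct vmalloc_info cached_info;

                if (!vmap_info_changed) {       /* read-mostly fast path */
                        *vmi = cached_info;
                        return;
                }

                WRITE_ONCE(vmap_info_changed, 0);
                barrier();
                calc_vmalloc_info(vmi);         /* the expensive list walk */
                barrier();
                cached_info = *vmi;
        }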
* [PATCH 1/3] mm/vmalloc: Abstract out vmap_area_lock lock/unlock operations

From: Ingo Molnar
Date: 2015-08-22 10:44 UTC
To: linux-kernel, linux-mm
Cc: Dave Hansen, Peter Zijlstra, David Rientjes, Rik van Riel,
    Rasmus Villemoes, Linus Torvalds

We want to add some extra cache invalidation logic to vmalloc() area list
unlocks - for that, first abstract away the vmap_area_lock operations
into vmap_lock()/vmap_unlock().

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 mm/vmalloc.c | 55 +++++++++++++++++++++++++++++++++----------------------
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2faaa2976447..605138083880 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -277,6 +277,17 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 #define VM_VM_AREA      0x04

 static DEFINE_SPINLOCK(vmap_area_lock);
+
+static inline void vmap_lock(void)
+{
+        spin_lock(&vmap_area_lock);
+}
+
+static inline void vmap_unlock(void)
+{
+        spin_unlock(&vmap_area_lock);
+}
+
 /* Export for kexec only */
 LIST_HEAD(vmap_area_list);
 static struct rb_root vmap_area_root = RB_ROOT;
@@ -373,7 +384,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
         kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);

 retry:
-        spin_lock(&vmap_area_lock);
+        vmap_lock();
         /*
          * Invalidate cache if we have more permissive parameters.
          * cached_hole_size notes the largest hole noticed _below_
@@ -452,7 +463,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
         va->flags = 0;
         __insert_vmap_area(va);
         free_vmap_cache = &va->rb_node;
-        spin_unlock(&vmap_area_lock);
+        vmap_unlock();

         BUG_ON(va->va_start & (align-1));
         BUG_ON(va->va_start < vstart);
@@ -461,7 +472,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
         return va;

 overflow:
-        spin_unlock(&vmap_area_lock);
+        vmap_unlock();
         if (!purged) {
                 purge_vmap_area_lazy();
                 purged = 1;
@@ -514,9 +525,9 @@ static void __free_vmap_area(struct vmap_area *va)
  */
 static void free_vmap_area(struct vmap_area *va)
 {
-        spin_lock(&vmap_area_lock);
+        vmap_lock();
         __free_vmap_area(va);
-        spin_unlock(&vmap_area_lock);
+        vmap_unlock();
 }

 /*
@@ -642,10 +653,10 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
                 flush_tlb_kernel_range(*start, *end);

         if (nr) {
-                spin_lock(&vmap_area_lock);
+                vmap_lock();
                 list_for_each_entry_safe(va, n_va, &valist, purge_list)
                         __free_vmap_area(va);
-                spin_unlock(&vmap_area_lock);
+                vmap_unlock();
         }
         spin_unlock(&purge_lock);
 }
@@ -707,9 +718,9 @@ static struct vmap_area *find_vmap_area(unsigned long addr)
 {
         struct vmap_area *va;

-        spin_lock(&vmap_area_lock);
+        vmap_lock();
         va = __find_vmap_area(addr);
-        spin_unlock(&vmap_area_lock);
+        vmap_unlock();

         return va;
 }
@@ -1304,14 +1315,14 @@ EXPORT_SYMBOL_GPL(map_vm_area);
 static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
                               unsigned long flags, const void *caller)
 {
-        spin_lock(&vmap_area_lock);
+        vmap_lock();
         vm->flags = flags;
         vm->addr = (void *)va->va_start;
         vm->size = va->va_end - va->va_start;
         vm->caller = caller;
         va->vm = vm;
         va->flags |= VM_VM_AREA;
-        spin_unlock(&vmap_area_lock);
+        vmap_unlock();
 }

 static void clear_vm_uninitialized_flag(struct vm_struct *vm)
@@ -1433,10 +1444,10 @@ struct vm_struct *remove_vm_area(const void *addr)
         if (va && va->flags & VM_VM_AREA) {
                 struct vm_struct *vm = va->vm;

-                spin_lock(&vmap_area_lock);
+                vmap_lock();
                 va->vm = NULL;
                 va->flags &= ~VM_VM_AREA;
-                spin_unlock(&vmap_area_lock);
+                vmap_unlock();

                 vmap_debug_free_range(va->va_start, va->va_end);
                 kasan_free_shadow(vm);
@@ -2008,7 +2019,7 @@ long vread(char *buf, char *addr, unsigned long count)
         if ((unsigned long) addr + count < count)
                 count = -(unsigned long) addr;

-        spin_lock(&vmap_area_lock);
+        vmap_lock();
         list_for_each_entry(va, &vmap_area_list, list) {
                 if (!count)
                         break;
@@ -2040,7 +2051,7 @@ long vread(char *buf, char *addr, unsigned long count)
                 count -= n;
         }
 finished:
-        spin_unlock(&vmap_area_lock);
+        vmap_unlock();

         if (buf == buf_start)
                 return 0;
@@ -2090,7 +2101,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
                 count = -(unsigned long) addr;
         buflen = count;

-        spin_lock(&vmap_area_lock);
+        vmap_lock();
         list_for_each_entry(va, &vmap_area_list, list) {
                 if (!count)
                         break;
@@ -2121,7 +2132,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
                 count -= n;
         }
 finished:
-        spin_unlock(&vmap_area_lock);
+        vmap_unlock();
         if (!copied)
                 return 0;
         return buflen;
@@ -2435,7 +2446,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
                 goto err_free;
         }
 retry:
-        spin_lock(&vmap_area_lock);
+        vmap_lock();

         /* start scanning - we scan from the top, begin with the last area */
         area = term_area = last_area;
@@ -2457,7 +2468,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
                  * comparing.
                  */
                 if (base + last_end < vmalloc_start + last_end) {
-                        spin_unlock(&vmap_area_lock);
+                        vmap_unlock();
                         if (!purged) {
                                 purge_vmap_area_lazy();
                                 purged = true;
@@ -2512,7 +2523,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,

         vmap_area_pcpu_hole = base + offsets[last_area];

-        spin_unlock(&vmap_area_lock);
+        vmap_unlock();

         /* insert all vm's */
         for (area = 0; area < nr_vms; area++)
@@ -2557,7 +2568,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
         loff_t n = *pos;
         struct vmap_area *va;

-        spin_lock(&vmap_area_lock);
+        vmap_lock();
         va = list_entry((&vmap_area_list)->next, typeof(*va), list);
         while (n > 0 && &va->list != &vmap_area_list) {
                 n--;
@@ -2585,7 +2596,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos)
 static void s_stop(struct seq_file *m, void *p)
         __releases(&vmap_area_lock)
 {
-        spin_unlock(&vmap_area_lock);
+        vmap_unlock();
 }

 static void show_numa_info(struct seq_file *m, struct vm_struct *v)
--
2.1.4
* [PATCH 2/3] mm/vmalloc: Track vmalloc info changes

From: Ingo Molnar
Date: 2015-08-22 10:44 UTC
To: linux-kernel, linux-mm
Cc: Dave Hansen, Peter Zijlstra, David Rientjes, Rik van Riel,
    Rasmus Villemoes, Linus Torvalds

Add a 'vmap_info_changed' flag to track changes to vmalloc() statistics.

For simplicity this flag is set every time we unlock the vmap_area_lock.

This flag is not yet used.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 mm/vmalloc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 605138083880..d21febaa557a 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -276,7 +276,9 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 #define VM_LAZY_FREEING 0x02
 #define VM_VM_AREA      0x04

-static DEFINE_SPINLOCK(vmap_area_lock);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(vmap_area_lock);
+
+static int vmap_info_changed;

 static inline void vmap_lock(void)
 {
@@ -285,6 +287,7 @@ static inline void vmap_lock(void)

 static inline void vmap_unlock(void)
 {
+        vmap_info_changed = 1;
         spin_unlock(&vmap_area_lock);
 }
--
2.1.4
* [PATCH 3/3] mm/vmalloc: Cache the vmalloc memory info

From: Ingo Molnar
Date: 2015-08-22 10:45 UTC
To: linux-kernel, linux-mm
Cc: Dave Hansen, Peter Zijlstra, David Rientjes, Rik van Riel,
    Rasmus Villemoes, Linus Torvalds

Linus reported that glibc (rather stupidly) reads /proc/meminfo for every
sysinfo() call, which causes the Git build to use a surprising amount of
CPU time, mostly due to the overhead of get_vmalloc_info() - which walks
a long list to compute its statistics.

Modify Linus's jiffies-based patch to use the newly introduced
vmap_info_changed flag instead: when we cache the vmalloc info, we clear
the flag. If the flag gets re-set then we'll calculate the information
again.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 mm/vmalloc.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d21febaa557a..ef48e557df5a 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2702,7 +2702,7 @@ static int __init proc_vmalloc_init(void)
 }
 module_init(proc_vmalloc_init);

-void get_vmalloc_info(struct vmalloc_info *vmi)
+static void calc_vmalloc_info(struct vmalloc_info *vmi)
 {
         struct vmap_area *va;
         unsigned long free_area_size;
@@ -2749,5 +2749,23 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
 out:
         rcu_read_unlock();
 }
-#endif

+void get_vmalloc_info(struct vmalloc_info *vmi)
+{
+        static struct vmalloc_info cached_info;
+
+        if (!vmap_info_changed) {
+                *vmi = cached_info;
+                return;
+        }
+
+        WRITE_ONCE(vmap_info_changed, 0);
+        barrier();
+
+        calc_vmalloc_info(vmi);
+
+        barrier();
+        cached_info = *vmi;
+}
+
+#endif
--
2.1.4
* Re: [PATCH 0/3] mm/vmalloc: Cache the /proc/meminfo vmalloc statistics

From: Linus Torvalds
Date: 2015-08-22 14:36 UTC
To: Ingo Molnar
Cc: Linux Kernel Mailing List, linux-mm, Dave Hansen, Peter Zijlstra,
    David Rientjes, Rik van Riel, Rasmus Villemoes

On Sat, Aug 22, 2015 at 3:44 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> Would something like this be acceptable (and is it correct)?

I don't think any of this can be called "correct", in that the unlocked
accesses to the cached state are clearly racy, but I think it's very much
"acceptable".

        Linus
* Re: [PATCH 0/3] mm/vmalloc: Cache the /proc/meminfo vmalloc statistics

From: George Spelvin
Date: 2015-08-23 4:48 UTC
To: mingo
Cc: linux, linux-kernel, linux-mm, torvalds
Linus wrote:
> I don't think any of this can be called "correct", in that the
> unlocked accesses to the cached state are clearly racy, but I think
> it's very much "acceptable".
I'd think you could easily fix that with a seqlock-like system.
What makes it so simple is that you can always fall back to
calc_vmalloc_info if there's any problem, rather than looping or blocking.
The basic idea is that you have a seqlock counter, but unless the two
lsbits are exactly 10, the cached information is stale.
Basically, you need a seqlock and a spinlock. The seqlock does
most of the work, and the spinlock ensures that there's only one
updater of the cache.
vmap_unlock() does set_bit(0, &seq->sequence). This marks the information
as stale.
get_vmalloc_info reads the seqlock. There are two cases:
- If the two lsbits are 10, the cached information is valid.
Copy it out, re-check the seqlock, and loop if the sequence
number changes.
- In any other case, the cached information is
not valid.
- Try to obtain the spinlock, but do not block if it's unavailable;
  in that case just compute the info fresh without updating the cache.
- If the lock is acquired:
- Set the sequence to (sequence | 3) + 1 (we're the only writer)
- This bumps the sequence number and leaves the lsbits at 00 (invalid)
- Memory barrier TBD. Do the RCU ops in calc_vmalloc_info do it for us?
- Call calc_vmalloc_info
- If we obtained the spinlock earlier:
- Copy our vmi to cached_info
- smp_wmb()
- set_bit(1, &seq->sequence). This marks the information as valid,
as long as bit 0 is still clear.
- Release the spinlock.
Basically, bit 0 says "vmalloc info has changed", and bit 1 says
"vmalloc cache has been updated". This clears bit 0 before
starting the update so that an update during calc_vmalloc_info
will force a new update.
So the four cases are basically:
00 - calc_vmalloc_info() in progress
01 - vmap_unlock() during calc_vmalloc_info()
10 - cached_info is valid
11 - vmap_unlock has invalidated cached_info, awaiting refresh
Logically, the sequence number should be initialized to ...01,
but the code above handles 00 okay.
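
A minimal C sketch of the scheme described above, in 4.2-era kernel
style. The names (vmap_info_seq, vmap_info_update_lock, vmap_info_cache,
vmap_info_mark_stale) are invented for illustration, and the "memory
barrier TBD" question is only approximated: in particular, the writer's
plain bump of the sequence word can race with a concurrent set_bit()
from vmap_unlock(), which a real version would need to resolve:

        static unsigned long vmap_info_seq = 1;        /* ...01: cache starts stale */
        static DEFINE_SPINLOCK(vmap_info_update_lock);
        static struct vmalloc_info vmap_info_cache;

        static inline void vmap_info_mark_stale(void)  /* called from vmap_unlock() */
        {
                set_bit(0, &vmap_info_seq);
        }

        void get_vmalloc_info(struct vmalloc_info *vmi)
        {
                unsigned long seq = READ_ONCE(vmap_info_seq);
                bool updater;

                while ((seq & 3) == 2) {                /* lsbits 10: cache valid */
                        smp_rmb();
                        *vmi = vmap_info_cache;
                        smp_rmb();
                        if (READ_ONCE(vmap_info_seq) == seq)
                                return;                 /* consistent snapshot */
                        seq = READ_ONCE(vmap_info_seq);
                }

                /* Cache is stale; at most one caller becomes the updater: */
                updater = spin_trylock(&vmap_info_update_lock);
                if (updater)                            /* bump sequence, lsbits -> 00 */
                        WRITE_ONCE(vmap_info_seq,
                                   (READ_ONCE(vmap_info_seq) | 3) + 1);

                calc_vmalloc_info(vmi);                 /* always-correct fallback */

                if (updater) {
                        vmap_info_cache = *vmi;
                        smp_wmb();
                        set_bit(1, &vmap_info_seq);     /* 00 -> 10 unless bit 0 was re-set */
                        spin_unlock(&vmap_info_update_lock);
                }
        }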
* Re: [PATCH 0/3] mm/vmalloc: Cache the /proc/meminfo vmalloc statistics

From: Ingo Molnar
Date: 2015-08-23 6:04 UTC
To: George Spelvin
Cc: linux-kernel, linux-mm, torvalds, Dave Hansen, Peter Zijlstra,
    David Rientjes, Rik van Riel, Rasmus Villemoes

* George Spelvin <linux@horizon.com> wrote:

> Linus wrote:
> > I don't think any of this can be called "correct", in that the
> > unlocked accesses to the cached state are clearly racy, but I think
> > it's very much "acceptable".
>
> I'd think you could easily fix that with a seqlock-like system.
>
> What makes it so simple is that you can always fall back to
> calc_vmalloc_info if there's any problem, rather than looping or
> blocking.
>
> The basic idea is that you have a seqlock counter, but unless the two
> lsbits are exactly 10, the cached information is stale.
>
> Basically, you need a seqlock and a spinlock. The seqlock does
> most of the work, and the spinlock ensures that there's only one
> updater of the cache.
>
> vmap_unlock() does set_bit(0, &seq->sequence). This marks the
> information as stale.
>
> get_vmalloc_info reads the seqlock. There are two cases:
> - If the two lsbits are 10, the cached information is valid.
>   Copy it out, re-check the seqlock, and loop if the sequence
>   number changes.
> - In any other case, the cached information is not valid.
> - Try to obtain the spinlock, but do not block if it's unavailable;
>   in that case just compute the info fresh without updating the cache.
> - If the lock is acquired:
>   - Set the sequence to (sequence | 3) + 1 (we're the only writer)
>   - This bumps the sequence number and leaves the lsbits at 00 (invalid)
>   - Memory barrier TBD. Do the RCU ops in calc_vmalloc_info do it for us?
> - Call calc_vmalloc_info
> - If we obtained the spinlock earlier:
>   - Copy our vmi to cached_info
>   - smp_wmb()
>   - set_bit(1, &seq->sequence). This marks the information as valid,
>     as long as bit 0 is still clear.
>   - Release the spinlock.
>
> Basically, bit 0 says "vmalloc info has changed", and bit 1 says
> "vmalloc cache has been updated". This clears bit 0 before
> starting the update so that an update during calc_vmalloc_info
> will force a new update.
>
> So the four cases are basically:
> 00 - calc_vmalloc_info() in progress
> 01 - vmap_unlock() during calc_vmalloc_info()
> 10 - cached_info is valid
> 11 - vmap_unlock has invalidated cached_info, awaiting refresh
>
> Logically, the sequence number should be initialized to ...01,
> but the code above handles 00 okay.

I think this is too complex.

How about something simple like the patch below (on top of the third
patch)? It makes the vmalloc info transactional - /proc/meminfo will
always print a consistent set of numbers. (Not that we really care about
races there, but it looks really simple to solve so why not.)

( I also moved the function-static cache next to the flag and seqlock -
  this should further compress the cache footprint. )

Have I missed anything?

Very lightly tested: booted in a VM.

Thanks,

        Ingo

=========================>

 mm/vmalloc.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ef48e557df5a..66726f41e726 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -278,7 +278,15 @@ EXPORT_SYMBOL(vmalloc_to_pfn);

 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(vmap_area_lock);

+/*
+ * Seqlock and flag for the vmalloc info cache printed in /proc/meminfo.
+ *
+ * The assumption of the optimization is that it's read frequently, but
+ * modified infrequently.
+ */
+static DEFINE_SEQLOCK(vmap_info_lock);
 static int vmap_info_changed;
+static struct vmalloc_info vmap_info_cache;

 static inline void vmap_lock(void)
 {
@@ -2752,10 +2760,14 @@ static void calc_vmalloc_info(struct vmalloc_info *vmi)

 void get_vmalloc_info(struct vmalloc_info *vmi)
 {
-        static struct vmalloc_info cached_info;
+        if (!READ_ONCE(vmap_info_changed)) {
+                unsigned int seq;
+
+                do {
+                        seq = read_seqbegin(&vmap_info_lock);
+                        *vmi = vmap_info_cache;
+                } while (read_seqretry(&vmap_info_lock, seq));

-        if (!vmap_info_changed) {
-                *vmi = cached_info;
                 return;
         }

@@ -2764,8 +2776,9 @@ void get_vmalloc_info(struct vmalloc_info *vmi)

         calc_vmalloc_info(vmi);

-        barrier();
-        cached_info = *vmi;
+        write_seqlock(&vmap_info_lock);
+        vmap_info_cache = *vmi;
+        write_sequnlock(&vmap_info_lock);
 }

 #endif
--
2.1.4
* Re: [PATCH 0/3] mm/vmalloc: Cache the /proc/meminfo vmalloc statistics

From: George Spelvin
Date: 2015-08-23 6:46 UTC
To: linux, mingo
Cc: dave, linux-kernel, linux-mm, linux, peterz, riel, rientjes, torvalds

Ingo Molnar <mingo@kernel.org> wrote:
> I think this is too complex.
>
> How about something simple like the patch below (on top of the third
> patch)? It makes the vmalloc info transactional - /proc/meminfo will
> always print a consistent set of numbers. (Not that we really care
> about races there, but it looks really simple to solve so why not.)

Looks like a huge simplification!

It needs a comment about the approximate nature of the locking and the
obvious race conditions:

1) The first caller to get_vmalloc_info() clears vmap_info_changed
   before updating vmap_info_cache, so a second caller is likely to get
   stale data for the duration of a calc_vmalloc_info() call.

2) Although unlikely, it's possible for two threads to race calling
   calc_vmalloc_info(), and the one that computes fresher data updates
   the cache first, so the later write leaves stale data.

Other issues:

3) Me, I'd make vmap_info_changed a bool, for documentation more than
   any space saving.

4) I wish there were a trylock version of write_seqlock, so we could
   avoid blocking entirely. (You *could* hand-roll it, but that eats
   into the simplicity.)
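
A hand-rolled trylock flavour of write_seqlock(), as mused about in
point 4 above, might look like the sketch below. This is hypothetical -
the kernel provided no such API at the time - and it reaches into
seqlock_t's internal seqcount/spinlock pair, which is exactly the loss
of simplicity George mentions:

        static inline bool write_tryseqlock(seqlock_t *sl)
        {
                if (!spin_trylock(&sl->lock))
                        return false;
                write_seqcount_begin(&sl->seqcount);
                return true;
        }

        /* The cache update in get_vmalloc_info() could then never block: */
        static void update_vmalloc_info_cache(const struct vmalloc_info *vmi)
        {
                if (!write_tryseqlock(&vmap_info_lock))
                        return;         /* another updater is already at it */
                vmap_info_cache = *vmi;
                write_sequnlock(&vmap_info_lock);
        }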