All of lore.kernel.org
 help / color / mirror / Atom feed
* LVM2 ./WHATS_NEW lib/format1/import-extents.c ...
From: zkabelac @ 2011-10-23 16:02 UTC (permalink / raw)
  To: lvm-devel

CVSROOT:	/cvs/lvm2
Module name:	LVM2
Changes by:	zkabelac at sourceware.org	2011-10-23 16:02:02

Modified files:
	.              : WHATS_NEW 
	lib/format1    : import-extents.c 
	lib/format_pool: import_export.c 
	lib/format_text: import_vsn1.c 
	lib/metadata   : lv_alloc.h lv_manip.c merge.c 

Log message:
	Always use vg memory pool for allocated lv segment
	
	Remove the mem pool parameter from alloc_lv_segment(),
	since we should always allocate LV segments from the vg mempool.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/WHATS_NEW.diff?cvsroot=lvm2&r1=1.2168&r2=1.2169
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/format1/import-extents.c.diff?cvsroot=lvm2&r1=1.41&r2=1.42
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/format_pool/import_export.c.diff?cvsroot=lvm2&r1=1.34&r2=1.35
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/format_text/import_vsn1.c.diff?cvsroot=lvm2&r1=1.93&r2=1.94
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/metadata/lv_alloc.h.diff?cvsroot=lvm2&r1=1.30&r2=1.31
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/metadata/lv_manip.c.diff?cvsroot=lvm2&r1=1.303&r2=1.304
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/lib/metadata/merge.c.diff?cvsroot=lvm2&r1=1.53&r2=1.54

--- LVM2/WHATS_NEW	2011/10/22 16:52:00	1.2168
+++ LVM2/WHATS_NEW	2011/10/23 16:02:01	1.2169
@@ -1,5 +1,6 @@
 Version 2.02.89 - 
 ==================================
+  Always use vg memory pool for allocated lv segment.
   Remove extra 4kB buffer allocated on stack in print_log().
   Make move_lv_segment non-static function and use dm_list function.
   Pass exclusive LV locks to all nodes in the cluster.
--- LVM2/lib/format1/import-extents.c	2011/09/06 00:26:42	1.41
+++ LVM2/lib/format1/import-extents.c	2011/10/23 16:02:01	1.42
@@ -222,8 +222,8 @@
 	while (le < lvm->lv->le_count) {
 		len = _area_length(lvm, le);
 
-		if (!(seg = alloc_lv_segment(cmd->mem, segtype, lvm->lv, le,
-					     len, 0, 0, NULL, NULL, 1, len, 0, 0, 0, NULL))) {
+		if (!(seg = alloc_lv_segment(segtype, lvm->lv, le, len, 0, 0,
+					     NULL, NULL, 1, len, 0, 0, 0, NULL))) {
 			log_error("Failed to allocate linear segment.");
 			return 0;
 		}
@@ -292,7 +292,7 @@
 				     area_len, first_area_le, total_area_len))
 			area_len++;
 
-		if (!(seg = alloc_lv_segment(cmd->mem, segtype, lvm->lv,
+		if (!(seg = alloc_lv_segment(segtype, lvm->lv,
 					     lvm->stripes * first_area_le,
 					     lvm->stripes * area_len,
 					     0, lvm->stripe_size, NULL, NULL,
--- LVM2/lib/format_pool/import_export.c	2011/09/06 00:26:42	1.34
+++ LVM2/lib/format_pool/import_export.c	2011/10/23 16:02:01	1.35
@@ -193,7 +193,7 @@
 						     "striped")))
 		return_0;
 
-	if (!(seg = alloc_lv_segment(mem, segtype, lv, *le_cur,
+	if (!(seg = alloc_lv_segment(segtype, lv, *le_cur,
 				     area_len * usp->num_devs, 0,
 				     usp->striping, NULL, NULL, usp->num_devs,
 				     area_len, 0, 0, 0, NULL))) {
@@ -233,7 +233,7 @@
 	for (j = 0; j < usp->num_devs; j++) {
 		area_len = (usp->devs[j].blocks) / POOL_PE_SIZE;
 
-		if (!(seg = alloc_lv_segment(mem, segtype, lv, *le_cur,
+		if (!(seg = alloc_lv_segment(segtype, lv, *le_cur,
 					     area_len, 0, usp->striping,
 					     NULL, NULL, 1, area_len,
 					     POOL_PE_SIZE, 0, 0, NULL))) {
--- LVM2/lib/format_text/import_vsn1.c	2011/09/06 00:26:43	1.93
+++ LVM2/lib/format_text/import_vsn1.c	2011/10/23 16:02:02	1.94
@@ -328,7 +328,7 @@
 	    !segtype->ops->text_import_area_count(sn_child, &area_count))
 		return_0;
 
-	if (!(seg = alloc_lv_segment(mem, segtype, lv, start_extent,
+	if (!(seg = alloc_lv_segment(segtype, lv, start_extent,
 				     extent_count, 0, 0, NULL, NULL, area_count,
 				     extent_count, 0, 0, 0, NULL))) {
 		log_error("Segment allocation failed");
--- LVM2/lib/metadata/lv_alloc.h	2011/09/06 00:26:43	1.30
+++ LVM2/lib/metadata/lv_alloc.h	2011/10/23 16:02:02	1.31
@@ -15,8 +15,7 @@
 
 #ifndef _LVM_LV_ALLOC_H
 
-struct lv_segment *alloc_lv_segment(struct dm_pool *mem,
-				    const struct segment_type *segtype,
+struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
 				    struct logical_volume *lv,
 				    uint32_t le, uint32_t len,
 				    uint64_t status,
--- LVM2/lib/metadata/lv_manip.c	2011/10/22 16:48:59	1.303
+++ LVM2/lib/metadata/lv_manip.c	2011/10/23 16:02:02	1.304
@@ -201,8 +201,7 @@
 /*
  * All lv_segments get created here.
  */
-struct lv_segment *alloc_lv_segment(struct dm_pool *mem,
-				    const struct segment_type *segtype,
+struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
 				    struct logical_volume *lv,
 				    uint32_t le, uint32_t len,
 				    uint64_t status,
@@ -217,6 +216,7 @@
 				    struct lv_segment *pvmove_source_seg)
 {
 	struct lv_segment *seg;
+	struct dm_pool *mem = lv->vg->vgmem;
 	uint32_t areas_sz = area_count * sizeof(*seg->areas);
 
 	if (!segtype) {
@@ -277,7 +277,7 @@
 		return NULL;
 	}
 
-	if (!(seg = alloc_lv_segment(lv->vg->cmd->mem, segtype, lv, old_le_count,
+	if (!(seg = alloc_lv_segment(segtype, lv, old_le_count,
 				     lv->le_count - old_le_count, status, 0,
 				     NULL, NULL, 0, lv->le_count - old_le_count,
 				     0, 0, 0, NULL))) {
@@ -954,8 +954,7 @@
 
 	area_multiple = _calc_area_multiple(segtype, area_count, 0);
 
-	if (!(seg = alloc_lv_segment(lv->vg->cmd->mem, segtype, lv,
-				     lv->le_count,
+	if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count,
 				     aa[0].len * area_multiple,
 				     status, stripe_size, NULL, NULL,
 				     area_count,
@@ -2044,9 +2043,9 @@
 		thin_pool_lv = lvl->lv;
 	}
 
-	if (!(seg = alloc_lv_segment(lv->vg->cmd->mem, segtype, lv,
-				     lv->le_count, extents, status, 0,
-				     NULL, thin_pool_lv, 0, extents, 0, 0, 0, NULL))) {
+	if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents,
+				     status, 0, NULL, thin_pool_lv, 0,
+				     extents, 0, 0, 0, NULL))) {
 		log_error("Couldn't allocate new zero segment.");
 		return 0;
 	}
@@ -2178,8 +2177,7 @@
 		return NULL;
 	}
 
-	if (!(newseg = alloc_lv_segment(seg->lv->vg->cmd->mem,
-					get_segtype_from_string(seg->lv->vg->cmd, "mirror"),
+	if (!(newseg = alloc_lv_segment(get_segtype_from_string(seg->lv->vg->cmd, "mirror"),
 					seg->lv, seg->le, seg->len,
 					seg->status, seg->stripe_size,
 					log_lv, NULL,
@@ -2375,8 +2373,8 @@
 	/*
 	 * First, create our top-level segment for our top-level LV
 	 */
-	if (!(mapseg = alloc_lv_segment(lv->vg->cmd->mem, segtype,
-					lv, 0, 0, lv->status, stripe_size, NULL, NULL,
+	if (!(mapseg = alloc_lv_segment(segtype, lv, 0, 0, lv->status,
+					stripe_size, NULL, NULL,
 					devices, 0, 0, region_size, 0, NULL))) {
 		log_error("Failed to create mapping segment for %s", lv->name);
 		return 0;
@@ -3593,8 +3591,7 @@
 		return_NULL;
 
 	/* allocate a new linear segment */
-	if (!(mapseg = alloc_lv_segment(cmd->mem, segtype,
-					lv_where, 0, layer_lv->le_count,
+	if (!(mapseg = alloc_lv_segment(segtype, lv_where, 0, layer_lv->le_count,
 					status, 0, NULL, NULL, 1, layer_lv->le_count,
 					0, 0, 0, NULL)))
 		return_NULL;
@@ -3636,8 +3633,7 @@
 			 seg->lv->vg->name, seg->lv->name);
 
 	/* allocate a new segment */
-	if (!(mapseg = alloc_lv_segment(layer_lv->vg->cmd->mem, segtype,
-					layer_lv, layer_lv->le_count,
+	if (!(mapseg = alloc_lv_segment(segtype, layer_lv, layer_lv->le_count,
 					seg->area_len, status, 0,
 					NULL, NULL, 1, seg->area_len, 0, 0, 0, seg)))
 		return_0;
--- LVM2/lib/metadata/merge.c	2011/10/20 10:28:41	1.53
+++ LVM2/lib/metadata/merge.c	2011/10/23 16:02:02	1.54
@@ -433,7 +433,7 @@
 	}
 
 	/* Clone the existing segment */
-	if (!(split_seg = alloc_lv_segment(lv->vg->vgmem, seg->segtype,
+	if (!(split_seg = alloc_lv_segment(seg->segtype,
 					   seg->lv, seg->le, seg->len,
 					   seg->status, seg->stripe_size,
 					   seg->log_lv, seg->pool_lv,



^ permalink raw reply

* [PATCH v2 6/6] slub: only preallocate cpus_with_slabs if offstack
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319385413-29665-1-git-send-email-gilad@benyossef.com>

We need a cpumask to track cpus with per cpu cache pages
to know which cpu to whack during flush_all. For
CONFIG_CPUMASK_OFFSTACK=n we allocate the mask on stack.
For CONFIG_CPUMASK_OFFSTACK=y we don't want to call kmalloc
on the flush_all path, so we preallocate per kmem_cache
on cache creation and use it in flush_all.

The result is that for the common CONFIG_CPUMASK_OFFSTACK=n
case there is no memory overhead for the mask var.

Since systems where CONFIG_CPUMASK_OFFSTACK=y are the systems
which are most likely to benefit from fewer IPIs by tracking
which cpu actually has a per cpu cache, we end up paying
the overhead only in cases where we enjoy the upside.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 include/linux/slub_def.h |    8 ++++++-
 mm/slub.c                |   52 +++++++++++++++++++++++++++++++++------------
 2 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index b130f61..c07f7aa 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -103,8 +103,14 @@ struct kmem_cache {
 	int remote_node_defrag_ratio;
 #endif
 
-	/* Which CPUs hold local slabs for this cache. */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	/*
+	 * Which CPUs hold local slabs for this cache.
+	 * Only updated on calling flush_all().
+	 * Defined on stack for CONFIG_CPUMASK_OFFSTACK=n.
+	 */
 	cpumask_var_t cpus_with_slabs;
+#endif
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
diff --git a/mm/slub.c b/mm/slub.c
index f8cbf2d..765be95 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1946,20 +1946,48 @@ static void flush_cpu_slab(void *d)
 	__flush_cpu_slab(s, smp_processor_id());
 }
 
+/*
+ * We need a cpumask struct to track which cpus have
+ * per cpu caches. For CONFIG_CPUMASK_OFFSTACK=n we
+ * allocate on stack. For CONFIG_CPUMASK_OFFSTACK=y
+ * we don't want to allocate in the flush_all code path
+ * so we allocate a struct for each cache structure
+ * on kmem cache creation and use it here.
+ */
 static void flush_all(struct kmem_cache *s)
 {
 	struct kmem_cache_cpu *c;
 	int cpu;
+	cpumask_var_t cpus;
 
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	cpus = s->cpus_with_slabs;
+#endif
 	for_each_online_cpu(cpu) {
 		c = per_cpu_ptr(s->cpu_slab, cpu);
 		if (c && c->page)
-			cpumask_set_cpu(cpu, s->cpus_with_slabs);
+			cpumask_set_cpu(cpu, cpus);
 		else
-			cpumask_clear_cpu(cpu, s->cpus_with_slabs);
+			cpumask_clear_cpu(cpu, cpus);
 	}
 
-	on_each_cpu_mask(s->cpus_with_slabs, flush_cpu_slab, s, 1);
+	on_each_cpu_mask(cpus, flush_cpu_slab, s, 1);
+}
+
+static inline int alloc_cpus_mask(struct kmem_cache *s, int flags)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	return alloc_cpumask_var(&s->cpus_with_slabs, flags);
+#else
+	return 1;
+#endif
+}
+
+static inline void free_cpus_mask(struct kmem_cache *s)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	free_cpumask_var(s->cpus_with_slabs);
+#endif
 }
 
 /*
@@ -3039,7 +3067,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
 		if (s->flags & SLAB_DESTROY_BY_RCU)
 			rcu_barrier();
 		sysfs_slab_remove(s);
-		free_cpumask_var(s->cpus_with_slabs);
+		free_cpus_mask(s);
 	}
 	up_write(&slub_lock);
 }
@@ -3655,16 +3683,14 @@ void __init kmem_cache_init(void)
 	if (KMALLOC_MIN_SIZE <= 32) {
 		kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
 		BUG_ON(!kmalloc_caches[1]->name);
-		ret = alloc_cpumask_var(&kmalloc_caches[1]->cpus_with_slabs,
-			GFP_NOWAIT);
+		ret = alloc_cpus_mask(kmalloc_caches[1], GFP_NOWAIT);
 		BUG_ON(!ret);
 	}
 
 	if (KMALLOC_MIN_SIZE <= 64) {
 		kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
 		BUG_ON(!kmalloc_caches[2]->name);
-		ret = alloc_cpumask_var(&kmalloc_caches[2]->cpus_with_slabs,
-				GFP_NOWAIT);
+		ret = alloc_cpus_mask(kmalloc_caches[2], GFP_NOWAIT);
 		BUG_ON(!ret);
 	}
 
@@ -3673,8 +3699,7 @@ void __init kmem_cache_init(void)
 
 		BUG_ON(!s);
 		kmalloc_caches[i]->name = s;
-		ret = alloc_cpumask_var(&kmalloc_caches[i]->cpus_with_slabs,
-				GFP_NOWAIT);
+		ret = alloc_cpus_mask(kmalloc_caches[i], GFP_NOWAIT);
 		BUG_ON(!ret);
 	}
 
@@ -3693,8 +3718,7 @@ void __init kmem_cache_init(void)
 			BUG_ON(!name);
 			kmalloc_dma_caches[i] = create_kmalloc_cache(name,
 				s->objsize, SLAB_CACHE_DMA);
-			ret = alloc_cpumask_var(
-				&kmalloc_dma_caches[i]->cpus_with_slabs,
+			ret = alloc_cpus_mask(kmalloc_dma_caches[i],
 				GFP_NOWAIT);
 			BUG_ON(!ret);
 		}
@@ -3810,11 +3834,11 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 
 		if (kmem_cache_open(s, n,
 				size, align, flags, ctor)) {
-			alloc_cpumask_var(&s->cpus_with_slabs, GFP_KERNEL);
+			alloc_cpus_mask(s, GFP_KERNEL);
 			list_add(&s->list, &slab_caches);
 			if (sysfs_slab_add(s)) {
 				list_del(&s->list);
-				free_cpumask_var(s->cpus_with_slabs);
+				free_cpus_mask(s);
 				kfree(n);
 				kfree(s);
 				goto err;
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH v2 6/6] slub: only preallocate cpus_with_slabs if offstack
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319385413-29665-1-git-send-email-gilad@benyossef.com>

We need a cpumask to track cpus with per cpu cache pages
to know which cpu to whack during flush_all. For
CONFIG_CPUMASK_OFFSTACK=n we allocate the mask on stack.
For CONFIG_CPUMASK_OFFSTACK=y we don't want to call kmalloc
on the flush_all path, so we preallocate per kmem_cache
on cache creation and use it in flush_all.

The result is that for the common CONFIG_CPUMASK_OFFSTACK=n
case there is no memory overhead for the mask var.

Since systems where CONFIG_CPUMASK_OFFSTACK=y are the systems
which are most likely to benefit from fewer IPIs by tracking
which cpu actually has a per cpu cache, we end up paying
the overhead only in cases where we enjoy the upside.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 include/linux/slub_def.h |    8 ++++++-
 mm/slub.c                |   52 +++++++++++++++++++++++++++++++++------------
 2 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index b130f61..c07f7aa 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -103,8 +103,14 @@ struct kmem_cache {
 	int remote_node_defrag_ratio;
 #endif
 
-	/* Which CPUs hold local slabs for this cache. */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	/*
+	 * Which CPUs hold local slabs for this cache.
+	 * Only updated on calling flush_all().
+	 * Defined on stack for CONFIG_CPUMASK_OFFSTACK=n.
+	 */
 	cpumask_var_t cpus_with_slabs;
+#endif
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
diff --git a/mm/slub.c b/mm/slub.c
index f8cbf2d..765be95 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1946,20 +1946,48 @@ static void flush_cpu_slab(void *d)
 	__flush_cpu_slab(s, smp_processor_id());
 }
 
+/*
+ * We need a cpumask struct to track which cpus have
+ * per cpu caches. For CONFIG_CPUMASK_OFFSTACK=n we
+ * allocate on stack. For CONFIG_CPUMASK_OFFSTACK=y
+ * we don't want to allocate in the flush_all code path
+ * so we allocate a struct for each cache structure
+ * on kmem cache creation and use it here.
+ */
 static void flush_all(struct kmem_cache *s)
 {
 	struct kmem_cache_cpu *c;
 	int cpu;
+	cpumask_var_t cpus;
 
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	cpus = s->cpus_with_slabs;
+#endif
 	for_each_online_cpu(cpu) {
 		c = per_cpu_ptr(s->cpu_slab, cpu);
 		if (c && c->page)
-			cpumask_set_cpu(cpu, s->cpus_with_slabs);
+			cpumask_set_cpu(cpu, cpus);
 		else
-			cpumask_clear_cpu(cpu, s->cpus_with_slabs);
+			cpumask_clear_cpu(cpu, cpus);
 	}
 
-	on_each_cpu_mask(s->cpus_with_slabs, flush_cpu_slab, s, 1);
+	on_each_cpu_mask(cpus, flush_cpu_slab, s, 1);
+}
+
+static inline int alloc_cpus_mask(struct kmem_cache *s, int flags)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	return alloc_cpumask_var(&s->cpus_with_slabs, flags);
+#else
+	return 1;
+#endif
+}
+
+static inline void free_cpus_mask(struct kmem_cache *s)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	free_cpumask_var(s->cpus_with_slabs);
+#endif
 }
 
 /*
@@ -3039,7 +3067,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
 		if (s->flags & SLAB_DESTROY_BY_RCU)
 			rcu_barrier();
 		sysfs_slab_remove(s);
-		free_cpumask_var(s->cpus_with_slabs);
+		free_cpus_mask(s);
 	}
 	up_write(&slub_lock);
 }
@@ -3655,16 +3683,14 @@ void __init kmem_cache_init(void)
 	if (KMALLOC_MIN_SIZE <= 32) {
 		kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
 		BUG_ON(!kmalloc_caches[1]->name);
-		ret = alloc_cpumask_var(&kmalloc_caches[1]->cpus_with_slabs,
-			GFP_NOWAIT);
+		ret = alloc_cpus_mask(kmalloc_caches[1], GFP_NOWAIT);
 		BUG_ON(!ret);
 	}
 
 	if (KMALLOC_MIN_SIZE <= 64) {
 		kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
 		BUG_ON(!kmalloc_caches[2]->name);
-		ret = alloc_cpumask_var(&kmalloc_caches[2]->cpus_with_slabs,
-				GFP_NOWAIT);
+		ret = alloc_cpus_mask(kmalloc_caches[2], GFP_NOWAIT);
 		BUG_ON(!ret);
 	}
 
@@ -3673,8 +3699,7 @@ void __init kmem_cache_init(void)
 
 		BUG_ON(!s);
 		kmalloc_caches[i]->name = s;
-		ret = alloc_cpumask_var(&kmalloc_caches[i]->cpus_with_slabs,
-				GFP_NOWAIT);
+		ret = alloc_cpus_mask(kmalloc_caches[i], GFP_NOWAIT);
 		BUG_ON(!ret);
 	}
 
@@ -3693,8 +3718,7 @@ void __init kmem_cache_init(void)
 			BUG_ON(!name);
 			kmalloc_dma_caches[i] = create_kmalloc_cache(name,
 				s->objsize, SLAB_CACHE_DMA);
-			ret = alloc_cpumask_var(
-				&kmalloc_dma_caches[i]->cpus_with_slabs,
+			ret = alloc_cpus_mask(kmalloc_dma_caches[i],
 				GFP_NOWAIT);
 			BUG_ON(!ret);
 		}
@@ -3810,11 +3834,11 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 
 		if (kmem_cache_open(s, n,
 				size, align, flags, ctor)) {
-			alloc_cpumask_var(&s->cpus_with_slabs, GFP_KERNEL);
+			alloc_cpus_mask(s, GFP_KERNEL);
 			list_add(&s->list, &slab_caches);
 			if (sysfs_slab_add(s)) {
 				list_del(&s->list);
-				free_cpumask_var(s->cpus_with_slabs);
+				free_cpus_mask(s);
 				kfree(n);
 				kfree(s);
 				goto err;
-- 
1.7.0.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related

* [PATCH v2 5/6] slub: Only IPI CPUs that have per cpu obj to flush
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319385413-29665-1-git-send-email-gilad@benyossef.com>

flush_all() is called for each kmem_cache_destroy(). So every cache
being destroyed dynamically ended up sending an IPI to each CPU in the
system, regardless of whether the cache has ever been used there.

For example, if you close the InfiniBand ipath driver char device file,
the close file ops calls kmem_cache_destroy(). So running some
InfiniBand config tool on a single CPU dedicated to system tasks
might interrupt the rest of the 127 CPUs I dedicated to some CPU
intensive task.

I suspect there is a good chance that every line in the output of "git
grep kmem_cache_destroy linux/ | grep '\->'" has a similar scenario.

This patch attempts to rectify this issue by sending an IPI to flush
the per cpu objects back to the free lists only to CPUs that seem to
have such objects.

The check of which CPU to IPI is racy but we don't care since
asking a CPU without per cpu objects to flush does no
damage and as far as I can tell the flush_all by itself is
racy against allocs on remote CPUs anyway, so if you meant
the flush_all to be deterministic, you had to arrange for
locking regardless.

Also note that concurrent use of the cpumask var on different
cpus is fine since they end up tracking the same thing. The
only downside to a race is asking a CPU with no per cpu cache
to flush, which before this patch happens all the time anyway.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 include/linux/slub_def.h |    3 +++
 mm/slub.c                |   37 +++++++++++++++++++++++++++++++++++--
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index f58d641..b130f61 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -102,6 +102,9 @@ struct kmem_cache {
 	 */
 	int remote_node_defrag_ratio;
 #endif
+
+	/* Which CPUs hold local slabs for this cache. */
+	cpumask_var_t cpus_with_slabs;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
diff --git a/mm/slub.c b/mm/slub.c
index 7c54fe8..f8cbf2d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1948,7 +1948,18 @@ static void flush_cpu_slab(void *d)
 
 static void flush_all(struct kmem_cache *s)
 {
-	on_each_cpu(flush_cpu_slab, s, 1);
+	struct kmem_cache_cpu *c;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		c = per_cpu_ptr(s->cpu_slab, cpu);
+		if (c && c->page)
+			cpumask_set_cpu(cpu, s->cpus_with_slabs);
+		else
+			cpumask_clear_cpu(cpu, s->cpus_with_slabs);
+	}
+
+	on_each_cpu_mask(s->cpus_with_slabs, flush_cpu_slab, s, 1);
 }
 
 /*
@@ -3028,6 +3039,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
 		if (s->flags & SLAB_DESTROY_BY_RCU)
 			rcu_barrier();
 		sysfs_slab_remove(s);
+		free_cpumask_var(s->cpus_with_slabs);
 	}
 	up_write(&slub_lock);
 }
@@ -3528,6 +3540,7 @@ void __init kmem_cache_init(void)
 	int order;
 	struct kmem_cache *temp_kmem_cache_node;
 	unsigned long kmalloc_size;
+	int ret;
 
 	kmem_size = offsetof(struct kmem_cache, node) +
 				nr_node_ids * sizeof(struct kmem_cache_node *);
@@ -3635,15 +3648,24 @@ void __init kmem_cache_init(void)
 
 	slab_state = UP;
 
-	/* Provide the correct kmalloc names now that the caches are up */
+	/*
+	 * Provide the correct kmalloc names and the cpus_with_slabs cpumasks
+	 * for CONFIG_CPUMASK_OFFSTACK=y case now that the caches are up.
+	 */
 	if (KMALLOC_MIN_SIZE <= 32) {
 		kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
 		BUG_ON(!kmalloc_caches[1]->name);
+		ret = alloc_cpumask_var(&kmalloc_caches[1]->cpus_with_slabs,
+			GFP_NOWAIT);
+		BUG_ON(!ret);
 	}
 
 	if (KMALLOC_MIN_SIZE <= 64) {
 		kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
 		BUG_ON(!kmalloc_caches[2]->name);
+		ret = alloc_cpumask_var(&kmalloc_caches[2]->cpus_with_slabs,
+				GFP_NOWAIT);
+		BUG_ON(!ret);
 	}
 
 	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
@@ -3651,6 +3673,9 @@ void __init kmem_cache_init(void)
 
 		BUG_ON(!s);
 		kmalloc_caches[i]->name = s;
+		ret = alloc_cpumask_var(&kmalloc_caches[i]->cpus_with_slabs,
+				GFP_NOWAIT);
+		BUG_ON(!ret);
 	}
 
 #ifdef CONFIG_SMP
@@ -3668,6 +3693,10 @@ void __init kmem_cache_init(void)
 			BUG_ON(!name);
 			kmalloc_dma_caches[i] = create_kmalloc_cache(name,
 				s->objsize, SLAB_CACHE_DMA);
+			ret = alloc_cpumask_var(
+				&kmalloc_dma_caches[i]->cpus_with_slabs,
+				GFP_NOWAIT);
+			BUG_ON(!ret);
 		}
 	}
 #endif
@@ -3778,15 +3807,19 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 
 	s = kmalloc(kmem_size, GFP_KERNEL);
 	if (s) {
+
 		if (kmem_cache_open(s, n,
 				size, align, flags, ctor)) {
+			alloc_cpumask_var(&s->cpus_with_slabs, GFP_KERNEL);
 			list_add(&s->list, &slab_caches);
 			if (sysfs_slab_add(s)) {
 				list_del(&s->list);
+				free_cpumask_var(s->cpus_with_slabs);
 				kfree(n);
 				kfree(s);
 				goto err;
 			}
+
 			up_write(&slub_lock);
 			return s;
 		}
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH v2 5/6] slub: Only IPI CPUs that have per cpu obj to flush
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319385413-29665-1-git-send-email-gilad@benyossef.com>

flush_all() is called for each kmem_cache_destroy(). So every cache
being destroyed dynamically ended up sending an IPI to each CPU in the
system, regardless of whether the cache has ever been used there.

For example, if you close the InfiniBand ipath driver char device file,
the close file ops calls kmem_cache_destroy(). So running some
InfiniBand config tool on a single CPU dedicated to system tasks
might interrupt the rest of the 127 CPUs I dedicated to some CPU
intensive task.

I suspect there is a good chance that every line in the output of "git
grep kmem_cache_destroy linux/ | grep '\->'" has a similar scenario.

This patch attempts to rectify this issue by sending an IPI to flush
the per cpu objects back to the free lists only to CPUs that seem to
have such objects.

The check of which CPU to IPI is racy but we don't care since
asking a CPU without per cpu objects to flush does no
damage and as far as I can tell the flush_all by itself is
racy against allocs on remote CPUs anyway, so if you meant
the flush_all to be deterministic, you had to arrange for
locking regardless.

Also note that concurrent use of the cpumask var on different
cpus is fine since they end up tracking the same thing. The
only downside to a race is asking a CPU with no per cpu cache
to flush, which before this patch happens all the time anyway.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 include/linux/slub_def.h |    3 +++
 mm/slub.c                |   37 +++++++++++++++++++++++++++++++++++--
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index f58d641..b130f61 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -102,6 +102,9 @@ struct kmem_cache {
 	 */
 	int remote_node_defrag_ratio;
 #endif
+
+	/* Which CPUs hold local slabs for this cache. */
+	cpumask_var_t cpus_with_slabs;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
diff --git a/mm/slub.c b/mm/slub.c
index 7c54fe8..f8cbf2d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1948,7 +1948,18 @@ static void flush_cpu_slab(void *d)
 
 static void flush_all(struct kmem_cache *s)
 {
-	on_each_cpu(flush_cpu_slab, s, 1);
+	struct kmem_cache_cpu *c;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		c = per_cpu_ptr(s->cpu_slab, cpu);
+		if (c && c->page)
+			cpumask_set_cpu(cpu, s->cpus_with_slabs);
+		else
+			cpumask_clear_cpu(cpu, s->cpus_with_slabs);
+	}
+
+	on_each_cpu_mask(s->cpus_with_slabs, flush_cpu_slab, s, 1);
 }
 
 /*
@@ -3028,6 +3039,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
 		if (s->flags & SLAB_DESTROY_BY_RCU)
 			rcu_barrier();
 		sysfs_slab_remove(s);
+		free_cpumask_var(s->cpus_with_slabs);
 	}
 	up_write(&slub_lock);
 }
@@ -3528,6 +3540,7 @@ void __init kmem_cache_init(void)
 	int order;
 	struct kmem_cache *temp_kmem_cache_node;
 	unsigned long kmalloc_size;
+	int ret;
 
 	kmem_size = offsetof(struct kmem_cache, node) +
 				nr_node_ids * sizeof(struct kmem_cache_node *);
@@ -3635,15 +3648,24 @@ void __init kmem_cache_init(void)
 
 	slab_state = UP;
 
-	/* Provide the correct kmalloc names now that the caches are up */
+	/*
+	 * Provide the correct kmalloc names and the cpus_with_slabs cpumasks
+	 * for CONFIG_CPUMASK_OFFSTACK=y case now that the caches are up.
+	 */
 	if (KMALLOC_MIN_SIZE <= 32) {
 		kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
 		BUG_ON(!kmalloc_caches[1]->name);
+		ret = alloc_cpumask_var(&kmalloc_caches[1]->cpus_with_slabs,
+			GFP_NOWAIT);
+		BUG_ON(!ret);
 	}
 
 	if (KMALLOC_MIN_SIZE <= 64) {
 		kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
 		BUG_ON(!kmalloc_caches[2]->name);
+		ret = alloc_cpumask_var(&kmalloc_caches[2]->cpus_with_slabs,
+				GFP_NOWAIT);
+		BUG_ON(!ret);
 	}
 
 	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
@@ -3651,6 +3673,9 @@ void __init kmem_cache_init(void)
 
 		BUG_ON(!s);
 		kmalloc_caches[i]->name = s;
+		ret = alloc_cpumask_var(&kmalloc_caches[i]->cpus_with_slabs,
+				GFP_NOWAIT);
+		BUG_ON(!ret);
 	}
 
 #ifdef CONFIG_SMP
@@ -3668,6 +3693,10 @@ void __init kmem_cache_init(void)
 			BUG_ON(!name);
 			kmalloc_dma_caches[i] = create_kmalloc_cache(name,
 				s->objsize, SLAB_CACHE_DMA);
+			ret = alloc_cpumask_var(
+				&kmalloc_dma_caches[i]->cpus_with_slabs,
+				GFP_NOWAIT);
+			BUG_ON(!ret);
 		}
 	}
 #endif
@@ -3778,15 +3807,19 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 
 	s = kmalloc(kmem_size, GFP_KERNEL);
 	if (s) {
+
 		if (kmem_cache_open(s, n,
 				size, align, flags, ctor)) {
+			alloc_cpumask_var(&s->cpus_with_slabs, GFP_KERNEL);
 			list_add(&s->list, &slab_caches);
 			if (sysfs_slab_add(s)) {
 				list_del(&s->list);
+				free_cpumask_var(s->cpus_with_slabs);
 				kfree(n);
 				kfree(s);
 				goto err;
 			}
+
 			up_write(&slub_lock);
 			return s;
 		}
-- 
1.7.0.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related

* [PATCH v2 4/6] mm: Only IPI CPUs to drain local pages if they exist
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319385413-29665-1-git-send-email-gilad@benyossef.com>

Use a cpumask to track CPUs with per-cpu pages in any zone
and only send an IPI requesting CPUs to drain these pages
to the buddy allocator if they actually have pages.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 mm/page_alloc.c |   64 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6e8ecb6..9551b90 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -57,11 +57,17 @@
 #include <linux/ftrace_event.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/percpu.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
+#include <asm/local.h>
 #include "internal.h"
 
+/* Which CPUs have per cpu pages  */
+cpumask_var_t cpus_with_pcp;
+static DEFINE_PER_CPU(unsigned long, total_cpu_pcp_count);
+
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
 DEFINE_PER_CPU(int, numa_node);
 EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -224,6 +230,36 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
+/*
+ * The following two functions track page counts both per zone/per CPU
+ * and globally per CPU.
+ *
+ * They must be called with interrupts disabled and either pinned to specific
+ * CPU or for offline CPUs or under stop_machine.
+ */
+
+static inline void inc_pcp_count(int cpu, struct per_cpu_pages *pcp, int count)
+{
+	unsigned long *tot = &per_cpu(total_cpu_pcp_count, cpu);
+
+	if (unlikely(!*tot))
+		cpumask_set_cpu(cpu, cpus_with_pcp);
+
+	*tot += count;
+	pcp->count += count;
+}
+
+static inline void dec_pcp_count(int cpu, struct per_cpu_pages *pcp, int count)
+{
+	unsigned long *tot = &per_cpu(total_cpu_pcp_count, cpu);
+
+	pcp->count -= count;
+	*tot -= count;
+
+	if (unlikely(!*tot))
+		cpumask_clear_cpu(cpu, cpus_with_pcp);
+}
+
 static void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 
@@ -1072,7 +1108,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	else
 		to_drain = pcp->count;
 	free_pcppages_bulk(zone, to_drain, pcp);
-	pcp->count -= to_drain;
+	dec_pcp_count(smp_processor_id(), pcp, to_drain);
 	local_irq_restore(flags);
 }
 #endif
@@ -1099,7 +1135,7 @@ static void drain_pages(unsigned int cpu)
 		pcp = &pset->pcp;
 		if (pcp->count) {
 			free_pcppages_bulk(zone, pcp->count, pcp);
-			pcp->count = 0;
+			dec_pcp_count(cpu, pcp, pcp->count);
 		}
 		local_irq_restore(flags);
 	}
@@ -1118,7 +1154,7 @@ void drain_local_pages(void *arg)
  */
 void drain_all_pages(void)
 {
-	on_each_cpu(drain_local_pages, NULL, 1);
+	on_each_cpu_mask(cpus_with_pcp, drain_local_pages, NULL, 1);
 }
 
 #ifdef CONFIG_HIBERNATION
@@ -1166,7 +1202,7 @@ void free_hot_cold_page(struct page *page, int cold)
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
-	int migratetype;
+	int migratetype, cpu;
 	int wasMlocked = __TestClearPageMlocked(page);
 
 	if (!free_pages_prepare(page, 0))
@@ -1194,15 +1230,16 @@ void free_hot_cold_page(struct page *page, int cold)
 		migratetype = MIGRATE_MOVABLE;
 	}
 
+	cpu = smp_processor_id();
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	if (cold)
 		list_add_tail(&page->lru, &pcp->lists[migratetype]);
 	else
 		list_add(&page->lru, &pcp->lists[migratetype]);
-	pcp->count++;
+	inc_pcp_count(cpu, pcp, 1);
 	if (pcp->count >= pcp->high) {
 		free_pcppages_bulk(zone, pcp->batch, pcp);
-		pcp->count -= pcp->batch;
+		dec_pcp_count(cpu, pcp, pcp->batch);
 	}
 
 out:
@@ -1305,9 +1342,10 @@ again:
 		pcp = &this_cpu_ptr(zone->pageset)->pcp;
 		list = &pcp->lists[migratetype];
 		if (list_empty(list)) {
-			pcp->count += rmqueue_bulk(zone, 0,
+			inc_pcp_count(smp_processor_id(), pcp,
+					rmqueue_bulk(zone, 0,
 					pcp->batch, list,
-					migratetype, cold);
+					migratetype, cold));
 			if (unlikely(list_empty(list)))
 				goto failed;
 		}
@@ -1318,7 +1356,7 @@ again:
 			page = list_entry(list->next, struct page, lru);
 
 		list_del(&page->lru);
-		pcp->count--;
+		dec_pcp_count(smp_processor_id(), pcp, 1);
 	} else {
 		if (unlikely(gfp_flags & __GFP_NOFAIL)) {
 			/*
@@ -3553,6 +3591,10 @@ static int zone_batchsize(struct zone *zone)
 #endif
 }
 
+/*
+ * NOTE: If you call this function on a pcp of a populated zone you
+ * need to worry about syncing cpus_with_pcp state as well.
+ */
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
 	struct per_cpu_pages *pcp;
@@ -3673,6 +3715,7 @@ static int __zone_pcp_update(void *data)
 
 		local_irq_save(flags);
 		free_pcppages_bulk(zone, pcp->count, pcp);
+		dec_pcp_count(cpu, pcp, pcp->count);
 		setup_pageset(pset, batch);
 		local_irq_restore(flags);
 	}
@@ -5040,6 +5083,9 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
 void __init page_alloc_init(void)
 {
 	hotcpu_notifier(page_alloc_cpu_notify, 0);
+
+	/* Allocate the cpus_with_pcp var if CONFIG_CPUMASK_OFFSTACK */
+	alloc_bootmem_cpumask_var(&cpus_with_pcp);
 }
 
 /*
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH v2 4/6] mm: Only IPI CPUs to drain local pages if they exist
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319385413-29665-1-git-send-email-gilad@benyossef.com>

Use a cpumask to track CPUs with per-cpu pages in any zone
and only send an IPI requesting CPUs to drain these pages
to the buddy allocator if they actually have pages.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 mm/page_alloc.c |   64 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6e8ecb6..9551b90 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -57,11 +57,17 @@
 #include <linux/ftrace_event.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/percpu.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
+#include <asm/local.h>
 #include "internal.h"
 
+/* Which CPUs have per cpu pages  */
+cpumask_var_t cpus_with_pcp;
+static DEFINE_PER_CPU(unsigned long, total_cpu_pcp_count);
+
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
 DEFINE_PER_CPU(int, numa_node);
 EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -224,6 +230,36 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
+/*
+ * The following two functions track page counts both per zone/per CPU
+ * and globally per CPU.
+ *
+ * They must be called with interrupts disabled and either pinned to specific
+ * CPU or for offline CPUs or under stop_machine.
+ */
+
+static inline void inc_pcp_count(int cpu, struct per_cpu_pages *pcp, int count)
+{
+	unsigned long *tot = &per_cpu(total_cpu_pcp_count, cpu);
+
+	if (unlikely(!*tot))
+		cpumask_set_cpu(cpu, cpus_with_pcp);
+
+	*tot += count;
+	pcp->count += count;
+}
+
+static inline void dec_pcp_count(int cpu, struct per_cpu_pages *pcp, int count)
+{
+	unsigned long *tot = &per_cpu(total_cpu_pcp_count, cpu);
+
+	pcp->count -= count;
+	*tot -= count;
+
+	if (unlikely(!*tot))
+		cpumask_clear_cpu(cpu, cpus_with_pcp);
+}
+
 static void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 
@@ -1072,7 +1108,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	else
 		to_drain = pcp->count;
 	free_pcppages_bulk(zone, to_drain, pcp);
-	pcp->count -= to_drain;
+	dec_pcp_count(smp_processor_id(), pcp, to_drain);
 	local_irq_restore(flags);
 }
 #endif
@@ -1099,7 +1135,7 @@ static void drain_pages(unsigned int cpu)
 		pcp = &pset->pcp;
 		if (pcp->count) {
 			free_pcppages_bulk(zone, pcp->count, pcp);
-			pcp->count = 0;
+			dec_pcp_count(cpu, pcp, pcp->count);
 		}
 		local_irq_restore(flags);
 	}
@@ -1118,7 +1154,7 @@ void drain_local_pages(void *arg)
  */
 void drain_all_pages(void)
 {
-	on_each_cpu(drain_local_pages, NULL, 1);
+	on_each_cpu_mask(cpus_with_pcp, drain_local_pages, NULL, 1);
 }
 
 #ifdef CONFIG_HIBERNATION
@@ -1166,7 +1202,7 @@ void free_hot_cold_page(struct page *page, int cold)
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
-	int migratetype;
+	int migratetype, cpu;
 	int wasMlocked = __TestClearPageMlocked(page);
 
 	if (!free_pages_prepare(page, 0))
@@ -1194,15 +1230,16 @@ void free_hot_cold_page(struct page *page, int cold)
 		migratetype = MIGRATE_MOVABLE;
 	}
 
+	cpu = smp_processor_id();
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	if (cold)
 		list_add_tail(&page->lru, &pcp->lists[migratetype]);
 	else
 		list_add(&page->lru, &pcp->lists[migratetype]);
-	pcp->count++;
+	inc_pcp_count(cpu, pcp, 1);
 	if (pcp->count >= pcp->high) {
 		free_pcppages_bulk(zone, pcp->batch, pcp);
-		pcp->count -= pcp->batch;
+		dec_pcp_count(cpu, pcp, pcp->batch);
 	}
 
 out:
@@ -1305,9 +1342,10 @@ again:
 		pcp = &this_cpu_ptr(zone->pageset)->pcp;
 		list = &pcp->lists[migratetype];
 		if (list_empty(list)) {
-			pcp->count += rmqueue_bulk(zone, 0,
+			inc_pcp_count(smp_processor_id(), pcp,
+					rmqueue_bulk(zone, 0,
 					pcp->batch, list,
-					migratetype, cold);
+					migratetype, cold));
 			if (unlikely(list_empty(list)))
 				goto failed;
 		}
@@ -1318,7 +1356,7 @@ again:
 			page = list_entry(list->next, struct page, lru);
 
 		list_del(&page->lru);
-		pcp->count--;
+		dec_pcp_count(smp_processor_id(), pcp, 1);
 	} else {
 		if (unlikely(gfp_flags & __GFP_NOFAIL)) {
 			/*
@@ -3553,6 +3591,10 @@ static int zone_batchsize(struct zone *zone)
 #endif
 }
 
+/*
+ * NOTE: If you call this function on a pcp of a populated zone you
+ * need to worry about syncing cpus_with_pcp state as well.
+ */
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
 	struct per_cpu_pages *pcp;
@@ -3673,6 +3715,7 @@ static int __zone_pcp_update(void *data)
 
 		local_irq_save(flags);
 		free_pcppages_bulk(zone, pcp->count, pcp);
+		dec_pcp_count(cpu, pcp, pcp->count);
 		setup_pageset(pset, batch);
 		local_irq_restore(flags);
 	}
@@ -5040,6 +5083,9 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
 void __init page_alloc_init(void)
 {
 	hotcpu_notifier(page_alloc_cpu_notify, 0);
+
+	/* Allocate the cpus_with_pcp var if CONFIG_CPUMASK_OFFSTACK */
+	alloc_bootmem_cpumask_var(&cpus_with_pcp);
 }
 
 /*
-- 
1.7.0.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related

* [PATCH v2 3/6] tile: Move tile to use generic on_each_cpu_mask
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319385413-29665-1-git-send-email-gilad@benyossef.com>

The API is the same as the tile private one, so just remove
the private version of the functions

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 arch/tile/include/asm/smp.h |    7 -------
 arch/tile/kernel/smp.c      |   19 -------------------
 2 files changed, 0 insertions(+), 26 deletions(-)

diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
index 532124a..1aa759a 100644
--- a/arch/tile/include/asm/smp.h
+++ b/arch/tile/include/asm/smp.h
@@ -43,10 +43,6 @@ void evaluate_message(int tag);
 /* Boot a secondary cpu */
 void online_secondary(void);
 
-/* Call a function on a specified set of CPUs (may include this one). */
-extern void on_each_cpu_mask(const struct cpumask *mask,
-			     void (*func)(void *), void *info, bool wait);
-
 /* Topology of the supervisor tile grid, and coordinates of boot processor */
 extern HV_Topology smp_topology;
 
@@ -91,9 +87,6 @@ void print_disabled_cpus(void);
 
 #else /* !CONFIG_SMP */
 
-#define on_each_cpu_mask(mask, func, info, wait)		\
-  do { if (cpumask_test_cpu(0, (mask))) func(info); } while (0)
-
 #define smp_master_cpu		0
 #define smp_height		1
 #define smp_width		1
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index c52224d..a44e103 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -87,25 +87,6 @@ void send_IPI_allbutself(int tag)
 	send_IPI_many(&mask, tag);
 }
 
-
-/*
- * Provide smp_call_function_mask, but also run function locally
- * if specified in the mask.
- */
-void on_each_cpu_mask(const struct cpumask *mask, void (*func)(void *),
-		      void *info, bool wait)
-{
-	int cpu = get_cpu();
-	smp_call_function_many(mask, func, info, wait);
-	if (cpumask_test_cpu(cpu, mask)) {
-		local_irq_disable();
-		func(info);
-		local_irq_enable();
-	}
-	put_cpu();
-}
-
-
 /*
  * Functions related to starting/stopping cpus.
  */
-- 
1.7.0.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related

* [PATCH v2 3/6] tile: Move tile to use generic on_each_cpu_mask
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319385413-29665-1-git-send-email-gilad@benyossef.com>

The API is the same as the tile private one, so just remove
the private version of the functions

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 arch/tile/include/asm/smp.h |    7 -------
 arch/tile/kernel/smp.c      |   19 -------------------
 2 files changed, 0 insertions(+), 26 deletions(-)

diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
index 532124a..1aa759a 100644
--- a/arch/tile/include/asm/smp.h
+++ b/arch/tile/include/asm/smp.h
@@ -43,10 +43,6 @@ void evaluate_message(int tag);
 /* Boot a secondary cpu */
 void online_secondary(void);
 
-/* Call a function on a specified set of CPUs (may include this one). */
-extern void on_each_cpu_mask(const struct cpumask *mask,
-			     void (*func)(void *), void *info, bool wait);
-
 /* Topology of the supervisor tile grid, and coordinates of boot processor */
 extern HV_Topology smp_topology;
 
@@ -91,9 +87,6 @@ void print_disabled_cpus(void);
 
 #else /* !CONFIG_SMP */
 
-#define on_each_cpu_mask(mask, func, info, wait)		\
-  do { if (cpumask_test_cpu(0, (mask))) func(info); } while (0)
-
 #define smp_master_cpu		0
 #define smp_height		1
 #define smp_width		1
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index c52224d..a44e103 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -87,25 +87,6 @@ void send_IPI_allbutself(int tag)
 	send_IPI_many(&mask, tag);
 }
 
-
-/*
- * Provide smp_call_function_mask, but also run function locally
- * if specified in the mask.
- */
-void on_each_cpu_mask(const struct cpumask *mask, void (*func)(void *),
-		      void *info, bool wait)
-{
-	int cpu = get_cpu();
-	smp_call_function_many(mask, func, info, wait);
-	if (cpumask_test_cpu(cpu, mask)) {
-		local_irq_disable();
-		func(info);
-		local_irq_enable();
-	}
-	put_cpu();
-}
-
-
 /*
  * Functions related to starting/stopping cpus.
  */
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH v2 2/6] arm: Move arm over to generic on_each_cpu_mask
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319385413-29665-1-git-send-email-gilad@benyossef.com>

Note the generic version has the mask as first parameter

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 arch/arm/kernel/smp_tlb.c |   20 +++++---------------
 1 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index 7dcb352..02c5d2c 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -13,18 +13,6 @@
 #include <asm/smp_plat.h>
 #include <asm/tlbflush.h>
 
-static void on_each_cpu_mask(void (*func)(void *), void *info, int wait,
-	const struct cpumask *mask)
-{
-	preempt_disable();
-
-	smp_call_function_many(mask, func, info, wait);
-	if (cpumask_test_cpu(smp_processor_id(), mask))
-		func(info);
-
-	preempt_enable();
-}
-
 /**********************************************************************/
 
 /*
@@ -87,7 +75,7 @@ void flush_tlb_all(void)
 void flush_tlb_mm(struct mm_struct *mm)
 {
 	if (tlb_ops_need_broadcast())
-		on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm));
+		on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
 	else
 		local_flush_tlb_mm(mm);
 }
@@ -98,7 +86,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 		struct tlb_args ta;
 		ta.ta_vma = vma;
 		ta.ta_start = uaddr;
-		on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm));
+		on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page,
+					&ta, 1);
 	} else
 		local_flush_tlb_page(vma, uaddr);
 }
@@ -121,7 +110,8 @@ void flush_tlb_range(struct vm_area_struct *vma,
 		ta.ta_vma = vma;
 		ta.ta_start = start;
 		ta.ta_end = end;
-		on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm));
+		on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range,
+					&ta, 1);
 	} else
 		local_flush_tlb_range(vma, start, end);
 }
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH v2 2/6] arm: Move arm over to generic on_each_cpu_mask
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319385413-29665-1-git-send-email-gilad@benyossef.com>

Note the generic version has the mask as first parameter

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 arch/arm/kernel/smp_tlb.c |   20 +++++---------------
 1 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index 7dcb352..02c5d2c 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -13,18 +13,6 @@
 #include <asm/smp_plat.h>
 #include <asm/tlbflush.h>
 
-static void on_each_cpu_mask(void (*func)(void *), void *info, int wait,
-	const struct cpumask *mask)
-{
-	preempt_disable();
-
-	smp_call_function_many(mask, func, info, wait);
-	if (cpumask_test_cpu(smp_processor_id(), mask))
-		func(info);
-
-	preempt_enable();
-}
-
 /**********************************************************************/
 
 /*
@@ -87,7 +75,7 @@ void flush_tlb_all(void)
 void flush_tlb_mm(struct mm_struct *mm)
 {
 	if (tlb_ops_need_broadcast())
-		on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm));
+		on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
 	else
 		local_flush_tlb_mm(mm);
 }
@@ -98,7 +86,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 		struct tlb_args ta;
 		ta.ta_vma = vma;
 		ta.ta_start = uaddr;
-		on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm));
+		on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page,
+					&ta, 1);
 	} else
 		local_flush_tlb_page(vma, uaddr);
 }
@@ -121,7 +110,8 @@ void flush_tlb_range(struct vm_area_struct *vma,
 		ta.ta_vma = vma;
 		ta.ta_start = start;
 		ta.ta_end = end;
-		on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm));
+		on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range,
+					&ta, 1);
 	} else
 		local_flush_tlb_range(vma, start, end);
 }
-- 
1.7.0.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related

* [PATCH v2 1/6] smp: Introduce a generic on_each_cpu_mask function
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319385413-29665-1-git-send-email-gilad@benyossef.com>

on_each_cpu_mask calls a function on processors specified by cpumask,
which may include the local processor.

All the limitations specified in smp_call_function_many apply.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 include/linux/smp.h |   16 ++++++++++++++++
 kernel/smp.c        |   20 ++++++++++++++++++++
 2 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 8cc38d3..60628d7 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -102,6 +102,13 @@ static inline void call_function_init(void) { }
 int on_each_cpu(smp_call_func_t func, void *info, int wait);
 
 /*
+ * Call a function on processors specified by mask, which might include
+ * the local one.
+ */
+void on_each_cpu_mask(const struct cpumask *mask, void (*func)(void *),
+		void *info, bool wait);
+
+/*
  * Mark the boot cpu "online" so that it can call console drivers in
  * printk() and can access its per-cpu storage.
  */
@@ -132,6 +139,15 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info)
 		local_irq_enable();		\
 		0;				\
 	})
+#define on_each_cpu_mask(mask, func, info, wait) \
+	do {						\
+		if (cpumask_test_cpu(0, (mask))) {	\
+			local_irq_disable();		\
+			(func)(info);			\
+			local_irq_enable();		\
+		}					\
+	} while (0)
+
 static inline void smp_send_reschedule(int cpu) { }
 #define num_booting_cpus()			1
 #define smp_prepare_boot_cpu()			do {} while (0)
diff --git a/kernel/smp.c b/kernel/smp.c
index fb67dfa..df37c08 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -701,3 +701,23 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait)
 	return ret;
 }
 EXPORT_SYMBOL(on_each_cpu);
+
+/*
+ * Call a function on processors specified by cpumask, which may include
+ * the local processor. All the limitation specified in smp_call_function_many
+ * apply.
+ */
+void on_each_cpu_mask(const struct cpumask *mask, void (*func)(void *),
+			void *info, bool wait)
+{
+	int cpu = get_cpu();
+
+	smp_call_function_many(mask, func, info, wait);
+	if (cpumask_test_cpu(cpu, mask)) {
+		local_irq_disable();
+		func(info);
+		local_irq_enable();
+	}
+	put_cpu();
+}
+EXPORT_SYMBOL(on_each_cpu_mask);
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH v2 1/6] smp: Introduce a generic on_each_cpu_mask function
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319385413-29665-1-git-send-email-gilad@benyossef.com>

on_each_cpu_mask calls a function on processors specified by cpumask,
which may include the local processor.

All the limitations specified in smp_call_function_many apply.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 include/linux/smp.h |   16 ++++++++++++++++
 kernel/smp.c        |   20 ++++++++++++++++++++
 2 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 8cc38d3..60628d7 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -102,6 +102,13 @@ static inline void call_function_init(void) { }
 int on_each_cpu(smp_call_func_t func, void *info, int wait);
 
 /*
+ * Call a function on processors specified by mask, which might include
+ * the local one.
+ */
+void on_each_cpu_mask(const struct cpumask *mask, void (*func)(void *),
+		void *info, bool wait);
+
+/*
  * Mark the boot cpu "online" so that it can call console drivers in
  * printk() and can access its per-cpu storage.
  */
@@ -132,6 +139,15 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info)
 		local_irq_enable();		\
 		0;				\
 	})
+#define on_each_cpu_mask(mask, func, info, wait) \
+	do {						\
+		if (cpumask_test_cpu(0, (mask))) {	\
+			local_irq_disable();		\
+			(func)(info);			\
+			local_irq_enable();		\
+		}					\
+	} while (0)
+
 static inline void smp_send_reschedule(int cpu) { }
 #define num_booting_cpus()			1
 #define smp_prepare_boot_cpu()			do {} while (0)
diff --git a/kernel/smp.c b/kernel/smp.c
index fb67dfa..df37c08 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -701,3 +701,23 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait)
 	return ret;
 }
 EXPORT_SYMBOL(on_each_cpu);
+
+/*
+ * Call a function on processors specified by cpumask, which may include
+ * the local processor. All the limitation specified in smp_call_function_many
+ * apply.
+ */
+void on_each_cpu_mask(const struct cpumask *mask, void (*func)(void *),
+			void *info, bool wait)
+{
+	int cpu = get_cpu();
+
+	smp_call_function_many(mask, func, info, wait);
+	if (cpumask_test_cpu(cpu, mask)) {
+		local_irq_disable();
+		func(info);
+		local_irq_enable();
+	}
+	put_cpu();
+}
+EXPORT_SYMBOL(on_each_cpu_mask);
-- 
1.7.0.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related

* [PATCH v2 0/6] Reduce cross CPU IPI interference
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin

We have lots of infrastructure in place to partition a multi-core system such
that we have a group of CPUs that are dedicated to a specific task: cgroups,
scheduler and interrupt affinity, and the isolcpus boot parameter. Still, kernel
code will sometimes interrupt all CPUs in the system via IPIs for various
needs. These IPIs are useful and cannot be avoided altogether, but in certain
cases it is possible to interrupt only specific CPUs that have useful work to
do and not the entire system.

This patch set, inspired by discussions with Peter Zijlstra and Frederic
Weisbecker when testing the nohz task patch set, is a first stab at trying to
explore doing this by locating the places where such global IPI calls are
being made and turning a global IPI into an IPI for a specific group of CPUs.
The purpose of the patch set is to get feedback on whether this is the right way to
go for dealing with this issue and indeed, if the issue is even worth dealing
with at all.

The patch creates an on_each_cpu_mask infrastructure API (derived from
existing arch specific versions in Tile and Arm) and uses it to turn two global
IPI invocations into per CPU group invocations.

This second version incorporates changes due to reviewers' feedback and 
additional testing. The major changes from the previous version of the patch 
are:

- Better description for some of the patches with examples of what I am
  trying to solve.
- Better coding style for on_each_cpu based on review comments by Peter 
  Zijlstra and Sasha Levin.
- Fixed pcp_count handling to take into account which cpu the accounting 
  is done for. Sadly, AFAIK this negates using this_cpu_add/sub as 
  suggested by Peter Z.
- Removed kmalloc from the flush_all() path as per review comment by 
  Pekka Enberg.
- Moved cpumask allocations for CONFIG_CPUMASK_OFFSTACK=y to a point previous
  to first use during boot as testing revealed we no longer boot under 
  CONFIG_CPUMASK_OFFSTACK=y with original code.

The patch was compiled for arm and boot tested on x86 in UP, SMP, with and without 
CONFIG_CPUMASK_OFFSTACK and was further tested by running hackbench on x86 in 
SMP mode in a 4 CPUs VM for several hours with no obvious regressions.

I also artificially exercised SLUB flush_all via the debug interface and observed 
the difference in IPI count across processors  with and without the patch - from 
an IPI on all processors but one without the patch to a subset (and often no IPI 
at all) with the patch.


Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>

Gilad Ben-Yossef (6):
  smp: Introduce a generic on_each_cpu_mask function
  arm: Move arm over to generic on_each_cpu_mask
  tile: Move tile to use generic on_each_cpu_mask
  mm: Only IPI CPUs to drain local pages if they exist
  slub: Only IPI CPUs that have per cpu obj to flush
  slub: only preallocate cpus_with_slabs if offstack

 arch/arm/kernel/smp_tlb.c   |   20 +++----------
 arch/tile/include/asm/smp.h |    7 -----
 arch/tile/kernel/smp.c      |   19 -------------
 include/linux/slub_def.h    |    9 ++++++
 include/linux/smp.h         |   16 +++++++++++
 kernel/smp.c                |   20 +++++++++++++
 mm/page_alloc.c             |   64 +++++++++++++++++++++++++++++++++++++------
 mm/slub.c                   |   61 +++++++++++++++++++++++++++++++++++++++-
 8 files changed, 164 insertions(+), 52 deletions(-)

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [PATCH v2 0/6] Reduce cross CPU IPI interference
From: Gilad Ben-Yossef @ 2011-10-23 15:56 UTC (permalink / raw)
  To: linux-kernel
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin

We have lots of infrastructure in place to partition a multi-core system such
that we have a group of CPUs that are dedicated to a specific task: cgroups,
scheduler and interrupt affinity, and the isolcpus boot parameter. Still, kernel
code will sometimes interrupt all CPUs in the system via IPIs for various
needs. These IPIs are useful and cannot be avoided altogether, but in certain
cases it is possible to interrupt only specific CPUs that have useful work to
do and not the entire system.

This patch set, inspired by discussions with Peter Zijlstra and Frederic
Weisbecker when testing the nohz task patch set, is a first stab at trying to
explore doing this by locating the places where such global IPI calls are
being made and turning a global IPI into an IPI for a specific group of CPUs.
The purpose of the patch set is to get feedback on whether this is the right way to
go for dealing with this issue and indeed, if the issue is even worth dealing
with at all.

The patch creates an on_each_cpu_mask infrastructure API (derived from
existing arch specific versions in Tile and Arm) and uses it to turn two global
IPI invocations into per CPU group invocations.

This second version incorporates changes due to reviewers' feedback and 
additional testing. The major changes from the previous version of the patch 
are:

- Better description for some of the patches with examples of what I am
  trying to solve.
- Better coding style for on_each_cpu based on review comments by Peter 
  Zijlstra and Sasha Levin.
- Fixed pcp_count handling to take into account which cpu the accounting 
  is done for. Sadly, AFAIK this negates using this_cpu_add/sub as 
  suggested by Peter Z.
- Removed kmalloc from the flush_all() path as per review comment by 
  Pekka Enberg.
- Moved cpumask allocations for CONFIG_CPUMASK_OFFSTACK=y to a point previous
  to first use during boot as testing revealed we no longer boot under 
  CONFIG_CPUMASK_OFFSTACK=y with original code.

The patch was compiled for arm and boot tested on x86 in UP, SMP, with and without 
CONFIG_CPUMASK_OFFSTACK and was further tested by running hackbench on x86 in 
SMP mode in a 4 CPUs VM for several hours with no obvious regressions.

I also artificially exercised SLUB flush_all via the debug interface and observed 
the difference in IPI count across processors  with and without the patch - from 
an IPI on all processors but one without the patch to a subset (and often no IPI 
at all) with the patch.


Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>

Gilad Ben-Yossef (6):
  smp: Introduce a generic on_each_cpu_mask function
  arm: Move arm over to generic on_each_cpu_mask
  tile: Move tile to use generic on_each_cpu_mask
  mm: Only IPI CPUs to drain local pages if they exist
  slub: Only IPI CPUs that have per cpu obj to flush
  slub: only preallocate cpus_with_slabs if offstack

 arch/arm/kernel/smp_tlb.c   |   20 +++----------
 arch/tile/include/asm/smp.h |    7 -----
 arch/tile/kernel/smp.c      |   19 -------------
 include/linux/slub_def.h    |    9 ++++++
 include/linux/smp.h         |   16 +++++++++++
 kernel/smp.c                |   20 +++++++++++++
 mm/page_alloc.c             |   64 +++++++++++++++++++++++++++++++++++++------
 mm/slub.c                   |   61 +++++++++++++++++++++++++++++++++++++++-
 8 files changed, 164 insertions(+), 52 deletions(-)


^ permalink raw reply

* Re: [PATCH 1/2] nfs: writeback pages wait queue
From: Wu Fengguang @ 2011-10-23 15:54 UTC (permalink / raw)
  To: Trond Myklebust, linux-nfs
  Cc: Peter Zijlstra, linux-fsdevel@vger.kernel.org, Andrew Morton,
	Jan Kara, Christoph Hellwig, Dave Chinner, Greg Thelen,
	Minchan Kim, Vivek Goyal, Andrea Righi, linux-mm, LKML
In-Reply-To: <20111020160530.GC7054@localhost>

[-- Attachment #1: Type: text/plain, Size: 808 bytes --]

On Fri, Oct 21, 2011 at 12:05:30AM +0800, Wu Fengguang wrote:
> Trond,
> 
> After applying these two patches, the IO-less patchset performs
> 45% better than the vanilla kernel and the average commit size only
> decreases by -16% in the common NFS-thresh=1G/nfs-1dd case :)

To better understand how the NFS writeback wait queue helps, I
visualized the network traffic over time. Attached are the graphs for
the vanilla kernel and the one with the IO-less + NFS wait queue
patches.

nfs-1dd-4k-32p-32016M-1024M:10-3.1.0-rc8-vanilla+/dstat-bw.png
nfs-1dd-4k-32p-31951M-1024M:10-3.1.0-rc8-nfs-wq4+/dstat-bw.png

The obvious difference is that the network traffic now becomes more
distributed and the "zero traffic" periods are mostly reduced.

The other 2dd, 10dd cases have similar results.

Thanks,
Fengguang

[-- Attachment #2: dstat-bw.png --]
[-- Type: image/png, Size: 23197 bytes --]

[-- Attachment #3: dstat-bw.png --]
[-- Type: image/png, Size: 22149 bytes --]

^ permalink raw reply

* Re: [Qemu-devel] [PATCH 1/1] Introduce a new bus "ICC" to connect APIC
From: Blue Swirl @ 2011-10-23 15:54 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: aliguori, pingfank, qemu-devel
In-Reply-To: <4EA436A6.4050702@web.de>

On Sun, Oct 23, 2011 at 15:45, Jan Kiszka <jan.kiszka@web.de> wrote:
> On 2011-10-23 14:40, Blue Swirl wrote:
>> I'm not sure that a full bus is needed for now, even if it could match
>> real HW better, since the memory API already provides the separation
>> needed. Perhaps this would be needed later to make IRQs per-CPU also,
>> or to put IOAPIC also to the bus?
>
> The ICC interconnects LAPICs and IOAPICs.

But not between CPU core and its LAPIC?

> So it should next take over
> the management of the local_apics array from apic.c and the ioapics
> array from ioapic.c. It could implement generic message delivery
> services. Every bus participant would then have a reception handler that
> first checks the type and additional fields of a generic ICC message
> and, on match, forwards it to the corresponding device model functions.
> That would allow for something nicer than global apic_deliver_irq or
> ioapic_eoi_broadcast.
>
> That's clearly beyond the scope of this series but a good reason to
> model the ICC as accurately as qdev allows right from the start.

OK then, ICC could be a major cleanup.

^ permalink raw reply

* [ath9k-devel] Can't associate with a particular AP
From: Adrian Chadd @ 2011-10-23 15:54 UTC (permalink / raw)
  To: ath9k-devel
In-Reply-To: <20111023150240.882.qmail@stuge.se>

I'm kind of surprised that you're seeing different AP MACs, where did
you take that capture from?



Adrian

^ permalink raw reply

* Re: [RFC/PATCH 2/2] sparse: Use native sizes for data types
From: Jeff Garzik @ 2011-10-23 15:53 UTC (permalink / raw)
  To: penberg
  Cc: linux-sparse, Pekka Enberg, Christopher Li, Jeff Garzik,
	Linus Torvalds
In-Reply-To: <1319373420-8967-2-git-send-email-penberg@cs.helsinki.fi>

On 10/23/2011 08:37 AM, penberg@cs.helsinki.fi wrote:
> From: Pekka Enberg<penberg@kernel.org>
>
> This patch is needed to fix the sparsec LLVM backend data type sizes.
>
> Cc: Christopher Li<sparse@chrisli.org>
> Cc: Jeff Garzik<jgarzik@redhat.com>
> Cc: Linus Torvalds<torvalds@linux-foundation.org>
> Signed-off-by: Pekka Enberg<penberg@kernel.org>
> ---
>   target.c |   10 +++++-----
>   1 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/target.c b/target.c
> index 6a535bc..009002f 100644
> --- a/target.c
> +++ b/target.c
> @@ -17,9 +17,9 @@ int max_alignment = 16;
>   int bits_in_bool = 8;
>   int bits_in_char = 8;
>   int bits_in_short = 16;
> -int bits_in_int = 32;
> -int bits_in_long = 32;
> -int bits_in_longlong = 64;
> +int bits_in_int = sizeof(int) * 8;
> +int bits_in_long = sizeof(long) * 8;
> +int bits_in_longlong = sizeof(long long) * 8;
>   int bits_in_longlonglong = 128;
>
>   int max_int_alignment = 4;
> @@ -36,8 +36,8 @@ int max_fp_alignment = 8;
>   /*
>    * Pointer data type
>    */
> -int bits_in_pointer = 32;
> -int pointer_alignment = 4;
> +int bits_in_pointer = sizeof(void *) * 8;
> +int pointer_alignment = sizeof(void *);

No objection, but ideally we should select from a target template.

We don't want to start down the road of making runtime target switching 
(i386/x86-64) difficult.

	Jeff



^ permalink raw reply

* Re: [patch net-next V2] net: introduce ethernet teaming device
From: Jiri Pirko @ 2011-10-23 15:50 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, davem, bhutchings, shemminger, fubar, andy, tgraf,
	ebiederm, mirqus, kaber, greearb, jesse, fbl, benjamin.poirier,
	jzupka
In-Reply-To: <1319380668.27507.19.camel@edumazet-laptop>

Sun, Oct 23, 2011 at 04:37:48PM CEST, eric.dumazet@gmail.com wrote:
>Le dimanche 23 octobre 2011 à 14:51 +0200, Jiri Pirko a écrit :
>
>> Yes. And team->mode_ops.receive can change only after synchronize_rcu is
>> done. It's not possible it changes within the window you are talking about.
>
>If it was true, you would not need the synchronize_rcu() call you added
>in __team_change_mode() :
>
>
>----------------------------------------------------------------------
>
>static int __team_change_mode(struct team *team,
>                             const struct team_mode *new_mode)
>{
>       /* Check if mode was previously set and do cleanup if so */
>       if (team->mode_kind) {
>               void (*exit_op)(struct team *team) = team->mode_ops.exit;
>
>               /* Clear ops area so no callback is called any longer */
>               team_mode_ops_clear(&team->mode_ops);
>
>               synchronize_rcu();
>
>               if (exit_op)
>                       exit_op(team);
>
>
>-----------------------------------------------------------------------
>
>So the question is : Why do you have this synchronize_rcu() call here ?

You are right. This call is redundant here. I'll remove it.
This also means I can use memset & memcpy for mode_ops after all.

I'll change that and add comment about this locking situation to avoid
confusion.

Thanks a lot Eric!

Jirka

>
>
>

^ permalink raw reply

* Re: [RFC][PATCH 0/2] PM / Sleep: Extended control of suspend/hibernate interfaces
From: Alan Stern @ 2011-10-23 15:50 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: NeilBrown, Linux PM list, mark gross, LKML, John Stultz
In-Reply-To: <201110230007.33683.rjw@sisk.pl>

On Sun, 23 Oct 2011, Rafael J. Wysocki wrote:

> Moreover, the race is real, because if you have two processes trying to use
> /sys/power/wakeup_count at the same time, you can get:
> 
> Process A		Process B
> read from wakeup_count
> talk to apps
> write to wakeup_count
> --------- wakeup event ----------
> 			read from wakeup_count
> 			talk to apps
> 			write to wakeup_count
> try to suspend -> success (should be failure, because the wakeup event
> may still be processed by applications at this point and Process A hasn't
> checked that).
> 
> Now, there are systems running two (or more) desktop environments each of
> which has a power manager that may want to suspend on its own.  They both
> will probably use pm-utils, but then I somehow doubt that pm-utils is well
> prepared to handle such concurrency.

I have no objection to adding a kernel-based mechanism for restricting
the suspend interface to one process at a time.  However, that's just
part of your most recent proposal.  The other part involves
coordinating the requirements of all the processes that may want to
prevent the system from suspending, which is a harder job.


> I have one more rule.  If my would-be user space solution has the following
> properties:
> 
> * It is supposed to be used by all of the existing variants of user space
>   (i.e. all existing variants of user space are expected to use the very same
>   thing).
> 
> * It requires all of those user space variants to be modified to work with it
>   correctly.
> 
> * It includes a daemon process having to be started on boot and run permanently.
> 
> then it likely is better to handle the problem in the kernel.

This reasoning doesn't apply to the second problem of allowing
processes to block suspend.  Whether the solution is implemented in the
kernel or as a daemon, other programs will have to be modified to
accommodate it.

In fact, if it's done properly then these other programs should each
need only a single set of modifications; the differences involved in 
communicating with the kernel vs. a daemon could be encapsulated in a 
shared library.


Overall, I think the discussion is getting a little muddled because of
a significant problem that has not yet been addressed sufficiently.

There is a big difference between Android's kernel wakelocks and the
currently proposed use of wakeup_sources.  In Android, a kernel
wakelock associated with an input device isn't released until the
device's queue becomes empty, whereas we have been talking about
releasing the corresponding wakeup_source as soon as data added to
the queue becomes visible to userspace.

This is quite a significant difference.  It means there's a window of
time (from when the data is added to the queue to when it is removed)  
during which userspace is forced to cope with suspend races, instead of
letting the kernel handle things.  This is what leads to our problems
about sending fd's to the daemon process and sending a request to each
client before the daemon starts a suspend.

(Other aspects of this problem that haven't been mentioned before: What
happens when a client program using the notify-fd API wants to close
one of the wakeup-capable fd's?  It would have to tell the daemon to
close its copy of the fd as well.  And likewise, a client would have to 
inform the daemon whenever it opened a new wakeup-capable device file.)

Now, in the end, I think our approach makes more sense in a general 
setting.  The Android approach is okay for a restricted environment 
where you know beforehand exactly which devices will be wakeup-capable 
and which wakeup events will be monitored by userspace programs.  But 
for the whole range of Linux-based systems, the kernel can't rely on 
such information.

(If you think back to the original wakelock patches, for example,
you'll remember that the patch descriptions were expressed in terms of
what happens as the screen is turned on and off.  Obviously this is
meaningless for systems that, unlike an Android phone, don't have a
built-in screen.  I complained about this at the time, and the Android
people seemed to have a hard time understanding what I was objecting
to.)

So this is really our biggest problem.  If we can figure out a really
good way to solve it, I predict we'll find that the kernel-based and
daemon-based suspend solutions are extremely similar.

Alan Stern


^ permalink raw reply

* [PATCH v2 6/6] slub: only preallocate cpus_with_slabs if offstack
From: Gilad Ben-Yossef @ 2011-10-23 15:48 UTC (permalink / raw)
  To: lkml
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319384922-29632-1-git-send-email-gilad@benyossef.com>

We need a cpumask to track cpus with per cpu cache pages
to know which cpu to whack during flush_all. For
CONFIG_CPUMASK_OFFSTACK=n we allocate the mask on stack.
For CONFIG_CPUMASK_OFFSTACK=y we don't want to call kmalloc
on the flush_all path, so we preallocate per kmem_cache
on cache creation and use it in flush_all.

The result is that for the common CONFIG_CPUMASK_OFFSTACK=n
case there is no memory overhead for the mask var.

Since systems where CONFIG_CPUMASK_OFFSTACK=y are the systems
which are most likely to benefit from fewer IPIs by tracking
which cpu actually has a per cpu cache, we end up paying
the overhead only in cases we enjoy the upside.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 include/linux/slub_def.h |    8 ++++++-
 mm/slub.c                |   52 +++++++++++++++++++++++++++++++++------------
 2 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index b130f61..c07f7aa 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -103,8 +103,14 @@ struct kmem_cache {
 	int remote_node_defrag_ratio;
 #endif
 
-	/* Which CPUs hold local slabs for this cache. */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	/*
+	 * Which CPUs hold local slabs for this cache.
+	 * Only updated on calling flush_all().
+	 * Defined on stack for CONFIG_CPUMASK_OFFSTACK=n.
+	 */
 	cpumask_var_t cpus_with_slabs;
+#endif
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
diff --git a/mm/slub.c b/mm/slub.c
index f8cbf2d..765be95 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1946,20 +1946,48 @@ static void flush_cpu_slab(void *d)
 	__flush_cpu_slab(s, smp_processor_id());
 }
 
+/*
+ * We need a cpumask struct to track which cpus have
+ * per cpu caches. For CONFIG_CPUMASK_OFFSTACK=n we
+ * allocate on stack. For CONFIG_CPUMASK_OFFSTACK=y
+ * we don't want to allocate in the flush_all code path
+ * so we allocate a struct for each cache structure
+ * on kmem cache creation and use it here.
+ */
 static void flush_all(struct kmem_cache *s)
 {
 	struct kmem_cache_cpu *c;
 	int cpu;
+	cpumask_var_t cpus;
 
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	cpus = s->cpus_with_slabs;
+#endif
 	for_each_online_cpu(cpu) {
 		c = per_cpu_ptr(s->cpu_slab, cpu);
 		if (c && c->page)
-			cpumask_set_cpu(cpu, s->cpus_with_slabs);
+			cpumask_set_cpu(cpu, cpus);
 		else
-			cpumask_clear_cpu(cpu, s->cpus_with_slabs);
+			cpumask_clear_cpu(cpu, cpus);
 	}
 
-	on_each_cpu_mask(s->cpus_with_slabs, flush_cpu_slab, s, 1);
+	on_each_cpu_mask(cpus, flush_cpu_slab, s, 1);
+}
+
+static inline int alloc_cpus_mask(struct kmem_cache *s, int flags)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	return alloc_cpumask_var(&s->cpus_with_slabs, flags);
+#else
+	return 1;
+#endif
+}
+
+static inline void free_cpus_mask(struct kmem_cache *s)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	free_cpumask_var(s->cpus_with_slabs);
+#endif
 }
 
 /*
@@ -3039,7 +3067,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
 		if (s->flags & SLAB_DESTROY_BY_RCU)
 			rcu_barrier();
 		sysfs_slab_remove(s);
-		free_cpumask_var(s->cpus_with_slabs);
+		free_cpus_mask(s);
 	}
 	up_write(&slub_lock);
 }
@@ -3655,16 +3683,14 @@ void __init kmem_cache_init(void)
 	if (KMALLOC_MIN_SIZE <= 32) {
 		kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
 		BUG_ON(!kmalloc_caches[1]->name);
-		ret = alloc_cpumask_var(&kmalloc_caches[1]->cpus_with_slabs,
-			GFP_NOWAIT);
+		ret = alloc_cpus_mask(kmalloc_caches[1], GFP_NOWAIT);
 		BUG_ON(!ret);
 	}
 
 	if (KMALLOC_MIN_SIZE <= 64) {
 		kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
 		BUG_ON(!kmalloc_caches[2]->name);
-		ret = alloc_cpumask_var(&kmalloc_caches[2]->cpus_with_slabs,
-				GFP_NOWAIT);
+		ret = alloc_cpus_mask(kmalloc_caches[2], GFP_NOWAIT);
 		BUG_ON(!ret);
 	}
 
@@ -3673,8 +3699,7 @@ void __init kmem_cache_init(void)
 
 		BUG_ON(!s);
 		kmalloc_caches[i]->name = s;
-		ret = alloc_cpumask_var(&kmalloc_caches[i]->cpus_with_slabs,
-				GFP_NOWAIT);
+		ret = alloc_cpus_mask(kmalloc_caches[i], GFP_NOWAIT);
 		BUG_ON(!ret);
 	}
 
@@ -3693,8 +3718,7 @@ void __init kmem_cache_init(void)
 			BUG_ON(!name);
 			kmalloc_dma_caches[i] = create_kmalloc_cache(name,
 				s->objsize, SLAB_CACHE_DMA);
-			ret = alloc_cpumask_var(
-				&kmalloc_dma_caches[i]->cpus_with_slabs,
+			ret = alloc_cpus_mask(kmalloc_dma_caches[i],
 				GFP_NOWAIT);
 			BUG_ON(!ret);
 		}
@@ -3810,11 +3834,11 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 
 		if (kmem_cache_open(s, n,
 				size, align, flags, ctor)) {
-			alloc_cpumask_var(&s->cpus_with_slabs, GFP_KERNEL);
+			alloc_cpus_mask(s, GFP_KERNEL);
 			list_add(&s->list, &slab_caches);
 			if (sysfs_slab_add(s)) {
 				list_del(&s->list);
-				free_cpumask_var(s->cpus_with_slabs);
+				free_cpus_mask(s);
 				kfree(n);
 				kfree(s);
 				goto err;
-- 
1.7.0.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related

* [PATCH v2 5/6] slub: Only IPI CPUs that have per cpu obj to flush
From: Gilad Ben-Yossef @ 2011-10-23 15:48 UTC (permalink / raw)
  To: lkml
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319384922-29632-1-git-send-email-gilad@benyossef.com>

flush_all() is called for each kmem_cache_destroy(). So every cache
being destroyed dynamically ended up sending an IPI to each CPU in the
system, regardless if the cache has ever been used there.

For example, if you close the InfiniBand ipath driver char device file,
the close file ops calls kmem_cache_destroy(). So running some
InfiniBand config tool on a single CPU dedicated to system tasks
might interrupt the rest of the 127 CPUs I dedicated to some CPU
intensive task.

I suspect there is a good chance that every line in the output of "git
grep kmem_cache_destroy linux/ | grep '\->'" has a similar scenario.

This patch attempts to rectify this issue by sending an IPI to flush
the per cpu objects back to the free lists only to CPUs that seem to
have such objects.

The check which CPU to IPI is racy but we don't care since
asking a CPU without per cpu objects to flush does no
damage and as far as I can tell the flush_all by itself is
racy against allocs on remote CPUs anyway, so if you meant
the flush_all to be deterministic, you had to arrange for
locking regardless.

Also note that it is fine for concurrent uses of the cpumask var
on different cpus since they end up tracking the same thing. The
only downside to a race is asking a CPU with no per cpu cache
to flush, which before this patch happens all the time anyway.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 include/linux/slub_def.h |    3 +++
 mm/slub.c                |   37 +++++++++++++++++++++++++++++++++++--
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index f58d641..b130f61 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -102,6 +102,9 @@ struct kmem_cache {
 	 */
 	int remote_node_defrag_ratio;
 #endif
+
+	/* Which CPUs hold local slabs for this cache. */
+	cpumask_var_t cpus_with_slabs;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
diff --git a/mm/slub.c b/mm/slub.c
index 7c54fe8..f8cbf2d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1948,7 +1948,18 @@ static void flush_cpu_slab(void *d)
 
 static void flush_all(struct kmem_cache *s)
 {
-	on_each_cpu(flush_cpu_slab, s, 1);
+	struct kmem_cache_cpu *c;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		c = per_cpu_ptr(s->cpu_slab, cpu);
+		if (c && c->page)
+			cpumask_set_cpu(cpu, s->cpus_with_slabs);
+		else
+			cpumask_clear_cpu(cpu, s->cpus_with_slabs);
+	}
+
+	on_each_cpu_mask(s->cpus_with_slabs, flush_cpu_slab, s, 1);
 }
 
 /*
@@ -3028,6 +3039,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
 		if (s->flags & SLAB_DESTROY_BY_RCU)
 			rcu_barrier();
 		sysfs_slab_remove(s);
+		free_cpumask_var(s->cpus_with_slabs);
 	}
 	up_write(&slub_lock);
 }
@@ -3528,6 +3540,7 @@ void __init kmem_cache_init(void)
 	int order;
 	struct kmem_cache *temp_kmem_cache_node;
 	unsigned long kmalloc_size;
+	int ret;
 
 	kmem_size = offsetof(struct kmem_cache, node) +
 				nr_node_ids * sizeof(struct kmem_cache_node *);
@@ -3635,15 +3648,24 @@ void __init kmem_cache_init(void)
 
 	slab_state = UP;
 
-	/* Provide the correct kmalloc names now that the caches are up */
+	/*
+	 * Provide the correct kmalloc names and the cpus_with_slabs cpumasks
+	 * for CONFIG_CPUMASK_OFFSTACK=y case now that the caches are up.
+	 */
 	if (KMALLOC_MIN_SIZE <= 32) {
 		kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
 		BUG_ON(!kmalloc_caches[1]->name);
+		ret = alloc_cpumask_var(&kmalloc_caches[1]->cpus_with_slabs,
+			GFP_NOWAIT);
+		BUG_ON(!ret);
 	}
 
 	if (KMALLOC_MIN_SIZE <= 64) {
 		kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
 		BUG_ON(!kmalloc_caches[2]->name);
+		ret = alloc_cpumask_var(&kmalloc_caches[2]->cpus_with_slabs,
+				GFP_NOWAIT);
+		BUG_ON(!ret);
 	}
 
 	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
@@ -3651,6 +3673,9 @@ void __init kmem_cache_init(void)
 
 		BUG_ON(!s);
 		kmalloc_caches[i]->name = s;
+		ret = alloc_cpumask_var(&kmalloc_caches[i]->cpus_with_slabs,
+				GFP_NOWAIT);
+		BUG_ON(!ret);
 	}
 
 #ifdef CONFIG_SMP
@@ -3668,6 +3693,10 @@ void __init kmem_cache_init(void)
 			BUG_ON(!name);
 			kmalloc_dma_caches[i] = create_kmalloc_cache(name,
 				s->objsize, SLAB_CACHE_DMA);
+			ret = alloc_cpumask_var(
+				&kmalloc_dma_caches[i]->cpus_with_slabs,
+				GFP_NOWAIT);
+			BUG_ON(!ret);
 		}
 	}
 #endif
@@ -3778,15 +3807,19 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 
 	s = kmalloc(kmem_size, GFP_KERNEL);
 	if (s) {
+
 		if (kmem_cache_open(s, n,
 				size, align, flags, ctor)) {
+			alloc_cpumask_var(&s->cpus_with_slabs, GFP_KERNEL);
 			list_add(&s->list, &slab_caches);
 			if (sysfs_slab_add(s)) {
 				list_del(&s->list);
+				free_cpumask_var(s->cpus_with_slabs);
 				kfree(n);
 				kfree(s);
 				goto err;
 			}
+
 			up_write(&slub_lock);
 			return s;
 		}
-- 
1.7.0.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related

* [PATCH v2 4/6] mm: Only IPI CPUs to drain local pages if they exist
From: Gilad Ben-Yossef @ 2011-10-23 15:48 UTC (permalink / raw)
  To: lkml
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319384922-29632-1-git-send-email-gilad@benyossef.com>

Use a cpumask to track CPUs with per-cpu pages in any zone
and only send an IPI requesting CPUs to drain these pages
to the buddy allocator if they actually have pages.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 mm/page_alloc.c |   64 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6e8ecb6..9551b90 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -57,11 +57,17 @@
 #include <linux/ftrace_event.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/percpu.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
+#include <asm/local.h>
 #include "internal.h"
 
+/* Which CPUs have per-cpu pages */
+cpumask_var_t cpus_with_pcp;
+static DEFINE_PER_CPU(unsigned long, total_cpu_pcp_count);
+
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
 DEFINE_PER_CPU(int, numa_node);
 EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -224,6 +230,36 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
+/*
+ * The following two functions track page counts both per zone/per CPU
+ * and globally per CPU.
+ *
+ * They must be called with interrupts disabled and either pinned to a
+ * specific CPU, for an offline CPU, or under stop_machine.
+ */
+
+static inline void inc_pcp_count(int cpu, struct per_cpu_pages *pcp, int count)
+{
+	unsigned long *tot = &per_cpu(total_cpu_pcp_count, cpu);
+
+	if (unlikely(!*tot))
+		cpumask_set_cpu(cpu, cpus_with_pcp);
+
+	*tot += count;
+	pcp->count += count;
+}
+
+static inline void dec_pcp_count(int cpu, struct per_cpu_pages *pcp, int count)
+{
+	unsigned long *tot = &per_cpu(total_cpu_pcp_count, cpu);
+
+	pcp->count -= count;
+	*tot -= count;
+
+	if (unlikely(!*tot))
+		cpumask_clear_cpu(cpu, cpus_with_pcp);
+}
+
 static void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 
@@ -1072,7 +1108,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	else
 		to_drain = pcp->count;
 	free_pcppages_bulk(zone, to_drain, pcp);
-	pcp->count -= to_drain;
+	dec_pcp_count(smp_processor_id(), pcp, to_drain);
 	local_irq_restore(flags);
 }
 #endif
@@ -1099,7 +1135,7 @@ static void drain_pages(unsigned int cpu)
 		pcp = &pset->pcp;
 		if (pcp->count) {
 			free_pcppages_bulk(zone, pcp->count, pcp);
-			pcp->count = 0;
+			dec_pcp_count(cpu, pcp, pcp->count);
 		}
 		local_irq_restore(flags);
 	}
@@ -1118,7 +1154,7 @@ void drain_local_pages(void *arg)
  */
 void drain_all_pages(void)
 {
-	on_each_cpu(drain_local_pages, NULL, 1);
+	on_each_cpu_mask(cpus_with_pcp, drain_local_pages, NULL, 1);
 }
 
 #ifdef CONFIG_HIBERNATION
@@ -1166,7 +1202,7 @@ void free_hot_cold_page(struct page *page, int cold)
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
-	int migratetype;
+	int migratetype, cpu;
 	int wasMlocked = __TestClearPageMlocked(page);
 
 	if (!free_pages_prepare(page, 0))
@@ -1194,15 +1230,16 @@ void free_hot_cold_page(struct page *page, int cold)
 		migratetype = MIGRATE_MOVABLE;
 	}
 
+	cpu = smp_processor_id();
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	if (cold)
 		list_add_tail(&page->lru, &pcp->lists[migratetype]);
 	else
 		list_add(&page->lru, &pcp->lists[migratetype]);
-	pcp->count++;
+	inc_pcp_count(cpu, pcp, 1);
 	if (pcp->count >= pcp->high) {
 		free_pcppages_bulk(zone, pcp->batch, pcp);
-		pcp->count -= pcp->batch;
+		dec_pcp_count(cpu, pcp, pcp->batch);
 	}
 
 out:
@@ -1305,9 +1342,10 @@ again:
 		pcp = &this_cpu_ptr(zone->pageset)->pcp;
 		list = &pcp->lists[migratetype];
 		if (list_empty(list)) {
-			pcp->count += rmqueue_bulk(zone, 0,
+			inc_pcp_count(smp_processor_id(), pcp,
+					rmqueue_bulk(zone, 0,
 					pcp->batch, list,
-					migratetype, cold);
+					migratetype, cold));
 			if (unlikely(list_empty(list)))
 				goto failed;
 		}
@@ -1318,7 +1356,7 @@ again:
 			page = list_entry(list->next, struct page, lru);
 
 		list_del(&page->lru);
-		pcp->count--;
+		dec_pcp_count(smp_processor_id(), pcp, 1);
 	} else {
 		if (unlikely(gfp_flags & __GFP_NOFAIL)) {
 			/*
@@ -3553,6 +3591,10 @@ static int zone_batchsize(struct zone *zone)
 #endif
 }
 
+/*
+ * NOTE: If you call this function on a pcp of a populated zone you
+ * need to worry about syncing cpus_with_pcp state as well.
+ */
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
 	struct per_cpu_pages *pcp;
@@ -3673,6 +3715,7 @@ static int __zone_pcp_update(void *data)
 
 		local_irq_save(flags);
 		free_pcppages_bulk(zone, pcp->count, pcp);
+		dec_pcp_count(cpu, pcp, pcp->count);
 		setup_pageset(pset, batch);
 		local_irq_restore(flags);
 	}
@@ -5040,6 +5083,9 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
 void __init page_alloc_init(void)
 {
 	hotcpu_notifier(page_alloc_cpu_notify, 0);
+
+	/* Allocate the cpus_with_pcp var if CONFIG_CPUMASK_OFFSTACK */
+	alloc_bootmem_cpumask_var(&cpus_with_pcp);
 }
 
 /*
-- 
1.7.0.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related

* [PATCH v2 3/6] tile: Move tile to use generic on_each_cpu_mask
From: Gilad Ben-Yossef @ 2011-10-23 15:48 UTC (permalink / raw)
  To: lkml
  Cc: Gilad Ben-Yossef, Peter Zijlstra, Frederic Weisbecker,
	Russell King, linux-mm, Christoph Lameter, Pekka Enberg,
	Matt Mackall, Sasha Levin
In-Reply-To: <1319384922-29632-1-git-send-email-gilad@benyossef.com>

The API is the same as the tile-private one, so just remove
the private version of the functions.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: linux-mm@kvack.org
CC: Christoph Lameter <cl@linux-foundation.org>
CC: Pekka Enberg <penberg@kernel.org>
CC: Matt Mackall <mpm@selenic.com>
CC: Sasha Levin <levinsasha928@gmail.com>
---
 arch/tile/include/asm/smp.h |    7 -------
 arch/tile/kernel/smp.c      |   19 -------------------
 2 files changed, 0 insertions(+), 26 deletions(-)

diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
index 532124a..1aa759a 100644
--- a/arch/tile/include/asm/smp.h
+++ b/arch/tile/include/asm/smp.h
@@ -43,10 +43,6 @@ void evaluate_message(int tag);
 /* Boot a secondary cpu */
 void online_secondary(void);
 
-/* Call a function on a specified set of CPUs (may include this one). */
-extern void on_each_cpu_mask(const struct cpumask *mask,
-			     void (*func)(void *), void *info, bool wait);
-
 /* Topology of the supervisor tile grid, and coordinates of boot processor */
 extern HV_Topology smp_topology;
 
@@ -91,9 +87,6 @@ void print_disabled_cpus(void);
 
 #else /* !CONFIG_SMP */
 
-#define on_each_cpu_mask(mask, func, info, wait)		\
-  do { if (cpumask_test_cpu(0, (mask))) func(info); } while (0)
-
 #define smp_master_cpu		0
 #define smp_height		1
 #define smp_width		1
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index c52224d..a44e103 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -87,25 +87,6 @@ void send_IPI_allbutself(int tag)
 	send_IPI_many(&mask, tag);
 }
 
-
-/*
- * Provide smp_call_function_mask, but also run function locally
- * if specified in the mask.
- */
-void on_each_cpu_mask(const struct cpumask *mask, void (*func)(void *),
-		      void *info, bool wait)
-{
-	int cpu = get_cpu();
-	smp_call_function_many(mask, func, info, wait);
-	if (cpumask_test_cpu(cpu, mask)) {
-		local_irq_disable();
-		func(info);
-		local_irq_enable();
-	}
-	put_cpu();
-}
-
-
 /*
  * Functions related to starting/stopping cpus.
  */
-- 
1.7.0.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related


This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.