From: Christoph Lameter <cl@linux-foundation.org>
To: akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org,
Christoph Lameter <cl@linux-foundation.org>
Cc: linux-mm@vger.kernel.org
Cc: jeremy@goop.org
Cc: ebiederm@xmission.com
Cc: travis@sgi.com
Cc: herbert@gondor.apana.org.au
Cc: xemul@openvz.org
Subject: [patch 3/4] cpu alloc: The allocator
Date: Thu, 18 Sep 2008 16:36:51 -0700 [thread overview]
Message-ID: <20080918233701.599037712@quilx.com> (raw)
In-Reply-To: 20080918233648.581696416@quilx.com
[-- Attachment #1: cpu_alloc_base --]
[-- Type: text/plain, Size: 10735 bytes --]
The per cpu allocator allows dynamic allocation of memory on all
processors simultaneously. A bitmap is used to track used areas.
The allocator implements tight packing to reduce the cache footprint
and increase speed since cacheline contention is typically not a concern
for memory mainly used by a single cpu. Small objects will fill up gaps
left by larger allocations that required alignments.
The size of the cpu_alloc area can be changed via the percpu=xxx
kernel parameter.
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
---
include/linux/percpu.h | 46 ++++++++++++
include/linux/vmstat.h | 2
mm/Makefile | 2
mm/cpu_alloc.c | 181 +++++++++++++++++++++++++++++++++++++++++++++++++
mm/vmstat.c | 1
5 files changed, 230 insertions(+), 2 deletions(-)
create mode 100644 include/linux/cpu_alloc.h
create mode 100644 mm/cpu_alloc.c
Index: linux-2.6/include/linux/vmstat.h
===================================================================
--- linux-2.6.orig/include/linux/vmstat.h 2008-09-16 18:13:44.000000000 -0700
+++ linux-2.6/include/linux/vmstat.h 2008-09-16 18:30:24.000000000 -0700
@@ -37,7 +37,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PS
FOR_ALL_ZONES(PGSCAN_KSWAPD),
FOR_ALL_ZONES(PGSCAN_DIRECT),
PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
- PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+ PAGEOUTRUN, ALLOCSTALL, PGROTATED, CPU_BYTES,
#ifdef CONFIG_HUGETLB_PAGE
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
#endif
Index: linux-2.6/mm/Makefile
===================================================================
--- linux-2.6.orig/mm/Makefile 2008-09-16 18:13:44.000000000 -0700
+++ linux-2.6/mm/Makefile 2008-09-16 18:30:24.000000000 -0700
@@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o
maccess.o page_alloc.o page-writeback.o pdflush.o \
readahead.o swap.o truncate.o vmscan.o \
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
- page_isolation.o mm_init.o $(mmu-y)
+ page_isolation.o mm_init.o cpu_alloc.o $(mmu-y)
obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
obj-$(CONFIG_BOUNCE) += bounce.o
Index: linux-2.6/mm/cpu_alloc.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/mm/cpu_alloc.c 2008-09-16 19:04:09.000000000 -0700
@@ -0,0 +1,177 @@
+/*
+ * Cpu allocator - Manage objects allocated for each processor
+ *
+ * (C) 2008 SGI, Christoph Lameter <clameter@sgi.com>
+ * Basic implementation with allocation and free from a dedicated per
+ * cpu area.
+ *
+ * The per cpu allocator allows dynamic allocation of memory on all
+ * processors simultaneously. A bitmap is used to track used areas.
+ * The allocator implements tight packing to reduce the cache footprint
+ * and increase speed since cacheline contention is typically not a concern
+ * for memory mainly used by a single cpu. Small objects will fill up gaps
+ * left by larger allocations that required alignments.
+ */
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/bitmap.h>
+#include <asm/sections.h>
+#include <linux/bootmem.h>
+
+/*
+ * Basic allocation unit. A bit map is created to track the use of each
+ * UNIT_SIZE element in the cpu area.
+ *
+ * One bit in cpu_alloc_map therefore stands for sizeof(UNIT_TYPE) bytes.
+ */
+#define UNIT_TYPE int
+#define UNIT_SIZE sizeof(UNIT_TYPE)
+
+int units; /* Actual available units.
+	    * NOTE(review): nothing in the code shown here reads or writes
+	    * this variable; cpu_alloc() and cpu_free() each declare a local
+	    * named "units" that shadows it. Confirm whether it is needed.
+	    */
+
+/*
+ * Number of allocation units required to hold an object of the given
+ * size in bytes. A partially used unit counts as a whole one.
+ */
+static int size_to_units(unsigned long size)
+{
+	unsigned long rounded = size + UNIT_SIZE - 1;
+
+	return rounded / UNIT_SIZE;
+}
+
+/*
+ * Lock to protect the bitmap and the meta data for the cpu allocator.
+ *
+ * cpu_alloc() and cpu_free() take it with interrupts disabled
+ * (spin_lock_irqsave) around every update of the variables below.
+ */
+static DEFINE_SPINLOCK(cpu_alloc_map_lock);
+static unsigned long *cpu_alloc_map; /* One bit per unit; set bit == unit in use */
+static int nr_units; /* Number of units tracked by the map; set by cpu_alloc_init() */
+static int first_free; /* Unit index at which cpu_alloc() starts searching for free space */
+
+/*
+ * Flag the units [start, start + length) as in use in cpu_alloc_map.
+ * A length of zero or less leaves the map untouched.
+ *
+ * Caller must hold cpu_alloc_map_lock.
+ */
+static void set_map(int start, int length)
+{
+	int end = start + length;
+	int bit;
+
+	for (bit = start; bit < end; bit++)
+		__set_bit(bit, cpu_alloc_map);
+}
+
+/*
+ * Flag the units [start, start + length) as free in cpu_alloc_map.
+ * A length of zero or less leaves the map untouched.
+ *
+ * Caller must hold cpu_alloc_map_lock.
+ */
+static void clear_map(int start, int length)
+{
+	int end = start + length;
+	int bit;
+
+	for (bit = start; bit < end; bit++)
+		__clear_bit(bit, cpu_alloc_map);
+}
+
+/*
+ * Allocate an object of a certain size
+ *
+ * @size:	size of the object in bytes. A size of 0 yields ZERO_SIZE_PTR.
+ * @gfpflags:	only __GFP_ZERO is examined here; if set, the instance of
+ *		the object is cleared for every possible cpu.
+ * @align:	requested alignment in bytes. The search works in units of
+ *		UNIT_SIZE, so alignments below UNIT_SIZE are treated as
+ *		"any unit boundary".
+ *
+ * Returns a special pointer that can be used with CPU_PTR to find the
+ * address of the object for a certain cpu, or NULL if no sufficiently
+ * large and suitably aligned run of free units exists.
+ */
+void *cpu_alloc(unsigned long size, gfp_t gfpflags, unsigned long align)
+{
+	unsigned long start;
+	unsigned long align_units;
+	int count = size_to_units(size);
+	void *ptr;
+	int first;
+	unsigned long flags;
+
+	if (!size)
+		return ZERO_SIZE_PTR;
+
+	/*
+	 * Convert the alignment to units. Anything smaller than one unit
+	 * must become 1, otherwise the modulo below divides by zero.
+	 */
+	align_units = align / UNIT_SIZE;
+	if (!align_units)
+		align_units = 1;
+
+	spin_lock_irqsave(&cpu_alloc_map_lock, flags);
+
+	/* "first" stays set for as long as no free unit had to be skipped */
+	first = 1;
+	start = first_free;
+
+	for ( ; ; ) {
+
+		start = find_next_zero_bit(cpu_alloc_map, nr_units, start);
+		if (start >= nr_units)
+			goto out_of_memory;
+
+		/* Remember the lowest free unit to shorten later searches */
+		if (first)
+			first_free = start;
+
+		/*
+		 * Check alignment and that there is enough space after
+		 * the starting unit.
+		 */
+		if (start % align_units == 0 &&
+			find_next_bit(cpu_alloc_map, nr_units, start + 1)
+							>= start + count)
+			break;
+		start++;
+		first = 0;
+	}
+
+	/*
+	 * Reject a run that would extend past the end of the map before
+	 * touching first_free, so a failed allocation leaves it intact.
+	 */
+	if (start + count > nr_units)
+		goto out_of_memory;
+
+	/* The lowest free unit was consumed: the next search starts after it */
+	if (first)
+		first_free = start + count;
+
+	set_map(start, count);
+	__count_vm_events(CPU_BYTES, count * UNIT_SIZE);
+
+	spin_unlock_irqrestore(&cpu_alloc_map_lock, flags);
+
+	/*
+	 * "start" is a unit index, so scale it by the unit size. cpu_free()
+	 * recovers the index with the same UNIT_TYPE pointer arithmetic.
+	 */
+	ptr = (UNIT_TYPE *)__per_cpu_end + start;
+
+	if (gfpflags & __GFP_ZERO) {
+		int cpu;
+
+		for_each_possible_cpu(cpu)
+			memset(CPU_PTR(ptr, cpu), 0, size);
+	}
+
+	return ptr;
+
+out_of_memory:
+	spin_unlock_irqrestore(&cpu_alloc_map_lock, flags);
+	return NULL;
+}
+EXPORT_SYMBOL(cpu_alloc);
+
+/*
+ * Free an object. The pointer must be a cpu pointer allocated
+ * via cpu_alloc.
+ *
+ * @start:	pointer to release. NULL and ZERO_SIZE_PTR are ignored.
+ * @size:	size of the object in bytes; it determines how many units
+ *		are released, so it has to cover the same number of units
+ *		as the size used for the allocation.
+ *
+ * Triggers BUG() if the range lies outside the map or if its first or
+ * last unit is not marked as allocated.
+ */
+void cpu_free(void *start, unsigned long size)
+{
+	unsigned long count = size_to_units(size);
+	unsigned long index;
+	unsigned long flags;
+
+	if (!start || start == ZERO_SIZE_PTR)
+		return;
+
+	/* Unit index of the object relative to __per_cpu_end */
+	index = (int *)start - (int *)__per_cpu_end;
+
+	spin_lock_irqsave(&cpu_alloc_map_lock, flags);
+
+	/*
+	 * Validate while holding the lock so the map cannot change between
+	 * the check and the clear. The range test comes first so that
+	 * test_bit() is never asked about a bit beyond the map, and the
+	 * last-unit test is skipped for an empty range to avoid index - 1.
+	 */
+	BUG_ON(index >= nr_units ||
+		count > nr_units - index ||
+		!test_bit(index, cpu_alloc_map) ||
+		(count && !test_bit(index + count - 1, cpu_alloc_map)));
+
+	clear_map(index, count);
+	__count_vm_events(CPU_BYTES, -(long)(count * UNIT_SIZE));
+
+	/* Freed space below the search start becomes the new search start */
+	if (index < first_free)
+		first_free = index;
+
+	spin_unlock_irqrestore(&cpu_alloc_map_lock, flags);
+}
+EXPORT_SYMBOL(cpu_free);
+
+
+/*
+ * Set up the allocator: derive the number of units from percpu_reserve
+ * and allocate the bitmap that tracks them.
+ */
+void cpu_alloc_init(void)
+{
+	nr_units = percpu_reserve / UNIT_SIZE;
+
+	/*
+	 * BITS_TO_LONGS() yields a count of longs, while alloc_bootmem()
+	 * expects a size in bytes.
+	 */
+	cpu_alloc_map = alloc_bootmem(BITS_TO_LONGS(nr_units) *
+						sizeof(unsigned long));
+}
+
Index: linux-2.6/mm/vmstat.c
===================================================================
--- linux-2.6.orig/mm/vmstat.c 2008-09-16 18:13:44.000000000 -0700
+++ linux-2.6/mm/vmstat.c 2008-09-16 18:30:24.000000000 -0700
@@ -671,6 +671,7 @@ static const char * const vmstat_text[]
"allocstall",
"pgrotated",
+ "cpu_bytes",
#ifdef CONFIG_HUGETLB_PAGE
"htlb_buddy_alloc_success",
"htlb_buddy_alloc_fail",
Index: linux-2.6/include/linux/percpu.h
===================================================================
--- linux-2.6.orig/include/linux/percpu.h 2008-09-16 18:28:55.000000000 -0700
+++ linux-2.6/include/linux/percpu.h 2008-09-16 19:05:39.000000000 -0700
@@ -107,4 +107,52 @@ static inline void percpu_free(void *__p
#define free_percpu(ptr) percpu_free((ptr))
#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu))
+
+/*
+ * cpu allocator definitions
+ *
+ * The cpu allocator allows allocating an instance of an object for each
+ * processor and the use of a single pointer to access all instances
+ * of the object. cpu_alloc provides optimized means for accessing the
+ * instance of the object belonging to the currently executing processor
+ * as well as special atomic operations on fields of objects of the
+ * currently executing processor.
+ *
+ * Cpu objects are typically small. The allocator packs them tightly
+ * to increase the chance on each access that a per cpu object is already
+ * cached. Alignments may be specified but the intent is to align the data
+ * properly due to cpu alignment constraints and not to avoid cacheline
+ * contention. Any holes left by aligning objects are filled up with smaller
+ * objects that are allocated later.
+ *
+ * Cpu data can be allocated using CPU_ALLOC. The resulting pointer is
+ * pointing to the instance of the variable in the per cpu area provided
+ * by the loader. It is generally an error to use the pointer directly
+ * unless we are booting the system.
+ *
+ * __GFP_ZERO may be passed as a flag to zero the allocated memory.
+ */
+
+/* Return a pointer to the instance of an object for a particular processor */
+#define CPU_PTR(__p, __cpu) SHIFT_PERCPU_PTR((__p), per_cpu_offset(__cpu))
+
+/*
+ * Return a pointer to the instance of the object belonging to the processor
+ * running the current code.
+ */
+#define THIS_CPU(__p) SHIFT_PERCPU_PTR((__p), my_cpu_offset)
+#define __THIS_CPU(__p) SHIFT_PERCPU_PTR((__p), __my_cpu_offset)
+
+#define CPU_ALLOC(type, flags) ((typeof(type) *)cpu_alloc(sizeof(type), (flags), \
+ __alignof__(type)))
+#define CPU_FREE(pointer) cpu_free((pointer), sizeof(*(pointer)))
+
+/*
+ * Raw calls
+ *
+ * cpu_alloc() takes the object size and alignment in bytes. It returns
+ * ZERO_SIZE_PTR for a size of 0 and NULL when no fitting free range is
+ * found. With __GFP_ZERO the object is cleared for every possible cpu.
+ *
+ * cpu_free() ignores NULL and ZERO_SIZE_PTR. The size decides how many
+ * allocation units are released.
+ *
+ * cpu_alloc_init() sizes and allocates the allocation bitmap from
+ * percpu_reserve using the boot memory allocator.
+ */
+void *cpu_alloc(unsigned long size, gfp_t flags, unsigned long align);
+void cpu_free(void *cpu_pointer, unsigned long size);
+
+void cpu_alloc_init(void);
+
#endif /* __LINUX_PERCPU_H */
Index: linux-2.6/init/main.c
===================================================================
--- linux-2.6.orig/init/main.c 2008-09-16 18:29:40.000000000 -0700
+++ linux-2.6/init/main.c 2008-09-16 18:30:24.000000000 -0700
@@ -368,7 +368,11 @@ static void __init smp_init(void)
#define smp_init() do { } while (0)
#endif
-static inline void setup_per_cpu_areas(void) { }
+static inline void setup_per_cpu_areas(void)
+{
+ cpu_alloc_init();
+}
+
static inline void setup_nr_cpu_ids(void) { }
static inline void smp_prepare_cpus(unsigned int maxcpus) { }
@@ -405,6 +409,7 @@ static void __init setup_per_cpu_areas(v
char *ptr;
unsigned long nr_possible_cpus = num_possible_cpus();
+ cpu_alloc_init();
/* Copy section for each CPU (we discard the original) */
size = ALIGN(PERCPU_AREA_SIZE, PAGE_SIZE);
printk(KERN_INFO "percpu area: %d bytes total, %d available.\n",
Index: linux-2.6/arch/x86/kernel/setup_percpu.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_percpu.c 2008-09-16 19:04:30.000000000 -0700
+++ linux-2.6/arch/x86/kernel/setup_percpu.c 2008-09-16 19:04:48.000000000 -0700
@@ -144,6 +144,7 @@ void __init setup_per_cpu_areas(void)
char *ptr;
int cpu;
+ cpu_alloc_init();
/* Setup cpu_pda map */
setup_cpu_pda_map();
--
next prev parent reply other threads:[~2008-09-18 23:46 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-09-18 23:36 [patch 0/4] Cpu alloc V4: Replace percpu allocator in modules.c Christoph Lameter
2008-09-18 23:36 ` [patch 1/4] Make the per cpu reserve configurable Christoph Lameter
2008-09-19 7:12 ` Pekka Enberg
2008-09-19 12:56 ` Christoph Lameter
2008-09-18 23:36 ` [patch 2/4] percpu: Rename variables PERCPU_ENOUGH_ROOM -> PERCPU_AREA_SIZE Christoph Lameter
2008-09-18 23:36 ` Christoph Lameter [this message]
2008-09-19 7:58 ` [patch 3/4] cpu alloc: The allocator Pekka Enberg
2008-09-19 13:11 ` Christoph Lameter
2008-09-19 14:12 ` Pekka Enberg
2008-09-19 14:27 ` Christoph Lameter
2008-09-19 14:35 ` Pekka Enberg
2008-09-18 23:36 ` [patch 4/4] cpu alloc: Use cpu allocator instead of the builtin modules per cpu allocator Christoph Lameter
2008-09-19 7:39 ` Eric Dumazet
2008-09-19 12:59 ` Christoph Lameter
2008-09-19 13:23 ` Eric Dumazet
-- strict thread matches above, loose matches on Subject: below --
2008-09-19 14:58 [patch 0/4] Cpu alloc V5: Replace percpu allocator in modules.c Christoph Lameter
2008-09-19 14:59 ` [patch 3/4] cpu alloc: The allocator Christoph Lameter
2008-09-19 14:59 ` Christoph Lameter
2008-09-19 15:23 ` KOSAKI Motohiro
2008-09-19 15:23 ` KOSAKI Motohiro
2008-09-19 16:27 ` Eric Dumazet
2008-09-19 16:27 ` Eric Dumazet
2008-09-19 16:49 ` Christoph Lameter
2008-09-19 16:49 ` Christoph Lameter
2008-09-19 17:00 ` Christoph Lameter
2008-09-19 17:00 ` Christoph Lameter
2008-09-19 20:32 ` Christoph Lameter
2008-09-19 20:32 ` Christoph Lameter
2008-09-29 19:35 [patch 0/4] Cpu alloc V6: Replace percpu allocator in modules.c Christoph Lameter
2008-09-29 19:35 ` [patch 3/4] cpu alloc: The allocator Christoph Lameter
2008-09-29 19:35 ` Christoph Lameter
2008-09-30 6:35 ` Pekka Enberg
2008-09-30 6:35 ` Pekka Enberg
2008-09-30 11:38 ` Christoph Lameter
2008-09-30 11:38 ` Christoph Lameter
2008-09-30 11:48 ` Pekka Enberg
2008-09-30 11:48 ` Pekka Enberg
2008-09-30 12:12 ` Christoph Lameter
2008-09-30 12:12 ` Christoph Lameter
2008-10-03 7:33 ` Andrew Morton
2008-10-03 7:33 ` Andrew Morton
2008-10-03 7:43 ` Pekka Enberg
2008-10-03 7:43 ` Pekka Enberg
2008-10-03 8:20 ` Andrew Morton
2008-10-03 8:20 ` Andrew Morton
2008-10-03 14:15 ` Christoph Lameter
2008-10-03 14:15 ` Christoph Lameter
2008-10-03 12:48 ` Christoph Lameter
2008-10-03 12:48 ` Christoph Lameter
2008-10-05 21:10 ` Rusty Russell
2008-10-05 21:10 ` Rusty Russell
2008-10-07 13:27 ` Christoph Lameter
2008-10-07 13:27 ` Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080918233701.599037712@quilx.com \
--to=cl@linux-foundation.org \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.