From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
To: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@ozlabs.org, Kumar Gala <kumar.gala@freescale.com>
Subject: [PATCH 8/10] powerpc/mm: Runtime allocation of mmu context maps for nohash CPUs v2
Date: Fri, 19 Dec 2008 16:13:48 +1100 [thread overview]
Message-ID: <20081219051452.E3FE2DDFDC@ozlabs.org> (raw)
In-Reply-To: <1229663599.904385.502157196243.qpush@grosgo>
This makes the MMU context code used for CPUs with no hash table
(except 603) dynamically allocate the various maps used to track
the state of contexts.
Only the main free map and CPU 0 stale map are allocated at boot
time. Other CPU maps are allocated when those CPUs are brought up
and freed if they are unplugged.
This also moves the initialization of the MMU context management
slightly later during the boot process, which should be fine as
it's really only needed when userland is first started anyway.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2. rebased and added some more debug
arch/powerpc/kernel/setup_32.c | 5 +
arch/powerpc/mm/init_32.c | 4
arch/powerpc/mm/mmu_context_nohash.c | 161 ++++++++++++++++++++++++-----------
3 files changed, 116 insertions(+), 54 deletions(-)
--- linux-work.orig/arch/powerpc/mm/mmu_context_nohash.c 2008-12-19 16:06:46.000000000 +1100
+++ linux-work/arch/powerpc/mm/mmu_context_nohash.c 2008-12-19 16:07:05.000000000 +1100
@@ -28,54 +28,30 @@
#undef DEBUG
#define DEBUG_STEAL_ONLY
#undef DEBUG_MAP_CONSISTENCY
+/*#define DEBUG_CLAMP_LAST_CONTEXT 15 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
-#include <linux/spinlock.h>
-
-/*
- * The MPC8xx has only 16 contexts. We rotate through them on each
- * task switch. A better way would be to keep track of tasks that
- * own contexts, and implement an LRU usage. That way very active
- * tasks don't always have to pay the TLB reload overhead. The
- * kernel pages are mapped shared, so the kernel can run on behalf
- * of any task that makes a kernel entry. Shared does not mean they
- * are not protected, just that the ASID comparison is not performed.
- * -- Dan
- *
- * The IBM4xx has 256 contexts, so we can just rotate through these
- * as a way of "switching" contexts. If the TID of the TLB is zero,
- * the PID/TID comparison is disabled, so we can use a TID of zero
- * to represent all kernel pages as shared among all contexts.
- * -- Dan
- */
-
-#ifdef CONFIG_8xx
-#define LAST_CONTEXT 15
-#define FIRST_CONTEXT 0
-
-#elif defined(CONFIG_4xx)
-#define LAST_CONTEXT 255
-#define FIRST_CONTEXT 1
-
-#elif defined(CONFIG_E200) || defined(CONFIG_E500)
-#define LAST_CONTEXT 255
-#define FIRST_CONTEXT 1
-
-#else
-#error Unsupported processor type
-#endif
+static unsigned int first_context, last_context;
static unsigned int next_context, nr_free_contexts;
-static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
-static unsigned long stale_map[NR_CPUS][LAST_CONTEXT / BITS_PER_LONG + 1];
-static struct mm_struct *context_mm[LAST_CONTEXT+1];
+static unsigned long *context_map;
+static unsigned long *stale_map[NR_CPUS];
+static struct mm_struct **context_mm;
static spinlock_t context_lock = SPIN_LOCK_UNLOCKED;
+#define CTX_MAP_SIZE \
+ (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))
+
+
/* Steal a context from a task that has one at the moment.
*
* This is used when we are running out of available PID numbers
@@ -98,7 +74,7 @@ static unsigned int steal_context_smp(un
unsigned int cpu, max;
again:
- max = LAST_CONTEXT - FIRST_CONTEXT;
+ max = last_context - first_context;
/* Attempt to free next_context first and then loop until we manage */
while (max--) {
@@ -110,8 +86,8 @@ static unsigned int steal_context_smp(un
*/
if (mm->context.active) {
id ++;
- if (id > LAST_CONTEXT)
- id = FIRST_CONTEXT;
+ if (id > last_context)
+ id = first_context;
continue;
}
pr_debug("[%d] steal context %d from mm @%p\n",
@@ -169,7 +145,7 @@ static void context_check_map(void)
unsigned int id, nrf, nact;
nrf = nact = 0;
- for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
+ for (id = first_context; id <= last_context; id++) {
int used = test_bit(id, context_map);
if (!used)
nrf++;
@@ -187,6 +163,8 @@ static void context_check_map(void)
if (nact > num_online_cpus())
pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
nact, num_online_cpus());
+ if (first_context > 0 && !test_bit(0, context_map))
+ pr_err("MMU: Context 0 has been freed !!!\n");
}
#else
static void context_check_map(void) { }
@@ -209,6 +187,10 @@ void switch_mmu_context(struct mm_struct
/* Mark us active and the previous one not anymore */
next->context.active++;
if (prev) {
+#ifndef DEBUG_STEAL_ONLY
+ pr_debug(" old context %p active was: %d\n",
+ prev, prev->context.active);
+#endif
WARN_ON(prev->context.active < 1);
prev->context.active--;
}
@@ -221,8 +203,8 @@ void switch_mmu_context(struct mm_struct
/* We really don't have a context, let's try to acquire one */
id = next_context;
- if (id > LAST_CONTEXT)
- id = FIRST_CONTEXT;
+ if (id > last_context)
+ id = first_context;
map = context_map;
/* No more free contexts, let's try to steal one */
@@ -240,9 +222,9 @@ void switch_mmu_context(struct mm_struct
/* We know there's at least one free context, try to find it */
while (__test_and_set_bit(id, map)) {
- id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
- if (id > LAST_CONTEXT)
- id = FIRST_CONTEXT;
+ id = find_next_zero_bit(map, last_context+1, id);
+ if (id > last_context)
+ id = first_context;
}
stolen:
next_context = id + 1;
@@ -311,6 +293,42 @@ void destroy_context(struct mm_struct *m
spin_unlock(&context_lock);
}
+#ifdef CONFIG_SMP
+
+static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned int)(long)hcpu;
+
+ /* CPU 0's stale map is allocated at boot and kept forever, so skip it.
+ * NOTE(review): the kzalloc() result below is stored unchecked.
+ */
+ if (cpu == 0)
+ return NOTIFY_OK;
+
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ pr_debug("MMU: Allocating stale context map for CPU %d\n", cpu);
+ stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ pr_debug("MMU: Freeing stale context map for CPU %d\n", cpu);
+ kfree(stale_map[cpu]);
+ stale_map[cpu] = NULL;
+ break;
+#endif /* CONFIG_HOTPLUG_CPU */
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata mmu_context_cpu_nb = {
+ .notifier_call = mmu_context_cpu_notify,
+};
+
+#endif /* CONFIG_SMP */
/*
* Initialize the context management stuff.
@@ -324,13 +342,56 @@ void __init mmu_context_init(void)
init_mm.context.active = NR_CPUS;
/*
+ * The MPC8xx has only 16 contexts. We rotate through them on each
+ * task switch. A better way would be to keep track of tasks that
+ * own contexts, and implement an LRU usage. That way very active
+ * tasks don't always have to pay the TLB reload overhead. The
+ * kernel pages are mapped shared, so the kernel can run on behalf
+ * of any task that makes a kernel entry. Shared does not mean they
+ * are not protected, just that the ASID comparison is not performed.
+ * -- Dan
+ *
+ * The IBM4xx has 256 contexts, so we can just rotate through these
+ * as a way of "switching" contexts. If the TID of the TLB is zero,
+ * the PID/TID comparison is disabled, so we can use a TID of zero
+ * to represent all kernel pages as shared among all contexts.
+ * -- Dan
+ */
+ if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
+ first_context = 0;
+ last_context = 15;
+ } else {
+ first_context = 1;
+ last_context = 255;
+ }
+
+#ifdef DEBUG_CLAMP_LAST_CONTEXT
+ last_context = DEBUG_CLAMP_LAST_CONTEXT;
+#endif
+ /*
+ * Allocate the maps used by context management
+ */
+ context_map = alloc_bootmem(CTX_MAP_SIZE);
+ context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1));
+ stale_map[0] = alloc_bootmem(CTX_MAP_SIZE);
+
+#ifdef CONFIG_SMP
+ register_cpu_notifier(&mmu_context_cpu_nb);
+#endif
+
+ printk(KERN_INFO
+ "MMU: Allocated %d bytes of context maps for %d contexts\n",
+ 2 * CTX_MAP_SIZE + (sizeof(void *) * (last_context + 1)),
+ last_context - first_context + 1);
+
+ /*
* Some processors have too few contexts to reserve one for
* init_mm, and require using context 0 for a normal task.
* Other processors reserve the use of context zero for the kernel.
- * This code assumes FIRST_CONTEXT < 32.
+ * This code assumes first_context < 32.
*/
- context_map[0] = (1 << FIRST_CONTEXT) - 1;
- next_context = FIRST_CONTEXT;
- nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
+ context_map[0] = (1 << first_context) - 1;
+ next_context = first_context;
+ nr_free_contexts = last_context - first_context + 1;
}
Index: linux-work/arch/powerpc/kernel/setup_32.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/setup_32.c 2008-12-19 16:07:01.000000000 +1100
+++ linux-work/arch/powerpc/kernel/setup_32.c 2008-12-19 16:07:05.000000000 +1100
@@ -38,6 +38,7 @@
#include <asm/time.h>
#include <asm/serial.h>
#include <asm/udbg.h>
+#include <asm/mmu_context.h>
#include "setup.h"
@@ -330,4 +331,8 @@ void __init setup_arch(char **cmdline_p)
if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab);
paging_init();
+
+ /* Initialize the MMU context management stuff */
+ mmu_context_init();
+
}
Index: linux-work/arch/powerpc/mm/init_32.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/init_32.c 2008-12-19 15:42:28.000000000 +1100
+++ linux-work/arch/powerpc/mm/init_32.c 2008-12-19 16:07:05.000000000 +1100
@@ -35,7 +35,6 @@
#include <asm/pgalloc.h>
#include <asm/prom.h>
#include <asm/io.h>
-#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/smp.h>
@@ -180,9 +179,6 @@ void __init MMU_init(void)
if (ppc_md.progress)
ppc_md.progress("MMU:setio", 0x302);
- /* Initialize the context management stuff */
- mmu_context_init();
-
if (ppc_md.progress)
ppc_md.progress("MMU:exit", 0x211);
next prev parent reply other threads:[~2008-12-19 5:13 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-12-19 5:13 [PATCH 0/10] powerpc: Preliminary work to enable SMP BookE (v3) Benjamin Herrenschmidt
2008-12-19 5:13 ` [PATCH 1/10] powerpc/4xx: Extended DCR support v2 Benjamin Herrenschmidt
2008-12-19 12:36 ` Josh Boyer
2008-12-19 5:13 ` [PATCH 2/10] powerpc/mm: Split mmu_context handling v3 Benjamin Herrenschmidt
2008-12-19 5:13 ` [PATCH 3/10] powerpc/mm: Rework context management for CPUs with no hash table v2 Benjamin Herrenschmidt
2008-12-19 5:13 ` [PATCH 4/10] powerpc/mm: Introduce MMU features v3 Benjamin Herrenschmidt
2008-12-19 5:13 ` [PATCH 5/10] powerpc/mm: Add SMP support to no-hash TLB handling v5 Benjamin Herrenschmidt
2008-12-19 5:13 ` [PATCH 6/10] powerpc/mm: Split low level tlb invalidate for nohash processors Benjamin Herrenschmidt
2008-12-19 5:13 ` [PATCH 7/10] powerpc/44x: No need to mask MSR:CE, ME or DE in _tlbil_va on 440 Benjamin Herrenschmidt
2008-12-19 12:37 ` [PATCH 7/10] powerpc/44x: No need to mask MSR:CE,ME " Josh Boyer
2008-12-19 5:13 ` Benjamin Herrenschmidt [this message]
2008-12-19 5:13 ` [PATCH 9/10] powerpc/mm: Rework usage of _PAGE_COHERENT/NO_CACHE/GUARDED v2 Benjamin Herrenschmidt
2009-01-18 19:43 ` Gerhard Pircher
2009-01-18 19:55 ` Benjamin Herrenschmidt
2009-01-18 20:29 ` Gerhard Pircher
2009-01-23 16:51 ` [PATCH] powerpc/mm: Fix handling of _PAGE_COHERENT in BAT setup code Gerhard Pircher
2008-12-19 5:13 ` [PATCH 10/10] powerpc/44x: 44x TLB doesn't need "Guarded" set for all pages Benjamin Herrenschmidt
2008-12-19 12:38 ` Josh Boyer
2008-12-19 16:05 ` [PATCH 0/10] powerpc: Preliminary work to enable SMP BookE (v3) Kumar Gala
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081219051452.E3FE2DDFDC@ozlabs.org \
--to=benh@kernel.crashing.org \
--cc=kumar.gala@freescale.com \
--cc=linuxppc-dev@ozlabs.org \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).