From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
To: <linuxppc-dev@ozlabs.org>
Cc: Kumar Gala <kumar.gala@freescale.com>
Subject: [PATCH 14/16] powerpc/mm: Runtime allocation of mmu context maps for nohash CPUs v2
Date: Mon, 15 Dec 2008 16:45:08 +1100 [thread overview]
Message-ID: <20081215054607.5DF5DDDFB7@ozlabs.org> (raw)
In-Reply-To: <1229319836.100184.344640589620.qpush@grosgo>
This makes the MMU context code used for CPUs with no hash table
(except 603) dynamically allocate the various maps used to track
the state of contexts.
Only the main free map and CPU 0 stale map are allocated at boot
time. Other CPU maps are allocated when those CPUs are brought up
and freed if they are unplugged.
This also moves the initialization of the MMU context management
slightly later during the boot process, which should be fine as
it's really only needed when userland is first started anyway.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2: rebased and added some more debug output
arch/powerpc/kernel/setup_32.c | 5 +
arch/powerpc/mm/init_32.c | 4
arch/powerpc/mm/mmu_context_nohash.c | 161 ++++++++++++++++++++++++-----------
3 files changed, 116 insertions(+), 54 deletions(-)
--- linux-work.orig/arch/powerpc/mm/mmu_context_nohash.c 2008-12-11 14:57:50.000000000 +1100
+++ linux-work/arch/powerpc/mm/mmu_context_nohash.c 2008-12-12 17:28:31.000000000 +1100
@@ -28,54 +28,30 @@
#undef DEBUG
#define DEBUG_STEAL_ONLY
#undef DEBUG_MAP_CONSISTENCY
+/*#define DEBUG_CLAMP_LAST_CONTEXT 15 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
-#include <linux/spinlock.h>
-
-/*
- * The MPC8xx has only 16 contexts. We rotate through them on each
- * task switch. A better way would be to keep track of tasks that
- * own contexts, and implement an LRU usage. That way very active
- * tasks don't always have to pay the TLB reload overhead. The
- * kernel pages are mapped shared, so the kernel can run on behalf
- * of any task that makes a kernel entry. Shared does not mean they
- * are not protected, just that the ASID comparison is not performed.
- * -- Dan
- *
- * The IBM4xx has 256 contexts, so we can just rotate through these
- * as a way of "switching" contexts. If the TID of the TLB is zero,
- * the PID/TID comparison is disabled, so we can use a TID of zero
- * to represent all kernel pages as shared among all contexts.
- * -- Dan
- */
-
-#ifdef CONFIG_8xx
-#define LAST_CONTEXT 15
-#define FIRST_CONTEXT 0
-
-#elif defined(CONFIG_4xx)
-#define LAST_CONTEXT 255
-#define FIRST_CONTEXT 1
-
-#elif defined(CONFIG_E200) || defined(CONFIG_E500)
-#define LAST_CONTEXT 255
-#define FIRST_CONTEXT 1
-
-#else
-#error Unsupported processor type
-#endif
+static unsigned int first_context, last_context;
static unsigned int next_context, nr_free_contexts;
-static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
-static unsigned long stale_map[NR_CPUS][LAST_CONTEXT / BITS_PER_LONG + 1];
-static struct mm_struct *context_mm[LAST_CONTEXT+1];
+static unsigned long *context_map;
+static unsigned long *stale_map[NR_CPUS];
+static struct mm_struct **context_mm;
static spinlock_t context_lock = SPIN_LOCK_UNLOCKED;
+#define CTX_MAP_SIZE \
+ (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))
+
+
/* Steal a context from a task that has one at the moment.
* This is only used on 8xx and 4xx and we presently assume that
* they don't do SMP. If they do then this will have to check
@@ -100,7 +76,7 @@ static unsigned int steal_context_smp(un
unsigned int cpu, max;
again:
- max = LAST_CONTEXT - FIRST_CONTEXT;
+ max = last_context - first_context;
/* Attempt to free next_context first and then loop until we manage */
while (max--) {
@@ -112,8 +88,8 @@ static unsigned int steal_context_smp(un
*/
if (mm->context.active) {
id ++;
- if (id > LAST_CONTEXT)
- id = FIRST_CONTEXT;
+ if (id > last_context)
+ id = first_context;
continue;
}
pr_debug("[%d] steal context %d from mm @%p\n",
@@ -171,7 +147,7 @@ static void context_check_map(void)
unsigned int id, nrf, nact;
nrf = nact = 0;
- for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
+ for (id = first_context; id <= last_context; id++) {
int used = test_bit(id, context_map);
if (!used)
nrf++;
@@ -189,6 +165,8 @@ static void context_check_map(void)
if (nact > num_online_cpus())
pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
nact, num_online_cpus());
+ if (first_context > 0 && !test_bit(0, context_map))
+ pr_err("MMU: Context 0 has been freed !!!\n");
}
#else
static void context_check_map(void) { }
@@ -211,6 +189,10 @@ void switch_mmu_context(struct mm_struct
/* Mark us active and the previous one not anymore */
next->context.active++;
if (prev) {
+#ifndef DEBUG_STEAL_ONLY
+ pr_debug(" old context %p active was: %d\n",
+ prev, prev->context.active);
+#endif
WARN_ON(prev->context.active < 1);
prev->context.active--;
}
@@ -223,8 +205,8 @@ void switch_mmu_context(struct mm_struct
/* We really don't have a context, let's try to acquire one */
id = next_context;
- if (id > LAST_CONTEXT)
- id = FIRST_CONTEXT;
+ if (id > last_context)
+ id = first_context;
map = context_map;
/* No more free contexts, let's try to steal one */
@@ -242,9 +224,9 @@ void switch_mmu_context(struct mm_struct
/* We know there's at least one free context, try to find it */
while (__test_and_set_bit(id, map)) {
- id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
- if (id > LAST_CONTEXT)
- id = FIRST_CONTEXT;
+ id = find_next_zero_bit(map, last_context+1, id);
+ if (id > last_context)
+ id = first_context;
}
stolen:
next_context = id + 1;
@@ -313,6 +295,42 @@ void destroy_context(struct mm_struct *m
spin_unlock(&context_lock);
}
+#ifdef CONFIG_SMP
+
+static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned int)(long)hcpu;
+
+ /* We don't touch CPU 0 map, it's allocated at boot and kept
+ * around forever
+ */
+ if (cpu == 0)
+ return NOTIFY_OK;
+
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ pr_debug("MMU: Allocating stale context map for CPU %d\n", cpu);
+ stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ pr_debug("MMU: Freeing stale context map for CPU %d\n", cpu);
+ kfree(stale_map[cpu]);
+ stale_map[cpu] = NULL;
+ break;
+#endif
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata mmu_context_cpu_nb = {
+ .notifier_call = mmu_context_cpu_notify,
+};
+
+#endif /* CONFIG_SMP */
/*
* Initialize the context management stuff.
@@ -326,13 +344,56 @@ void __init mmu_context_init(void)
init_mm.context.active = NR_CPUS;
/*
+ * The MPC8xx has only 16 contexts. We rotate through them on each
+ * task switch. A better way would be to keep track of tasks that
+ * own contexts, and implement an LRU usage. That way very active
+ * tasks don't always have to pay the TLB reload overhead. The
+ * kernel pages are mapped shared, so the kernel can run on behalf
+ * of any task that makes a kernel entry. Shared does not mean they
+ * are not protected, just that the ASID comparison is not performed.
+ * -- Dan
+ *
+ * The IBM4xx has 256 contexts, so we can just rotate through these
+ * as a way of "switching" contexts. If the TID of the TLB is zero,
+ * the PID/TID comparison is disabled, so we can use a TID of zero
+ * to represent all kernel pages as shared among all contexts.
+ * -- Dan
+ */
+ if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
+ first_context = 0;
+ last_context = 15;
+ } else {
+ first_context = 1;
+ last_context = 255;
+ }
+
+#ifdef DEBUG_CLAMP_LAST_CONTEXT
+ last_context = DEBUG_CLAMP_LAST_CONTEXT;
+#endif
+ /*
+ * Allocate the maps used by context management
+ */
+ context_map = alloc_bootmem(CTX_MAP_SIZE);
+ context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1));
+ stale_map[0] = alloc_bootmem(CTX_MAP_SIZE);
+
+#ifdef CONFIG_SMP
+ register_cpu_notifier(&mmu_context_cpu_nb);
+#endif
+
+ printk(KERN_INFO
+ "MMU: Allocated %d bytes of context maps for %d contexts\n",
+ 2 * CTX_MAP_SIZE + (sizeof(void *) * (last_context + 1)),
+ last_context - first_context + 1);
+
+ /*
* Some processors have too few contexts to reserve one for
* init_mm, and require using context 0 for a normal task.
* Other processors reserve the use of context zero for the kernel.
- * This code assumes FIRST_CONTEXT < 32.
+ * This code assumes first_context < 32.
*/
- context_map[0] = (1 << FIRST_CONTEXT) - 1;
- next_context = FIRST_CONTEXT;
- nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
+ context_map[0] = (1 << first_context) - 1;
+ next_context = first_context;
+ nr_free_contexts = last_context - first_context + 1;
}
Index: linux-work/arch/powerpc/kernel/setup_32.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/setup_32.c 2008-12-11 14:57:58.000000000 +1100
+++ linux-work/arch/powerpc/kernel/setup_32.c 2008-12-12 17:26:58.000000000 +1100
@@ -38,6 +38,7 @@
#include <asm/time.h>
#include <asm/serial.h>
#include <asm/udbg.h>
+#include <asm/mmu_context.h>
#include "setup.h"
@@ -330,4 +331,8 @@ void __init setup_arch(char **cmdline_p)
if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab);
paging_init();
+
+ /* Initialize the MMU context management stuff */
+ mmu_context_init();
+
}
Index: linux-work/arch/powerpc/mm/init_32.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/init_32.c 2008-12-11 14:57:41.000000000 +1100
+++ linux-work/arch/powerpc/mm/init_32.c 2008-12-11 14:58:01.000000000 +1100
@@ -35,7 +35,6 @@
#include <asm/pgalloc.h>
#include <asm/prom.h>
#include <asm/io.h>
-#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/smp.h>
@@ -180,9 +179,6 @@ void __init MMU_init(void)
if (ppc_md.progress)
ppc_md.progress("MMU:setio", 0x302);
- /* Initialize the context management stuff */
- mmu_context_init();
-
if (ppc_md.progress)
ppc_md.progress("MMU:exit", 0x211);
next prev parent reply other threads:[~2008-12-15 5:45 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-12-15 5:43 [PATCH 0/16] powerpc: Preliminary work to enable SMP BookE (v2) Benjamin Herrenschmidt
2008-12-15 5:44 ` [PATCH 1/16] powerpc: Fix bogus cache flushing on all 40x and BookE processors v2 Benjamin Herrenschmidt
2008-12-15 5:44 ` [PATCH 2/16] powerpc: Fix asm EMIT_BUG_ENTRY with !CONFIG_BUG Benjamin Herrenschmidt
2008-12-15 5:44 ` [PATCH 3/16] powerpc/4xx: Extended DCR support v2 Benjamin Herrenschmidt
2008-12-17 17:33 ` Josh Boyer
2008-12-15 5:44 ` [PATCH 4/16] powerpc/fsl-booke: Fix problem with _tlbil_va Benjamin Herrenschmidt
2008-12-15 6:59 ` Stephen Rothwell
2008-12-15 7:04 ` Benjamin Herrenschmidt
2008-12-15 5:44 ` [PATCH 5/16] powerpc/mm: Add local_flush_tlb_mm() to SW loaded TLB implementations Benjamin Herrenschmidt
2008-12-15 20:30 ` Kumar Gala
2008-12-15 5:44 ` [PATCH 6/16] powerpc/mm: Split mmu_context handling v3 Benjamin Herrenschmidt
2008-12-15 15:43 ` Arnd Bergmann
2008-12-15 20:20 ` Benjamin Herrenschmidt
2008-12-15 5:44 ` [PATCH 7/16] powerpc/mm: Rework context management for CPUs with no hash table v2 Benjamin Herrenschmidt
2008-12-17 21:30 ` Kumar Gala
2008-12-15 5:44 ` [PATCH 8/16] powerpc/mm: Rename tlb_32.c and tlb_64.c to tlb_hash32.c and tlb_hash64.c Benjamin Herrenschmidt
2008-12-15 20:36 ` Kumar Gala
2008-12-15 20:46 ` Benjamin Herrenschmidt
2008-12-15 5:44 ` [PATCH 9/16] powerpc/mm: Introduce MMU features v2 Benjamin Herrenschmidt
2008-12-15 5:44 ` [PATCH 10/16] powerpc/mm: Remove flush_HPTE() Benjamin Herrenschmidt
2008-12-15 5:44 ` [PATCH 11/16] powerpc/mm: Add SMP support to no-hash TLB handling v3 Benjamin Herrenschmidt
2008-12-15 20:19 ` Kumar Gala
2008-12-15 20:46 ` Benjamin Herrenschmidt
2008-12-15 20:57 ` Kumar Gala
2008-12-15 21:03 ` Benjamin Herrenschmidt
2008-12-15 21:10 ` Kumar Gala
2008-12-15 21:18 ` Benjamin Herrenschmidt
2008-12-15 22:19 ` Kumar Gala
2008-12-15 23:31 ` Benjamin Herrenschmidt
2008-12-15 5:45 ` [PATCH 12/16] powerpc/mm: Split low level tlb invalidate for nohash processors Benjamin Herrenschmidt
2008-12-15 5:45 ` [PATCH 13/16] powerpc/44x: No need to mask MSR:CE, ME or DE in _tlbil_va on 440 Benjamin Herrenschmidt
2008-12-15 12:25 ` [PATCH 13/16] powerpc/44x: No need to mask MSR:CE,ME " Josh Boyer
2008-12-15 5:45 ` Benjamin Herrenschmidt [this message]
2008-12-17 21:21 ` [PATCH 14/16] powerpc/mm: Runtime allocation of mmu context maps for nohash CPUs v2 Kumar Gala
2008-12-15 5:45 ` [PATCH 15/16] powerpc/mm: Rework usage of _PAGE_COHERENT/NO_CACHE/GUARDED Benjamin Herrenschmidt
2008-12-15 20:54 ` Kumar Gala
2008-12-15 21:01 ` Benjamin Herrenschmidt
2008-12-15 21:08 ` Kumar Gala
2008-12-15 21:03 ` Michael Ellerman
2008-12-15 21:05 ` Benjamin Herrenschmidt
2008-12-15 5:45 ` [PATCH 16/16] powerpc/44x: 44x TLB doesn't need "Guarded" set for all pages Benjamin Herrenschmidt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081215054607.5DF5DDDFB7@ozlabs.org \
--to=benh@kernel.crashing.org \
--cc=kumar.gala@freescale.com \
--cc=linuxppc-dev@ozlabs.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).