* Re: TLB flushing on SGI platforms
@ 2003-12-01 21:42 David Mosberger
0 siblings, 0 replies; 2+ messages in thread
From: David Mosberger @ 2003-12-01 21:42 UTC (permalink / raw)
To: linux-ia64
>>>>> On Wed, 26 Nov 2003 09:43:01 -0600, Jack Steiner <steiner@sgi.com> said:
Jack> The SGI NUMA platform does not use the hardware "ptc" instruction
Jack> to flush TLBs. Instead, it has to write an MMR on the chipset on each
Jack> node to cause a TLB flush transaction to be placed on the bus. On
Jack> large systems, the overhead to broadcast the TLB flush to every node
Jack> in the system is one of the hot spots in the kernel.
Jack> In most cases, the TLB context being flushed has been loaded into
Jack> a small subset of the nodes. Flushing every node is unnecessary.
Jack> I'm looking for suggestions on the best way to limit TLB flushing so
Jack> that only the necessary nodes are flushed. Here is a patch that
Jack> I believe will work. I added a bitmask to the mm_context_t to
Jack> track nodes where the context has been loaded. The TLB flush routine
Jack> issues the TLB flush requests only to these nodes.
Jack> Are there other/better ways that I can do this??
Why not use mm->cpu_vm_mask? It will give you CPU instead of
node-granularity, but if you really want nodes instead, you can do the
mapping in the NUMA-specific code.
--david
^ permalink raw reply [flat|nested] 2+ messages in thread
* TLB flushing on SGI platforms
@ 2003-11-26 15:43 Jack Steiner
0 siblings, 0 replies; 2+ messages in thread
From: Jack Steiner @ 2003-11-26 15:43 UTC (permalink / raw)
To: linux-ia64
The SGI NUMA platform does not use the hardware "ptc" instruction
to flush TLBs. Instead, it has to write an MMR on the chipset on each
node to cause a TLB flush transaction to be placed on the bus. On
large systems, the overhead to broadcast the TLB flush to every node
in the system is one of the hot spots in the kernel.
In most cases, the TLB context being flushed has been loaded into
a small subset of the nodes. Flushing every node is unnecessary.
I'm looking for suggestions on the best way to limit TLB flushing so
that only the necessary nodes are flushed. Here is a patch that
I believe will work. I added a bitmask to the mm_context_t to
track nodes where the context has been loaded. The TLB flush routine
issues the TLB flush requests only to these nodes.
Are there other/better ways that I can do this??
------------------------------------------------------------------------------
diff -Naur linux_base/arch/ia64/mm/tlb.c linux/arch/ia64/mm/tlb.c
--- linux_base/arch/ia64/mm/tlb.c Tue Nov 25 10:03:46 2003
+++ linux/arch/ia64/mm/tlb.c Tue Nov 25 10:41:25 2003
@@ -59,7 +59,7 @@
for_each_process(tsk) {
if (!tsk->mm)
continue;
- tsk_context = tsk->mm->context;
+ tsk_context = tsk->mm->context.ctx;
if (tsk_context == ia64_ctx.next) {
if (++ia64_ctx.next >= ia64_ctx.limit) {
/* empty range: reset the range limit and start over */
diff -Naur linux_base/arch/ia64/sn/kernel/sn2/sn2_smp.c linux/arch/ia64/sn/kernel/sn2/sn2_smp.c
--- linux_base/arch/ia64/sn/kernel/sn2/sn2_smp.c Tue Nov 25 10:03:46 2003
+++ linux/arch/ia64/sn/kernel/sn2/sn2_smp.c Tue Nov 25 10:42:34 2003
@@ -98,6 +99,7 @@
int cnode, mycnode, nasid, flushed=0;
volatile unsigned long *ptc0, *ptc1;
unsigned long flags=0, data0, data1;
+ struct mm_struct *mm=current->active_mm;
data0 = (1UL<<SH_PTC_0_A_SHFT) |
(nbits<<SH_PTC_0_PS_SHFT) |
@@ -113,9 +115,8 @@
do {
data1 = start | (1UL<<SH_PTC_1_START_SHFT);
- for (cnode = 0; cnode < numnodes; cnode++) {
- if (is_headless_node(cnode))
- continue;
+ for (cnode=find_first_bit(&mm->context.node_history, numnodes); cnode < numnodes;
+ cnode=find_next_bit(&mm->context.node_history, numnodes, ++cnode)) {
if (cnode == mycnode) {
asm volatile ("ptc.ga %0,%1;;srlz.i;;" :: "r"(start), "r"(nbits<<2) : "memory");
} else {
diff -Naur linux_base/include/asm-ia64/mmu.h linux/include/asm-ia64/mmu.h
--- linux_base/include/asm-ia64/mmu.h Tue Nov 25 10:03:47 2003
+++ linux/include/asm-ia64/mmu.h Tue Nov 25 10:45:19 2003
@@ -1,11 +1,20 @@
#ifndef __MMU_H
#define __MMU_H
+#ifdef CONFIG_NUMA
+#include <linux/cpumask.h>
+#endif
+
/*
* Type for a context number. We declare it volatile to ensure proper ordering when it's
* accessed outside of spinlock'd critical sections (e.g., as done in activate_mm() and
* init_new_context()).
*/
-typedef volatile unsigned long mm_context_t;
+typedef struct {
+ volatile unsigned long ctx;
+#ifdef CONFIG_NUMA
+ cpumask_t node_history; /* ZZZ change to nodemask_t when avail */
+#endif
+} mm_context_t;
#endif
diff -Naur linux_base/include/asm-ia64/mmu_context.h linux/include/asm-ia64/mmu_context.h
--- linux_base/include/asm-ia64/mmu_context.h Tue Nov 25 10:03:47 2003
+++ linux/include/asm-ia64/mmu_context.h Tue Nov 25 11:03:41 2003
@@ -75,6 +75,12 @@
{
}
+static inline void
+clear_mm_context(struct mm_struct *mm)
+{
+ memset(&mm->context, 0, sizeof(mm->context));
+}
+
/*
* When the context counter wraps around all TLBs need to be flushed because an old
* context number might have been reused. This is signalled by the ia64_need_tlb_flush
@@ -92,26 +98,27 @@
}
}
-static inline mm_context_t
+static inline unsigned long
get_mmu_context (struct mm_struct *mm)
{
- mm_context_t context = mm->context;
+ mm_context_t *context = &mm->context;
+ unsigned long ctx = context->ctx;
- if (context)
- return context;
+ if (ctx)
+ return ctx;
spin_lock(&ia64_ctx.lock);
{
/* re-check, now that we've got the lock: */
- context = mm->context;
- if (context == 0) {
+ ctx = context->ctx;
+ if (ctx == 0) {
if (ia64_ctx.next >= ia64_ctx.limit)
wrap_mmu_context(mm);
- mm->context = context = ia64_ctx.next++;
+ context->ctx = ctx = ia64_ctx.next++;
}
}
spin_unlock(&ia64_ctx.lock);
- return context;
+ return ctx;
}
/*
@@ -122,7 +129,7 @@
init_new_context (struct task_struct *p, struct mm_struct *mm)
{
MMU_TRACE('N', smp_processor_id(), mm, 0);
- mm->context = 0;
+ clear_mm_context(mm);
return 0;
}
@@ -134,7 +141,7 @@
}
static inline void
-reload_context (mm_context_t context)
+reload_context (unsigned int context)
{
unsigned long rid;
unsigned long rid_incr = 0;
@@ -164,15 +171,18 @@
static inline void
activate_context (struct mm_struct *mm)
{
- mm_context_t context;
+ unsigned long ctx;
+#ifdef CONFIG_NUMA
+ set_bit(numa_node_id(), &mm->context.node_history);
+#endif
do {
- context = get_mmu_context(mm);
+ ctx = get_mmu_context(mm);
MMU_TRACE('A', smp_processor_id(), mm, context);
- reload_context(context);
+ reload_context(ctx);
MMU_TRACE('a', smp_processor_id(), mm, context);
/* in the unlikely event of a TLB-flush by another thread, redo the load: */
- } while (unlikely(context != mm->context));
+ } while (unlikely(ctx != mm->context.ctx));
}
#define deactivate_mm(tsk,mm) \
diff -Naur linux_base/include/asm-ia64/tlbflush.h linux/include/asm-ia64/tlbflush.h
--- linux_base/include/asm-ia64/tlbflush.h Tue Nov 25 10:03:47 2003
+++ linux/include/asm-ia64/tlbflush.h Tue Nov 25 10:47:48 2003
@@ -52,7 +52,7 @@
if (!mm)
goto out;
- mm->context = 0;
+ clear_mm_context(mm);
if (atomic_read(&mm->mm_users) == 0)
goto out; /* happens as a result of exit_mmap() */
diff -Naur linux_base/mm/memory.c linux/mm/memory.c
--- linux_base/mm/memory.c Tue Nov 25 10:03:50 2003
+++ linux/mm/memory.c Tue Nov 25 10:55:00 2003
@@ -572,9 +572,10 @@
if ((long)zap_bytes > 0)
continue;
if (need_resched()) {
+ int fullmm = (*tlbp)->fullmm;
tlb_finish_mmu(*tlbp, tlb_start, start);
cond_resched_lock(&mm->page_table_lock);
- *tlbp = tlb_gather_mmu(mm, 0);
+ *tlbp = tlb_gather_mmu(mm, fullmm);
tlb_start_valid = 0;
}
zap_bytes = ZAP_BLOCK_SIZE;
--
Thanks
Jack Steiner (steiner@sgi.com) 651-683-5302
Principal Engineer SGI - Silicon Graphics, Inc.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2003-12-01 21:42 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-12-01 21:42 TLB flushing on SGI platforms David Mosberger
-- strict thread matches above, loose matches on Subject: below --
2003-11-26 15:43 Jack Steiner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox