public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
* Re: TLB flushing on SGI platforms
@ 2003-12-01 21:42 David Mosberger
  0 siblings, 0 replies; 2+ messages in thread
From: David Mosberger @ 2003-12-01 21:42 UTC (permalink / raw)
  To: linux-ia64

>>>>> On Wed, 26 Nov 2003 09:43:01 -0600, Jack Steiner <steiner@sgi.com> said:

  Jack> The SGI NUMA platform does not use the hardware "ptc" instruction
  Jack> to flush TLBs. Instead, it has to write an MMR on the chipset on each
  Jack> node to cause a TLB flush transaction to be placed on the bus. On
  Jack> large systems, the overhead to broadcast the TLB flush to every node
  Jack> in the system is one of the hot spots in the kernel.

  Jack> In most cases, the TLB context being flushed has been loaded into
  Jack> a small subset of the nodes. Flushing every node is unnecessary.

  Jack> I'm looking for suggestions on the best way to limit TLB flushing so
  Jack> that only the necessary nodes are flushed. Here is patch that
  Jack> I believe will work. I added a bitmask to the mm_context_t to
  Jack> track nodes where the context has been loaded. The TLB flush routine
  Jack> issues the TLB flush requests only to these nodes.

  Jack> Are there other/better ways that I can do this??

Why not use mm->cpu_vm_mask?  It will give you CPU instead of
node-granularity, but if you really want nodes instead, you can do the
mapping in the NUMA-specific code.

	--david

^ permalink raw reply	[flat|nested] 2+ messages in thread
* TLB flushing on SGI platforms
@ 2003-11-26 15:43 Jack Steiner
  0 siblings, 0 replies; 2+ messages in thread
From: Jack Steiner @ 2003-11-26 15:43 UTC (permalink / raw)
  To: linux-ia64


The SGI NUMA platform does not use the hardware "ptc" instruction
to flush TLBs. Instead, it has to write an MMR on the chipset on each
node to cause a TLB flush transaction to be placed on the bus. On
large systems, the overhead to broadcast the TLB flush to every node
in the system is one of the hot spots in the kernel.

In most cases, the TLB context being flushed has been loaded into
a small subset of the nodes. Flushing every node is unnecessary.

I'm looking for suggestions on the best way to limit TLB flushing so
that only the necessary nodes are flushed. Here is a patch that
I believe will work. I added a bitmask to the mm_context_t to
track nodes where the context has been loaded. The TLB flush routine
issues the TLB flush requests only to these nodes.

Are there other/better ways that I can do this??


------------------------------------------------------------------------------



diff -Naur linux_base/arch/ia64/mm/tlb.c linux/arch/ia64/mm/tlb.c
--- linux_base/arch/ia64/mm/tlb.c	Tue Nov 25 10:03:46 2003
+++ linux/arch/ia64/mm/tlb.c	Tue Nov 25 10:41:25 2003
@@ -59,7 +59,7 @@
 	for_each_process(tsk) {
 		if (!tsk->mm)
 			continue;
-		tsk_context = tsk->mm->context;
+		tsk_context = tsk->mm->context.ctx;
 		if (tsk_context == ia64_ctx.next) {
 			if (++ia64_ctx.next >= ia64_ctx.limit) {
 				/* empty range: reset the range limit and start over */


diff -Naur linux_base/arch/ia64/sn/kernel/sn2/sn2_smp.c linux/arch/ia64/sn/kernel/sn2/sn2_smp.c
--- linux_base/arch/ia64/sn/kernel/sn2/sn2_smp.c	Tue Nov 25 10:03:46 2003
+++ linux/arch/ia64/sn/kernel/sn2/sn2_smp.c	Tue Nov 25 10:42:34 2003
@@ -98,6 +99,7 @@
 	int			cnode, mycnode, nasid, flushed=0;
 	volatile unsigned	long	*ptc0, *ptc1;
 	unsigned long		flags=0, data0, data1;
+	struct mm_struct	*mm=current->active_mm;
 
 	data0 = (1UL<<SH_PTC_0_A_SHFT) |
 		(nbits<<SH_PTC_0_PS_SHFT) |
@@ -113,9 +115,8 @@
 
 	do {
 		data1 = start | (1UL<<SH_PTC_1_START_SHFT);
-		for (cnode = 0; cnode < numnodes; cnode++) {
-			if (is_headless_node(cnode))
-				continue;
+		for (cnode=find_first_bit(&mm->context.node_history, numnodes); cnode < numnodes; 
+				cnode=find_next_bit(&mm->context.node_history, numnodes, ++cnode)) {
 			if (cnode == mycnode) {
 				asm volatile ("ptc.ga %0,%1;;srlz.i;;" :: "r"(start), "r"(nbits<<2) : "memory");
 			} else {


diff -Naur linux_base/include/asm-ia64/mmu.h linux/include/asm-ia64/mmu.h
--- linux_base/include/asm-ia64/mmu.h	Tue Nov 25 10:03:47 2003
+++ linux/include/asm-ia64/mmu.h	Tue Nov 25 10:45:19 2003
@@ -1,11 +1,20 @@
 #ifndef __MMU_H
 #define __MMU_H
 
+#ifdef CONFIG_NUMA
+#include <linux/cpumask.h>
+#endif
+
 /*
  * Type for a context number.  We declare it volatile to ensure proper ordering when it's
  * accessed outside of spinlock'd critical sections (e.g., as done in activate_mm() and
  * init_new_context()).
  */
-typedef volatile unsigned long mm_context_t;
+typedef struct {
+	volatile unsigned long ctx;
+#ifdef CONFIG_NUMA
+	cpumask_t node_history;			/* ZZZ change to nodemask_t when avail */
+#endif
+} mm_context_t;
 
 #endif


diff -Naur linux_base/include/asm-ia64/mmu_context.h linux/include/asm-ia64/mmu_context.h
--- linux_base/include/asm-ia64/mmu_context.h	Tue Nov 25 10:03:47 2003
+++ linux/include/asm-ia64/mmu_context.h	Tue Nov 25 11:03:41 2003
@@ -75,6 +75,12 @@
 {
 }
 
+static inline void
+clear_mm_context(struct mm_struct *mm)
+{
+	memset(&mm->context, 0, sizeof(mm->context));
+}
+
 /*
  * When the context counter wraps around all TLBs need to be flushed because an old
  * context number might have been reused. This is signalled by the ia64_need_tlb_flush
@@ -92,26 +98,27 @@
 	}
 }
 
-static inline mm_context_t
+static inline unsigned long
 get_mmu_context (struct mm_struct *mm)
 {
-	mm_context_t context = mm->context;
+	mm_context_t *context = &mm->context;
+	unsigned long ctx = context->ctx;
 
-	if (context)
-		return context;
+	if (ctx)
+		return ctx;
 
 	spin_lock(&ia64_ctx.lock);
 	{
 		/* re-check, now that we've got the lock: */
-		context = mm->context;
-		if (context == 0) {
+		ctx = context->ctx;
+		if (ctx == 0) {
 			if (ia64_ctx.next >= ia64_ctx.limit)
 				wrap_mmu_context(mm);
-			mm->context = context = ia64_ctx.next++;
+			context->ctx = ctx = ia64_ctx.next++;
 		}
 	}
 	spin_unlock(&ia64_ctx.lock);
-	return context;
+	return ctx;
 }
 
 /*
@@ -122,7 +129,7 @@
 init_new_context (struct task_struct *p, struct mm_struct *mm)
 {
 	MMU_TRACE('N', smp_processor_id(), mm, 0);
-	mm->context = 0;
+	clear_mm_context(mm);
 	return 0;
 }
 
@@ -134,7 +141,7 @@
 }
 
 static inline void
-reload_context (mm_context_t context)
+reload_context (unsigned int context)
 {
 	unsigned long rid;
 	unsigned long rid_incr = 0;
@@ -164,15 +171,18 @@
 static inline void
 activate_context (struct mm_struct *mm)
 {
-	mm_context_t context;
+	unsigned long ctx;
 
+#ifdef CONFIG_NUMA
+	set_bit(numa_node_id(), &mm->context.node_history);
+#endif
 	do {
-		context = get_mmu_context(mm);
+		ctx = get_mmu_context(mm);
 		MMU_TRACE('A', smp_processor_id(), mm, context);
-		reload_context(context);
+		reload_context(ctx);
 		MMU_TRACE('a', smp_processor_id(), mm, context);
 		/* in the unlikely event of a TLB-flush by another thread, redo the load: */
-	} while (unlikely(context != mm->context));
+	} while (unlikely(ctx != mm->context.ctx));
 }
 
 #define deactivate_mm(tsk,mm)					\



diff -Naur linux_base/include/asm-ia64/tlbflush.h linux/include/asm-ia64/tlbflush.h
--- linux_base/include/asm-ia64/tlbflush.h	Tue Nov 25 10:03:47 2003
+++ linux/include/asm-ia64/tlbflush.h	Tue Nov 25 10:47:48 2003
@@ -52,7 +52,7 @@
 	if (!mm)
 		goto out;
 
-	mm->context = 0;
+	clear_mm_context(mm);
 
 	if (atomic_read(&mm->mm_users) == 0)
 		goto out;		/* happens as a result of exit_mmap() */


diff -Naur linux_base/mm/memory.c linux/mm/memory.c
--- linux_base/mm/memory.c	Tue Nov 25 10:03:50 2003
+++ linux/mm/memory.c	Tue Nov 25 10:55:00 2003
@@ -572,9 +572,10 @@
 			if ((long)zap_bytes > 0)
 				continue;
 			if (need_resched()) {
+				int fullmm = (*tlbp)->fullmm;
 				tlb_finish_mmu(*tlbp, tlb_start, start);
 				cond_resched_lock(&mm->page_table_lock);
-				*tlbp = tlb_gather_mmu(mm, 0);
+				*tlbp = tlb_gather_mmu(mm, fullmm);
 				tlb_start_valid = 0;
 			}
 			zap_bytes = ZAP_BLOCK_SIZE;
-- 
Thanks

Jack Steiner (steiner@sgi.com)          651-683-5302
Principal Engineer                      SGI - Silicon Graphics, Inc.



^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2003-12-01 21:42 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-12-01 21:42 TLB flushing on SGI platforms David Mosberger
  -- strict thread matches above, loose matches on Subject: below --
2003-11-26 15:43 Jack Steiner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox