public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] - Improve SN2 TLB flushing algorithms
@ 2004-01-28 20:59 Jack Steiner
  2004-01-28 21:17 ` Christoph Hellwig
                   ` (12 more replies)
  0 siblings, 13 replies; 14+ messages in thread
From: Jack Steiner @ 2004-01-28 20:59 UTC (permalink / raw)
  To: linux-ia64

The SGI NUMA platform does not use the hardware "ptc" instruction
to flush TLBs. Instead, it has to write an MMR on the chipset on each
node to cause a TLB flush transaction to be placed on the bus. On
large systems, the overhead to broadcast the TLB flush to every node
in the system is one of the hot spots in the kernel.

In most cases, the TLB context being flushed has been loaded into
a small subset of the nodes. Flushing every node is unnecessary.

This patch uses the cpu_vm_mask to track cpus that have loaded a context. 
TLB's are flushed only on these nodes.


(This patch is an update to a patch proposed in Dec. It incorporates
suggestions from David M that substantially improve the patch).




--- linux.base/./include/asm-ia64/mmu_context.h	Fri Jan  9 00:59:09 2004
+++ linux/./include/asm-ia64/mmu_context.h	Tue Jan 27 12:56:13 2004
@@ -21,6 +21,7 @@
 
 # ifndef __ASSEMBLY__
 
+#include <linux/config.h>
 #include <linux/compiler.h>
 #include <linux/percpu.h>
 #include <linux/sched.h>
@@ -106,6 +107,9 @@
 		/* re-check, now that we've got the lock: */
 		context = mm->context;
 		if (context == 0) {
+#ifdef CONFIG_NUMA
+			cpus_clear(mm->cpu_vm_mask);
+#endif
 			if (ia64_ctx.next >= ia64_ctx.limit)
 				wrap_mmu_context(mm);
 			mm->context = context = ia64_ctx.next++;
@@ -170,6 +174,10 @@
 	do {
 		context = get_mmu_context(mm);
 		MMU_TRACE('A', smp_processor_id(), mm, context);
+#ifdef CONFIG_NUMA
+		if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
+			cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+#endif
 		reload_context(context);
 		MMU_TRACE('a', smp_processor_id(), mm, context);
 		/* in the unlikely event of a TLB-flush by another thread, redo the load: */



--- linux.base/./arch/ia64/sn/kernel/sn2/sn2_smp.c	Mon Jan 26 17:06:03 2004
+++ linux/./arch/ia64/sn/kernel/sn2/sn2_smp.c	Tue Jan 27 10:28:30 2004
@@ -26,6 +26,8 @@
 #include <asm/delay.h>
 #include <asm/io.h>
 #include <asm/smp.h>
+#include <asm/numa.h>
+#include <asm/bitops.h>
 #include <asm/hw_irq.h>
 #include <asm/current.h>
 #include <asm/sn/sn_cpuid.h>
@@ -34,6 +36,13 @@
 #include <asm/sn/nodepda.h>
 #include <asm/sn/rw_mmr.h>
 
+/* When nodemask_t is available, delete the following definitions */
+#define NODEMASK_WORDCOUNT       ((NR_NODES+(BITS_PER_LONG-1))/BITS_PER_LONG)
+#define NODE_MASK_ALL    { [0 ... ((NR_NODES+BITS_PER_LONG-1)/BITS_PER_LONG)-1] = ~0UL }
+#define NODE_MASK_NONE   { [0 ... ((NR_NODES+BITS_PER_LONG-1)/BITS_PER_LONG)-1] = 0 }
+typedef unsigned long   nodemask_t[NODEMASK_WORDCOUNT];
+
+
 void sn2_ptc_deadlock_recovery(unsigned long data0, unsigned long data1);
 
 
@@ -66,14 +75,52 @@
  *
  * Purges the translation caches of all processors of the given virtual address
  * range.
+ *
+ * Note:
+ * 	- cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
+ * 	- cpu_vm_mask is converted into a nodemask of the nodes containing the
+ * 	  cpus in cpu_vm_mask.
+ *	- if only one bit is set in cpu_vm_mask & it is the current cpu,
+ *	  then only the local TLB needs to be flushed. This flushing can be done
+ *	  using ptc.l. This is the common case & avoids the global spinlock.
+ *	- if multiple cpus have loaded the context, then flushing has to be
+ *	  done with ptc.g/MMRs under protection of the global ptc_lock.
  */
 
 void
 sn2_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
 {
-	int			cnode, mycnode, nasid, flushed=0;
+	int			i, cnode, mynasid, cpu, lcpu=0, nasid, flushed=0;
 	volatile unsigned	long	*ptc0, *ptc1;
 	unsigned long		flags=0, data0, data1;
+	struct mm_struct	*mm=current->active_mm;
+	nodemask_t		nodes_flushed=NODE_MASK_NONE;
+	short			nasids[NR_NODES], nix;
+
+	for (i=0, cpu=find_first_bit(&mm->cpu_vm_mask, NR_CPUS); cpu < NR_CPUS;
+			i++, cpu=find_next_bit(&mm->cpu_vm_mask, NR_CPUS, ++cpu)) {
+		cnode = cpu_to_node(cpu);
+		__set_bit(cnode, nodes_flushed);
+		lcpu = cpu;
+	}
+
+	preempt_disable();
+
+	if (likely(i == 1 && lcpu == smp_processor_id())) {
+		do {
+			asm volatile ("ptc.l %0,%1" :: "r"(start), "r"(nbits<<2) : "memory");
+			start += (1UL << nbits);
+		} while (start < end);
+		ia64_srlz_i();
+		preempt_enable();
+		return;
+	}
+
+	nix = 0;
+	for (cnode=find_first_bit(&nodes_flushed, NR_NODES); cnode < NR_NODES; 
+			cnode=find_next_bit(&nodes_flushed, NR_NODES, ++cnode))
+		nasids[nix++] = cnodeid_to_nasid(cnode);
+
 
 	data0 = (1UL<<SH_PTC_0_A_SHFT) |
 		(nbits<<SH_PTC_0_PS_SHFT) |
@@ -83,19 +130,18 @@
 	ptc0 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_0);
 	ptc1 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_1);
 
-	mycnode = numa_node_id();
+
+	mynasid = smp_physical_node_id();
 
 	spin_lock_irqsave(&sn2_global_ptc_lock, flags);
 
 	do {
 		data1 = start | (1UL<<SH_PTC_1_START_SHFT);
-		for (cnode = 0; cnode < numnodes; cnode++) {
-			if (is_headless_node(cnode))
-				continue;
-			if (cnode == mycnode) {
+		for (i=0; i<nix; i++) {
+			nasid = nasids[i];
+			if (likely(nasid == mynasid)) {
 				asm volatile ("ptc.ga %0,%1;;srlz.i;;" :: "r"(start), "r"(nbits<<2) : "memory");
 			} else {
-				nasid = cnodeid_to_nasid(cnode);
 				ptc0 = CHANGE_NASID(nasid, ptc0);
 				ptc1 = CHANGE_NASID(nasid, ptc1);
 				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
@@ -113,6 +159,7 @@
 
 	spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
 
+	preempt_enable();
 }
 
 /*
@@ -218,3 +265,4 @@
 
 	sn_send_IPI_phys(physid, vector, delivery_mode);
 }
+EXPORT_SYMBOL(sn2_send_IPI);
-- 
Thanks

Jack Steiner (steiner@sgi.com)          651-683-5302
Principal Engineer                      SGI - Silicon Graphics, Inc.



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] - Improve SN2 TLB flushing algorithms
  2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
@ 2004-01-28 21:17 ` Christoph Hellwig
  2004-01-28 22:36 ` Jack Steiner
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2004-01-28 21:17 UTC (permalink / raw)
  To: linux-ia64

On Wed, Jan 28, 2004 at 02:59:12PM -0600, Jack Steiner wrote:
> +#ifdef CONFIG_NUMA
> +			cpus_clear(mm->cpu_vm_mask);
> +#endif

I really hate this ifdefs all over the place.  Does this really hurt that
much on non-NUMA systems?  Also SN2 seems to use this code always so
CONFIG_NUMA looks like the wrong ifdef to me.  

> +#ifdef CONFIG_NUMA
> +		if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
> +			cpu_set(smp_processor_id(), mm->cpu_vm_mask);

cpu_test_and_set()?

> +/* When nodemask_t is available, delete the following definitions */
> +#define NODEMASK_WORDCOUNT       ((NR_NODES+(BITS_PER_LONG-1))/BITS_PER_LONG)
> +#define NODE_MASK_ALL    { [0 ... ((NR_NODES+BITS_PER_LONG-1)/BITS_PER_LONG)-1] = ~0UL }
> +#define NODE_MASK_NONE   { [0 ... ((NR_NODES+BITS_PER_LONG-1)/BITS_PER_LONG)-1] = 0 }
> +typedef unsigned long   nodemask_t[NODEMASK_WORDCOUNT];

Don't we have the generic bitmask code merged now?
>
> +	for (i=0, cpu=find_first_bit(&mm->cpu_vm_mask, NR_CPUS); cpu < NR_CPUS;
> +			i++, cpu=find_next_bit(&mm->cpu_vm_mask, NR_CPUS, ++cpu)) {

This assumes a specific cpumask_t implementation.  You should just use
for_each_cpu_mask()

> @@ -218,3 +265,4 @@
>  
>  	sn_send_IPI_phys(physid, vector, delivery_mode);
>  }
> +EXPORT_SYMBOL(sn2_send_IPI);

What's this?


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] - Improve SN2 TLB flushing algorithms
  2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
  2004-01-28 21:17 ` Christoph Hellwig
@ 2004-01-28 22:36 ` Jack Steiner
  2004-01-28 23:57 ` Peter Chubb
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Jack Steiner @ 2004-01-28 22:36 UTC (permalink / raw)
  To: linux-ia64

On Wed, Jan 28, 2004 at 09:17:59PM +0000, Christoph Hellwig wrote:
> On Wed, Jan 28, 2004 at 02:59:12PM -0600, Jack Steiner wrote:
> > +#ifdef CONFIG_NUMA
> > +			cpus_clear(mm->cpu_vm_mask);
> > +#endif
> 
> I really hate this ifdefs all over the place.  Does this really hurt that
> much on non-NUMA systems?  Also SN2 seems to use this code always so
> CONFIG_NUMA looks like the wrong ifdef to me.  

Are you suggesting that we remove the #ifdef OR hide the code
in a function that, depending on config options, may or may not do anything?

The code is needed on kernels built for SN2. That includes both
CONFIG_IA64_GENERIC & CONFIG_IA64_SGI_SN2. I agree that CONFIG_NUMA
is not a great choice but nothing else seemed appropriate. 

I could either delete the #ifdef, or switch it to 
"#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_SGI_SN2)".
The latter could be hidden in an inline function. I can't think of any
other options.

Which looks best or is there a better approach.


> 
> > +#ifdef CONFIG_NUMA
> > +		if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
> > +			cpu_set(smp_processor_id(), mm->cpu_vm_mask);
> 
> cpu_test_and_set()?

On IA64 (not sure about other architectures), cpu_test_and_set will always 
set the bit regardless of it's previous state. That causes the cacheline containing
the bitmask to be bounced between cpus - possibly unnecessarily. For most
applications this may not matter. However, for large OpenMP apps, this may add additional
overhead. I was trying to avoid this extra overhead.

In addition, the code in include/asm-sparc64/mmu_context.h is similar (activate_mm).
I assume (just a guess) that they were trying to avoid the same problem.



> 
> > +/* When nodemask_t is available, delete the following definitions */
> > +#define NODEMASK_WORDCOUNT       ((NR_NODES+(BITS_PER_LONG-1))/BITS_PER_LONG)
> > +#define NODE_MASK_ALL    { [0 ... ((NR_NODES+BITS_PER_LONG-1)/BITS_PER_LONG)-1] = ~0UL }
> > +#define NODE_MASK_NONE   { [0 ... ((NR_NODES+BITS_PER_LONG-1)/BITS_PER_LONG)-1] = 0 }
> > +typedef unsigned long   nodemask_t[NODEMASK_WORDCOUNT];
> 
> Don't we have the generic bitmask code merged now?

Agree. Will change.


> >
> > +	for (i=0, cpu=find_first_bit(&mm->cpu_vm_mask, NR_CPUS); cpu < NR_CPUS;
> > +			i++, cpu=find_next_bit(&mm->cpu_vm_mask, NR_CPUS, ++cpu)) {
> 
> This assumes a specific cpumask_t implementation.  You should just use
> for_each_cpu_mask()

Agree. Will change.


> 
> > @@ -218,3 +265,4 @@
> >  
> >  	sn_send_IPI_phys(physid, vector, delivery_mode);
> >  }
> > +EXPORT_SYMBOL(sn2_send_IPI);
> 
> What's this?

Whoops - different patch. I'll delete it.


-- 
Thanks

Jack Steiner (steiner@sgi.com)          651-683-5302
Principal Engineer                      SGI - Silicon Graphics, Inc.



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] - Improve SN2 TLB flushing algorithms
  2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
  2004-01-28 21:17 ` Christoph Hellwig
  2004-01-28 22:36 ` Jack Steiner
@ 2004-01-28 23:57 ` Peter Chubb
  2004-01-29  0:38 ` David Mosberger
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Peter Chubb @ 2004-01-28 23:57 UTC (permalink / raw)
  To: linux-ia64

>>>>> "Jack" == Jack Steiner <steiner@SGI.com> writes:

Jack> On Wed, Jan 28, 2004 at 09:17:59PM +0000, Christoph Hellwig
Jack> wrote:
>> On Wed, Jan 28, 2004 at 02:59:12PM -0600, Jack Steiner wrote: >
>> +#ifdef CONFIG_NUMA > + cpus_clear(mm->cpu_vm_mask); > +#endif
>> 
>> I really hate this ifdefs all over the place.  Does this really
>> hurt that much on non-NUMA systems?  Also SN2 seems to use this
>> code always so CONFIG_NUMA looks like the wrong ifdef to me.

Jack> Are you suggesting that we remove the #ifdef OR hide the code in
Jack> a function that, depending on config options, may or may not do
Jack> anything?

Jack> The code is needed on kernels built for SN2. That includes both
Jack> CONFIG_IA64_GENERIC & CONFIG_IA64_SGI_SN2. I agree that
Jack> CONFIG_NUMA is not a great choice but nothing else seemed
Jack> appropriate.

As CONFIG_IA64_GENERIC is a superset of CONFIG_IA64_SGI_SN2
why not make CONFIG_IA64_GENERIC turn on CONFIG_IA64_SGI_SN2   and
then only test for the latter.

And yes, using a function that disappears or does nothing on platforms
for which it is inappropriate is a better approach than littering
#ifdefs through the code.

--
Dr Peter Chubb  http://www.gelato.unsw.edu.au  peterc AT gelato.unsw.edu.au
The technical we do immediately,  the political takes *forever*

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] - Improve SN2 TLB flushing algorithms
  2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
                   ` (2 preceding siblings ...)
  2004-01-28 23:57 ` Peter Chubb
@ 2004-01-29  0:38 ` David Mosberger
  2004-01-29  1:13 ` Jack Steiner
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: David Mosberger @ 2004-01-29  0:38 UTC (permalink / raw)
  To: linux-ia64

Jack,

As others pointed out, the CONFIG_NUMA are rather ugly here:

 > @@ -106,6 +107,9 @@
 >  		/* re-check, now that we've got the lock: */
 >  		context = mm->context;
 >  		if (context == 0) {
 > +#ifdef CONFIG_NUMA
 > +			cpus_clear(mm->cpu_vm_mask);
 > +#endif
 >  			if (ia64_ctx.next >= ia64_ctx.limit)
 >  				wrap_mmu_context(mm);
 >  			mm->context = context = ia64_ctx.next++;
 > @@ -170,6 +174,10 @@
 >  	do {
 >  		context = get_mmu_context(mm);
 >  		MMU_TRACE('A', smp_processor_id(), mm, context);
 > +#ifdef CONFIG_NUMA
 > +		if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
 > +			cpu_set(smp_processor_id(), mm->cpu_vm_mask);
 > +#endif
 >  		reload_context(context);
 >  		MMU_TRACE('a', smp_processor_id(), mm, context);
 >  		/* in the unlikely event of a TLB-flush by another thread, redo the load: */

I'd be OK with unconditionally maintaining cpu_vm_mask but only if
cpu_vm_mask falls into the same cache-line as mm->context.  AFAIK,
this _used_ to be the case, but recently some (large) members were
added between "cpu_vm_mask" and "context".  I suspect that's just a
mistake.  Could you float a patch on lkml to fix this?

	--david

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] - Improve SN2 TLB flushing algorithms
  2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
                   ` (3 preceding siblings ...)
  2004-01-29  0:38 ` David Mosberger
@ 2004-01-29  1:13 ` Jack Steiner
  2004-01-29  3:11 ` Matthew Wilcox
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Jack Steiner @ 2004-01-29  1:13 UTC (permalink / raw)
  To: linux-ia64

On Thu, Jan 29, 2004 at 10:57:49AM +1100, Peter Chubb wrote:
> >>>>> "Jack" == Jack Steiner <steiner@SGI.com> writes:
> 
> Jack> On Wed, Jan 28, 2004 at 09:17:59PM +0000, Christoph Hellwig
> Jack> wrote:
> >> On Wed, Jan 28, 2004 at 02:59:12PM -0600, Jack Steiner wrote: >
> >> +#ifdef CONFIG_NUMA > + cpus_clear(mm->cpu_vm_mask); > +#endif
> >> 
> >> I really hate this ifdefs all over the place.  Does this really
> >> hurt that much on non-NUMA systems?  Also SN2 seems to use this
> >> code always so CONFIG_NUMA looks like the wrong ifdef to me.
> 
> Jack> Are you suggesting that we remove the #ifdef OR hide the code in
> Jack> a function that, depending on config options, may or may not do
> Jack> anything?

I dont have a strong opinion. Either is acceptable to me.
David suggested removing the #ifdef iff we can move cpu_vm_mask
closer to the mm_context_t. I'll pursue this....


> 
> Jack> The code is needed on kernels built for SN2. That includes both
> Jack> CONFIG_IA64_GENERIC & CONFIG_IA64_SGI_SN2. I agree that
> Jack> CONFIG_NUMA is not a great choice but nothing else seemed
> Jack> appropriate.
> 
> As CONFIG_IA64_GENERIC is a superset of CONFIG_IA64_SGI_SN2
> why not make CONFIG_IA64_GENERIC turn on CONFIG_IA64_SGI_SN2   and
> then only test for the latter.

This won't work with the current usage of the CONFIG options. For 
example:
	# if defined (CONFIG_IA64_HP_SIM)
	#  include <asm/machvec_hpsim.h>
	# elif defined (CONFIG_IA64_DIG)
	#  include <asm/machvec_dig.h>
	# elif defined (CONFIG_IA64_HP_ZX1)
	#  include <asm/machvec_hpzx1.h>
	# elif defined (CONFIG_IA64_SGI_SN2)
	#  include <asm/machvec_sn2.h>
	# elif defined (CONFIG_IA64_GENERIC)
	....

(It sounded like a good idea though. Most but not all places that check
CONFIG_IA64_SGI_SN2 also check for CONFIG_IA64_GENERIC).


> 
> And yes, using a function that disappears or does nothing on platforms
> for which it is inappropriate is a better approach than littering
> #ifdefs though the code.
> 
> --
> Dr Peter Chubb  http://www.gelato.unsw.edu.au  peterc AT gelato.unsw.edu.au
> The technical we do immediately,  the political takes *forever*

-- 
Thanks

Jack Steiner (steiner@sgi.com)          651-683-5302
Principal Engineer                      SGI - Silicon Graphics, Inc.



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] - Improve SN2 TLB flushing algorithms
  2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
                   ` (4 preceding siblings ...)
  2004-01-29  1:13 ` Jack Steiner
@ 2004-01-29  3:11 ` Matthew Wilcox
  2004-01-29  4:00 ` Jack Steiner
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Matthew Wilcox @ 2004-01-29  3:11 UTC (permalink / raw)
  To: linux-ia64

On Wed, Jan 28, 2004 at 07:13:42PM -0600, Jack Steiner wrote:
> This wont work with the current usage of the CONFIG options. For 
> example:
> 	# if defined (CONFIG_IA64_HP_SIM)
> 	#  include <asm/machvec_hpsim.h>
> 	# elif defined (CONFIG_IA64_DIG)
> 	#  include <asm/machvec_dig.h>
> 	# elif defined (CONFIG_IA64_HP_ZX1)
> 	#  include <asm/machvec_hpzx1.h>
> 	# elif defined (CONFIG_IA64_SGI_SN2)
> 	#  include <asm/machvec_sn2.h>
> 	# elif defined (CONFIG_IA64_GENERIC)
> 	....
> 
> (It sounded like a good idea though. Most but not all places that check
> CONFIG_IA64_SGI_SN2 also check for CONFIG_IA64_GENERIC).

This one can be easily fixed by testing *first* for CONFIG_IA64_GENERIC,
then for all the other options.  Any other places that won't work?

-- 
"Next the statesmen will invent cheap lies, putting the blame upon 
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince 
himself that the war is just, and will thank God for the better sleep 
he enjoys after this process of grotesque self-deception." -- Mark Twain

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] - Improve SN2 TLB flushing algorithms
  2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
                   ` (5 preceding siblings ...)
  2004-01-29  3:11 ` Matthew Wilcox
@ 2004-01-29  4:00 ` Jack Steiner
  2004-01-29 13:40 ` Christoph Hellwig
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Jack Steiner @ 2004-01-29  4:00 UTC (permalink / raw)
  To: linux-ia64

On Thu, Jan 29, 2004 at 03:11:42AM +0000, Matthew Wilcox wrote:
> On Wed, Jan 28, 2004 at 07:13:42PM -0600, Jack Steiner wrote:
> > This wont work with the current usage of the CONFIG options. For 
> > example:
> > 	# if defined (CONFIG_IA64_HP_SIM)
> > 	#  include <asm/machvec_hpsim.h>
> > 	# elif defined (CONFIG_IA64_DIG)
> > 	#  include <asm/machvec_dig.h>
> > 	# elif defined (CONFIG_IA64_HP_ZX1)
> > 	#  include <asm/machvec_hpzx1.h>
> > 	# elif defined (CONFIG_IA64_SGI_SN2)
> > 	#  include <asm/machvec_sn2.h>
> > 	# elif defined (CONFIG_IA64_GENERIC)
> > 	....
> > 
> > (It sounded like a good idea though. Most but not all places that check
> > CONFIG_IA64_SGI_SN2 also check for CONFIG_IA64_GENERIC).
> 
> This one can be easily fixed by testing *first* for CONFIG_IA64_GENERIC,
> then for all the other options.  

Sure, but I was reluctant to make that kind of change. As far as I know, most
code expects only a single "system type" to be defined. It seems like it
could prove to be error prone if these definition were order dependent.

If the ordering was incorrect, code could compile ok but might not run on
all platforms if the SN2 version of code was used instead of the generic version.


> Any other places that won't work?

Most of the code looks ok.
AFAIK, we haven't tested generic 2.6 kernels on SN2 (Jesse - is this true)  so there may 
be other places that will have problems. But in general, I agree that proper
ordering should usually work.
	
-- 
Thanks

Jack Steiner (steiner@sgi.com)          651-683-5302
Principal Engineer                      SGI - Silicon Graphics, Inc.



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] - Improve SN2 TLB flushing algorithms
  2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
                   ` (6 preceding siblings ...)
  2004-01-29  4:00 ` Jack Steiner
@ 2004-01-29 13:40 ` Christoph Hellwig
  2004-01-29 17:07 ` Jesse Barnes
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2004-01-29 13:40 UTC (permalink / raw)
  To: linux-ia64

On Wed, Jan 28, 2004 at 04:36:01PM -0600, Jack Steiner wrote:
> Are you suggesting that we remove the #ifdef OR hide the code
> in a function that, depending on config options, may or may not do anything?

Yes.  And I wonder whether we should just do it unconditionally, but
I'm not really sure.

> > > +		if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
> > > +			cpu_set(smp_processor_id(), mm->cpu_vm_mask);
> > 
> > cpu_test_and_set()?
> 
> On IA64 (not sure about other architectures), cpu_test_and_set will always 
> set the bit regardless of it's previous state. That causes the cacheline containing
> the bitmask to be bounced between cpus - possibly unnecessarily. For most

Ok.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] - Improve SN2 TLB flushing algorithms
  2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
                   ` (7 preceding siblings ...)
  2004-01-29 13:40 ` Christoph Hellwig
@ 2004-01-29 17:07 ` Jesse Barnes
  2004-01-29 22:56 ` Jack Steiner
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Jesse Barnes @ 2004-01-29 17:07 UTC (permalink / raw)
  To: linux-ia64

On Wed, Jan 28, 2004 at 10:00:08PM -0600, Jack Steiner wrote:
> Most of the code looks ok.
> AFAIK, we havent tested generic 2.6 kernels on SN2 (Jesse - is this true)  so there may 

No, I've tested them a bit.  They seem ok so far, but I haven't put any
stress on them.

Jesse

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] - Improve SN2 TLB flushing algorithms
  2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
                   ` (8 preceding siblings ...)
  2004-01-29 17:07 ` Jesse Barnes
@ 2004-01-29 22:56 ` Jack Steiner
  2004-01-29 23:09 ` Jesse Barnes
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 14+ messages in thread
From: Jack Steiner @ 2004-01-29 22:56 UTC (permalink / raw)
  To: linux-ia64

Here is the next iteration of the patch.
I also posted a change to LKML to move cpu_vm_mask next to context.



--- linux.base/./include/asm-ia64/mmu_context.h	Fri Jan  9 00:59:09 2004
+++ linux/./include/asm-ia64/mmu_context.h	Thu Jan 29 13:08:11 2004
@@ -21,6 +21,7 @@
 
 # ifndef __ASSEMBLY__
 
+#include <linux/config.h>
 #include <linux/compiler.h>
 #include <linux/percpu.h>
 #include <linux/sched.h>
@@ -106,6 +107,7 @@
 		/* re-check, now that we've got the lock: */
 		context = mm->context;
 		if (context == 0) {
+			cpus_clear(mm->cpu_vm_mask);
 			if (ia64_ctx.next >= ia64_ctx.limit)
 				wrap_mmu_context(mm);
 			mm->context = context = ia64_ctx.next++;
@@ -170,6 +172,8 @@
 	do {
 		context = get_mmu_context(mm);
 		MMU_TRACE('A', smp_processor_id(), mm, context);
+		if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
+			cpu_set(smp_processor_id(), mm->cpu_vm_mask);
 		reload_context(context);
 		MMU_TRACE('a', smp_processor_id(), mm, context);
 		/* in the unlikely event of a TLB-flush by another thread, redo the load: */




--- linux.base/./arch/ia64/sn/kernel/sn2/sn2_smp.c	Mon Jan 26 17:06:03 2004
+++ linux/./arch/ia64/sn/kernel/sn2/sn2_smp.c	Thu Jan 29 16:50:51 2004
@@ -5,7 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/init.h>
@@ -26,6 +26,8 @@
 #include <asm/delay.h>
 #include <asm/io.h>
 #include <asm/smp.h>
+#include <asm/numa.h>
+#include <asm/bitops.h>
 #include <asm/hw_irq.h>
 #include <asm/current.h>
 #include <asm/sn/sn_cpuid.h>
@@ -66,14 +68,56 @@
  *
  * Purges the translation caches of all processors of the given virtual address
  * range.
+ *
+ * Note:
+ * 	- cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
+ * 	- cpu_vm_mask is converted into a nodemask of the nodes containing the
+ * 	  cpus in cpu_vm_mask.
+ *	- if only one bit is set in cpu_vm_mask & it is the current cpu,
+ *	  then only the local TLB needs to be flushed. This flushing can be done
+ *	  using ptc.l. This is the common case & avoids the global spinlock.
+ *	- if multiple cpus have loaded the context, then flushing has to be
+ *	  done with ptc.g/MMRs under protection of the global ptc_lock.
  */
 
 void
 sn2_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
 {
-	int			cnode, mycnode, nasid, flushed=0;
+	int			i, cnode, mynasid, cpu, lcpu=0, nasid, flushed=0;
 	volatile unsigned	long	*ptc0, *ptc1;
 	unsigned long		flags=0, data0, data1;
+	struct mm_struct	*mm=current->active_mm;
+	short			nasids[NR_NODES], nix;
+	DECLARE_BITMAP(nodes_flushed, NR_NODES);
+
+	CLEAR_BITMAP(nodes_flushed, NR_NODES);
+
+	i = 0;
+	for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
+		cnode = cpu_to_node(cpu);
+		__set_bit(cnode, nodes_flushed);
+		lcpu = cpu;
+		i++;
+		printk("cpu %d\n", cpu);
+	}
+
+	preempt_disable();
+
+	if (likely(i == 1 && lcpu == smp_processor_id())) {
+		do {
+			asm volatile ("ptc.l %0,%1" :: "r"(start), "r"(nbits<<2) : "memory");
+			start += (1UL << nbits);
+		} while (start < end);
+		ia64_srlz_i();
+		preempt_enable();
+		return;
+	}
+
+	nix = 0;
+	for (cnode=find_first_bit(&nodes_flushed, NR_NODES); cnode < NR_NODES; 
+			cnode=find_next_bit(&nodes_flushed, NR_NODES, ++cnode))
+		nasids[nix++] = cnodeid_to_nasid(cnode);
+
 
 	data0 = (1UL<<SH_PTC_0_A_SHFT) |
 		(nbits<<SH_PTC_0_PS_SHFT) |
@@ -83,19 +127,18 @@
 	ptc0 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_0);
 	ptc1 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_1);
 
-	mycnode = numa_node_id();
+
+	mynasid = smp_physical_node_id();
 
 	spin_lock_irqsave(&sn2_global_ptc_lock, flags);
 
 	do {
 		data1 = start | (1UL<<SH_PTC_1_START_SHFT);
-		for (cnode = 0; cnode < numnodes; cnode++) {
-			if (is_headless_node(cnode))
-				continue;
-			if (cnode == mycnode) {
+		for (i=0; i<nix; i++) {
+			nasid = nasids[i];
+			if (likely(nasid == mynasid)) {
 				asm volatile ("ptc.ga %0,%1;;srlz.i;;" :: "r"(start), "r"(nbits<<2) : "memory");
 			} else {
-				nasid = cnodeid_to_nasid(cnode);
 				ptc0 = CHANGE_NASID(nasid, ptc0);
 				ptc1 = CHANGE_NASID(nasid, ptc1);
 				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
@@ -113,6 +156,7 @@
 
 	spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
 
+	preempt_enable();
 }
 
 /*

-- 
Thanks

Jack Steiner (steiner@sgi.com)          651-683-5302
Principal Engineer                      SGI - Silicon Graphics, Inc.



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] - Improve SN2 TLB flushing algorithms
  2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
                   ` (9 preceding siblings ...)
  2004-01-29 22:56 ` Jack Steiner
@ 2004-01-29 23:09 ` Jesse Barnes
  2004-01-30  2:22 ` Jack Steiner
  2004-02-05 21:12 ` Jack Steiner
  12 siblings, 0 replies; 14+ messages in thread
From: Jesse Barnes @ 2004-01-29 23:09 UTC (permalink / raw)
  To: linux-ia64

On Thu, Jan 29, 2004 at 04:56:00PM -0600, Jack Steiner wrote:
> --- linux.base/./include/asm-ia64/mmu_context.h	Fri Jan  9 00:59:09 2004
> +++ linux/./include/asm-ia64/mmu_context.h	Thu Jan 29 13:08:11 2004

How did you generate this patch?  linux/./... seems like an odd path (it
should be applicable with patch -p1).

> @@ -21,6 +21,7 @@
>  
>  # ifndef __ASSEMBLY__
>  
> +#include <linux/config.h>
>  #include <linux/compiler.h>
>  #include <linux/percpu.h>
>  #include <linux/sched.h>

You can drop this hunk since you no longer refer to any CONFIG_*
variables.

> +	if (likely(i == 1 && lcpu == smp_processor_id())) {
> +		do {
> +			asm volatile ("ptc.l %0,%1" :: "r"(start), "r"(nbits<<2) : "memory");

Can you change this to ia64_ptcgl(start, nbits<<2) (I think that's the
right intrinsic).  I'm putting together a patch to convert all the
inline assembly in the sn2 code to use the intrinsics at David's
request, but it'll be easier if we don't add new stuff.

> -		for (cnode = 0; cnode < numnodes; cnode++) {
> -			if (is_headless_node(cnode))
> -				continue;
> -			if (cnode == mycnode) {
> +		for (i=0; i<nix; i++) {
> +			nasid = nasids[i];
> +			if (likely(nasid == mynasid)) {
>  				asm volatile ("ptc.ga %0,%1;;srlz.i;;" :: "r"(start), "r"(nbits<<2) : "memory");
>  			} else {
> -				nasid = cnodeid_to_nasid(cnode);
>  				ptc0 = CHANGE_NASID(nasid, ptc0);
>  				ptc1 = CHANGE_NASID(nasid, ptc1);
>  				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
> @@ -113,6 +156,7 @@

The above will no longer apply since that part has already been changed
to use ia64_ptcga(...).  Are you using the to-linus-2.5 BK tree?  If
not, that's probably easiest.

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] - Improve SN2 TLB flushing algorithms
  2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
                   ` (10 preceding siblings ...)
  2004-01-29 23:09 ` Jesse Barnes
@ 2004-01-30  2:22 ` Jack Steiner
  2004-02-05 21:12 ` Jack Steiner
  12 siblings, 0 replies; 14+ messages in thread
From: Jack Steiner @ 2004-01-30  2:22 UTC (permalink / raw)
  To: linux-ia64

One more time. This include the new intrinsics (sorry - I missed the fact
that this update was made).


--- linux_base/include/asm-ia64/mmu_context.h	Thu Jan 29 18:15:14 2004
+++ linux/include/asm-ia64/mmu_context.h	Thu Jan 29 18:51:53 2004
@@ -21,6 +21,7 @@
 
 # ifndef __ASSEMBLY__
 
+#include <linux/config.h>
 #include <linux/compiler.h>
 #include <linux/percpu.h>
 #include <linux/sched.h>
@@ -106,6 +107,7 @@
 		/* re-check, now that we've got the lock: */
 		context = mm->context;
 		if (context == 0) {
+			cpus_clear(mm->cpu_vm_mask);
 			if (ia64_ctx.next >= ia64_ctx.limit)
 				wrap_mmu_context(mm);
 			mm->context = context = ia64_ctx.next++;
@@ -170,6 +172,8 @@
 	do {
 		context = get_mmu_context(mm);
 		MMU_TRACE('A', smp_processor_id(), mm, context);
+		if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
+			cpu_set(smp_processor_id(), mm->cpu_vm_mask);
 		reload_context(context);
 		MMU_TRACE('a', smp_processor_id(), mm, context);
 		/* in the unlikely event of a TLB-flush by another thread, redo the load: */



--- linux_base/arch/ia64/sn/kernel/sn2/sn2_smp.c	Thu Jan 29 18:14:23 2004
+++ linux/arch/ia64/sn/kernel/sn2/sn2_smp.c	Thu Jan 29 18:59:05 2004
@@ -5,7 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/init.h>
@@ -26,6 +26,8 @@
 #include <asm/delay.h>
 #include <asm/io.h>
 #include <asm/smp.h>
+#include <asm/numa.h>
+#include <asm/bitops.h>
 #include <asm/hw_irq.h>
 #include <asm/current.h>
 #include <asm/sn/sn_cpuid.h>
@@ -66,14 +68,56 @@
  *
  * Purges the translation caches of all processors of the given virtual address
  * range.
+ *
+ * Note:
+ * 	- cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
+ * 	- cpu_vm_mask is converted into a nodemask of the nodes containing the
+ * 	  cpus in cpu_vm_mask.
+ *	- if only one bit is set in cpu_vm_mask & it is the current cpu,
+ *	  then only the local TLB needs to be flushed. This flushing can be done
+ *	  using ptc.l. This is the common case & avoids the global spinlock.
+ *	- if multiple cpus have loaded the context, then flushing has to be
+ *	  done with ptc.g/MMRs under protection of the global ptc_lock.
  */
 
 void
 sn2_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
 {
-	int			cnode, mycnode, nasid, flushed=0;
+	int			i, cnode, mynasid, cpu, lcpu=0, nasid, flushed=0;
 	volatile unsigned	long	*ptc0, *ptc1;
 	unsigned long		flags=0, data0, data1;
+	struct mm_struct	*mm=current->active_mm;
+	short			nasids[NR_NODES], nix;
+	DECLARE_BITMAP(nodes_flushed, NR_NODES);
+
+	CLEAR_BITMAP(nodes_flushed, NR_NODES);
+
+	i = 0;
+	for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
+		cnode = cpu_to_node(cpu);
+		__set_bit(cnode, nodes_flushed);
+		lcpu = cpu;
+		i++;
+		printk("cpu %d\n", cpu);
+	}
+
+	preempt_disable();
+
+	if (likely(i == 1 && lcpu == smp_processor_id())) {
+		do {
+			ia64_ptcl(start, nbits<<2);
+			start += (1UL << nbits);
+		} while (start < end);
+		ia64_srlz_i();
+		preempt_enable();
+		return;
+	}
+
+	nix = 0;
+	for (cnode=find_first_bit(&nodes_flushed, NR_NODES); cnode < NR_NODES; 
+			cnode=find_next_bit(&nodes_flushed, NR_NODES, ++cnode))
+		nasids[nix++] = cnodeid_to_nasid(cnode);
+
 
 	data0 = (1UL<<SH_PTC_0_A_SHFT) |
 		(nbits<<SH_PTC_0_PS_SHFT) |
@@ -83,20 +127,19 @@
 	ptc0 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_0);
 	ptc1 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_1);
 
-	mycnode = numa_node_id();
+
+	mynasid = smp_physical_node_id();
 
 	spin_lock_irqsave(&sn2_global_ptc_lock, flags);
 
 	do {
 		data1 = start | (1UL<<SH_PTC_1_START_SHFT);
-		for (cnode = 0; cnode < numnodes; cnode++) {
-			if (is_headless_node(cnode))
-				continue;
-			if (cnode == mycnode) {
+		for (i=0; i<nix; i++) {
+			nasid = nasids[i];
+			if (likely(nasid == mynasid)) {
 				ia64_ptcga(start, nbits<<2);
 				ia64_srlz_i();
 			} else {
-				nasid = cnodeid_to_nasid(cnode);
 				ptc0 = CHANGE_NASID(nasid, ptc0);
 				ptc1 = CHANGE_NASID(nasid, ptc1);
 				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
@@ -114,6 +157,7 @@
 
 	spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
 
+	preempt_enable();
 }
 
 /*
-- 
Thanks

Jack Steiner (steiner@sgi.com)          651-683-5302
Principal Engineer                      SGI - Silicon Graphics, Inc.



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] - Improve SN2 TLB flushing algorithms
  2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
                   ` (11 preceding siblings ...)
  2004-01-30  2:22 ` Jack Steiner
@ 2004-02-05 21:12 ` Jack Steiner
  12 siblings, 0 replies; 14+ messages in thread
From: Jack Steiner @ 2004-02-05 21:12 UTC (permalink / raw)
  To: linux-ia64

David - here is an updated patch for SN2 TLB flushing. 
In the previous patch, I forgot to delete the "#include <config.h>"
in mmu_context.h - not needed since the #ifdef's are gone....


diff -Naur linux_base/arch/ia64/sn/kernel/sn2/sn2_smp.c linux/arch/ia64/sn/kernel/sn2/sn2_smp.c
--- linux_base/arch/ia64/sn/kernel/sn2/sn2_smp.c	Thu Jan 29 18:14:23 2004
+++ linux/arch/ia64/sn/kernel/sn2/sn2_smp.c	Mon Feb  2 08:09:31 2004
@@ -5,7 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/init.h>
@@ -26,6 +26,8 @@
 #include <asm/delay.h>
 #include <asm/io.h>
 #include <asm/smp.h>
+#include <asm/numa.h>
+#include <asm/bitops.h>
 #include <asm/hw_irq.h>
 #include <asm/current.h>
 #include <asm/sn/sn_cpuid.h>
@@ -66,14 +68,56 @@
  *
  * Purges the translation caches of all processors of the given virtual address
  * range.
+ *
+ * Note:
+ * 	- cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
+ * 	- cpu_vm_mask is converted into a nodemask of the nodes containing the
+ * 	  cpus in cpu_vm_mask.
+ *	- if only one bit is set in cpu_vm_mask & it is the current cpu,
+ *	  then only the local TLB needs to be flushed. This flushing can be done
+ *	  using ptc.l. This is the common case & avoids the global spinlock.
+ *	- if multiple cpus have loaded the context, then flushing has to be
+ *	  done with ptc.g/MMRs under protection of the global ptc_lock.
  */
 
 void
 sn2_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
 {
-	int			cnode, mycnode, nasid, flushed=0;
+	int			i, cnode, mynasid, cpu, lcpu=0, nasid, flushed=0;
 	volatile unsigned	long	*ptc0, *ptc1;
 	unsigned long		flags=0, data0, data1;
+	struct mm_struct	*mm=current->active_mm;
+	short			nasids[NR_NODES], nix;
+	DECLARE_BITMAP(nodes_flushed, NR_NODES);
+
+	CLEAR_BITMAP(nodes_flushed, NR_NODES);
+
+	i = 0;
+
+	for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
+		cnode = cpu_to_node(cpu);
+		__set_bit(cnode, nodes_flushed);
+		lcpu = cpu;
+		i++;
+	}
+
+	preempt_disable();
+
+	if (likely(i == 1 && lcpu == smp_processor_id())) {
+		do {
+			ia64_ptcl(start, nbits<<2);
+			start += (1UL << nbits);
+		} while (start < end);
+		ia64_srlz_i();
+		preempt_enable();
+		return;
+	}
+
+	nix = 0;
+	for (cnode=find_first_bit(&nodes_flushed, NR_NODES); cnode < NR_NODES; 
+			cnode=find_next_bit(&nodes_flushed, NR_NODES, ++cnode))
+		nasids[nix++] = cnodeid_to_nasid(cnode);
+
 
 	data0 = (1UL<<SH_PTC_0_A_SHFT) |
 		(nbits<<SH_PTC_0_PS_SHFT) |
@@ -83,20 +127,19 @@
 	ptc0 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_0);
 	ptc1 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_1);
 
-	mycnode = numa_node_id();
+
+	mynasid = smp_physical_node_id();
 
 	spin_lock_irqsave(&sn2_global_ptc_lock, flags);
 
 	do {
 		data1 = start | (1UL<<SH_PTC_1_START_SHFT);
-		for (cnode = 0; cnode < numnodes; cnode++) {
-			if (is_headless_node(cnode))
-				continue;
-			if (cnode == mycnode) {
+		for (i=0; i<nix; i++) {
+			nasid = nasids[i];
+			if (likely(nasid == mynasid)) {
 				ia64_ptcga(start, nbits<<2);
 				ia64_srlz_i();
 			} else {
-				nasid = cnodeid_to_nasid(cnode);
 				ptc0 = CHANGE_NASID(nasid, ptc0);
 				ptc1 = CHANGE_NASID(nasid, ptc1);
 				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
@@ -114,6 +157,7 @@
 
 	spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
 
+	preempt_enable();
 }
 
 /*
diff -Naur linux_base/include/asm-ia64/mmu_context.h linux/include/asm-ia64/mmu_context.h
--- linux_base/include/asm-ia64/mmu_context.h	Thu Jan 29 18:15:14 2004
+++ linux/include/asm-ia64/mmu_context.h	Sun Feb  1 12:38:13 2004
@@ -106,6 +106,7 @@
 		/* re-check, now that we've got the lock: */
 		context = mm->context;
 		if (context == 0) {
+			cpus_clear(mm->cpu_vm_mask);
 			if (ia64_ctx.next >= ia64_ctx.limit)
 				wrap_mmu_context(mm);
 			mm->context = context = ia64_ctx.next++;
@@ -170,6 +171,8 @@
 	do {
 		context = get_mmu_context(mm);
 		MMU_TRACE('A', smp_processor_id(), mm, context);
+		if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
+			cpu_set(smp_processor_id(), mm->cpu_vm_mask);
 		reload_context(context);
 		MMU_TRACE('a', smp_processor_id(), mm, context);
 		/* in the unlikely event of a TLB-flush by another thread, redo the load: */
-- 
Thanks

Jack Steiner (steiner@sgi.com)          651-683-5302
Principal Engineer                      SGI - Silicon Graphics, Inc.



^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2004-02-05 21:12 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-01-28 20:59 [PATCH] - Improve SN2 TLB flushing algorithms Jack Steiner
2004-01-28 21:17 ` Christoph Hellwig
2004-01-28 22:36 ` Jack Steiner
2004-01-28 23:57 ` Peter Chubb
2004-01-29  0:38 ` David Mosberger
2004-01-29  1:13 ` Jack Steiner
2004-01-29  3:11 ` Matthew Wilcox
2004-01-29  4:00 ` Jack Steiner
2004-01-29 13:40 ` Christoph Hellwig
2004-01-29 17:07 ` Jesse Barnes
2004-01-29 22:56 ` Jack Steiner
2004-01-29 23:09 ` Jesse Barnes
2004-01-30  2:22 ` Jack Steiner
2004-02-05 21:12 ` Jack Steiner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox