From mboxrd@z Thu Jan 1 00:00:00 1970
From: Jack Steiner <steiner@sgi.com>
Date: Thu, 29 Jan 2004 22:56:00 +0000
Subject: Re: [PATCH] - Improve SN2 TLB flushing algorithms
Message-Id: <20040129225600.GA16786@sgi.com>
List-Id: <linux-ia64.vger.kernel.org>
References: <20040128205912.GA27401@sgi.com>
In-Reply-To: <20040128205912.GA27401@sgi.com>
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
To: linux-ia64@vger.kernel.org

Here is the next iteration of the patch. I also posted a change to LKML
to move cpu_vm_mask next to context.


--- linux.base/./include/asm-ia64/mmu_context.h	Fri Jan 9 00:59:09 2004
+++ linux/./include/asm-ia64/mmu_context.h	Thu Jan 29 13:08:11 2004
@@ -21,6 +21,7 @@
 
 # ifndef __ASSEMBLY__
 
+#include
 #include
 #include
 #include
@@ -106,6 +107,7 @@
 		/* re-check, now that we've got the lock: */
 		context = mm->context;
 		if (context == 0) {
+			cpus_clear(mm->cpu_vm_mask);
 			if (ia64_ctx.next >= ia64_ctx.limit)
 				wrap_mmu_context(mm);
 			mm->context = context = ia64_ctx.next++;
@@ -170,6 +172,8 @@
 	do {
 		context = get_mmu_context(mm);
 		MMU_TRACE('A', smp_processor_id(), mm, context);
+		if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
+			cpu_set(smp_processor_id(), mm->cpu_vm_mask);
 		reload_context(context);
 		MMU_TRACE('a', smp_processor_id(), mm, context);
 		/* in the unlikely event of a TLB-flush by another thread, redo the load: */
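[To make the cpu_vm_mask protocol above concrete, here is a minimal
user-level sketch. Illustration only: mm_stub, alloc_context and
activate are names made up for this example; the kernel code uses
cpus_clear/cpu_isset/cpu_set on a real cpumask, exactly as in the
hunks above.]

/*
 * Illustration only: a user-level model of the cpu_vm_mask protocol.
 * A cpumask is modeled as a plain unsigned long (one bit per cpu).
 */
#include <stdio.h>

struct mm_stub {
        unsigned long context;          /* mmu context number */
        unsigned long cpu_vm_mask;      /* cpus that have loaded it, next
                                           to context as in the LKML change
                                           mentioned above */
};

/* context allocation (wrap path): no cpu has loaded the new context yet */
static void alloc_context(struct mm_stub *mm, unsigned long next)
{
        mm->cpu_vm_mask = 0;            /* models cpus_clear() */
        mm->context = next;
}

/* context activation on a cpu: record the load before using the context */
static void activate(struct mm_stub *mm, int cpu)
{
        if (!(mm->cpu_vm_mask & (1UL << cpu)))  /* models cpu_isset() */
                mm->cpu_vm_mask |= 1UL << cpu;  /* models cpu_set() */
}

int main(void)
{
        struct mm_stub mm = { 0, 0 };

        alloc_context(&mm, 42);
        activate(&mm, 3);

        /* flush side: only our bit set -> the cheap ptc.l path suffices */
        printf("local-only: %d\n", mm.cpu_vm_mask == (1UL << 3));
        return 0;
}

[Note the ordering in activate_context() above: the cpu's bit is set
before reload_context(), so a concurrent flusher that samples
cpu_vm_mask cannot miss a cpu that is already running with the
context.]
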
--- linux.base/./arch/ia64/sn/kernel/sn2/sn2_smp.c	Mon Jan 26 17:06:03 2004
+++ linux/./arch/ia64/sn/kernel/sn2/sn2_smp.c	Thu Jan 29 16:50:51 2004
@@ -5,7 +5,7 @@
  * License. See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include
@@ -26,6 +26,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -66,14 +68,56 @@
  *
  * Purges the translation caches of all processors of the given virtual address
  * range.
+ *
+ * Note:
+ *	- cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
+ *	- cpu_vm_mask is converted into a nodemask of the nodes containing the
+ *	  cpus in cpu_vm_mask.
+ *	- if only one bit is set in cpu_vm_mask & it is the current cpu,
+ *	  then only the local TLB needs to be flushed. This flushing can be done
+ *	  using ptc.l. This is the common case & avoids the global spinlock.
+ *	- if multiple cpus have loaded the context, then flushing has to be
+ *	  done with ptc.g/MMRs under protection of the global ptc_lock.
  */
 
 void
 sn2_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
 {
-	int		cnode, mycnode, nasid, flushed=0;
+	int		i, cnode, mynasid, cpu, lcpu=0, nasid, flushed=0;
 	volatile unsigned long	*ptc0, *ptc1;
 	unsigned long	flags=0, data0, data1;
+	struct mm_struct	*mm=current->active_mm;
+	short		nasids[NR_NODES], nix;
+	DECLARE_BITMAP(nodes_flushed, NR_NODES);
+
+	CLEAR_BITMAP(nodes_flushed, NR_NODES);
+
+	i = 0;
+	for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
+		cnode = cpu_to_node(cpu);
+		__set_bit(cnode, nodes_flushed);
+		lcpu = cpu;
+		i++;
+	}
+
+	preempt_disable();
+
+	if (likely(i == 1 && lcpu == smp_processor_id())) {
+		do {
+			asm volatile ("ptc.l %0,%1" :: "r"(start), "r"(nbits<<2) : "memory");
+			start += (1UL << nbits);
+		} while (start < end);
+		ia64_srlz_i();
+		preempt_enable();
+		return;
+	}
+
+	nix = 0;
+	for (cnode=find_first_bit(&nodes_flushed, NR_NODES); cnode < NR_NODES;
+	     cnode=find_next_bit(&nodes_flushed, NR_NODES, ++cnode))
+		nasids[nix++] = cnodeid_to_nasid(cnode);
+
+	data0 = (1UL<
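[The archived copy of this message breaks off above, in the middle of
the data0 assignment; the rest of the patch is not preserved here. For
orientation only, the multi-node path the Note describes -- each
flushed node's PTC MMRs written while holding the global ptc_lock,
with the local node purged via ptc.ga -- has roughly the shape
sketched below. The helper names are made up for the illustration;
this is not the missing remainder of the patch.]

/*
 * Illustration only -- NOT the remainder of the patch.  The helpers
 * stand in for the real SH_PTC_0/1 MMR writes and the ptc.ga purge;
 * only the loop shape from the Note above is shown.
 */
#include <stdio.h>

static void local_ptc_ga(unsigned long start, unsigned long nbits)
{
        printf("ptc.ga  addr=%#lx ps=%lu\n", start, nbits);
}

static void write_ptc_mmrs(short nasid, unsigned long start, unsigned long nbits)
{
        printf("MMR nasid=%d addr=%#lx ps=%lu\n", nasid, start, nbits);
}

/* caller holds the global ptc_lock; mynasid identifies the local node */
static void global_flush(short *nasids, int nix, short mynasid,
                         unsigned long start, unsigned long end,
                         unsigned long nbits)
{
        int i;

        do {
                for (i = 0; i < nix; i++) {
                        if (nasids[i] == mynasid)
                                local_ptc_ga(start, nbits);     /* local node */
                        else
                                write_ptc_mmrs(nasids[i], start, nbits);
                }
                start += 1UL << nbits;          /* advance one 2^nbits page */
        } while (start < end);
}

int main(void)
{
        short nasids[2] = { 0, 1 };
        unsigned long start = 0x2000000000000UL;

        global_flush(nasids, 2, 0, start, start + 2 * 16384, 14);
        return 0;
}

[This is the point of the nasids[] array built from nodes_flushed in
the function above: the inner loop touches each affected node exactly
once per page purged, and nodes that never loaded the context are
skipped entirely.]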