From: Alex Shi <alex.shi@intel.com>
To: rob@landley.net, tglx@linutronix.de, mingo@redhat.com,
	hpa@zytor.com, arnd@arndb.de, rostedt@goodmis.org,
	fweisbec@gmail.com
Cc: jeremy@goop.org, gregkh@linuxfoundation.org,
	borislav.petkov@amd.com, alex.shi@intel.com, riel@redhat.com,
	luto@mit.edu, avi@redhat.com, len.brown@intel.com,
	dhowells@redhat.com, fenghua.yu@intel.com, ak@linux.intel.com,
	cpw@sgi.com, steiner@sgi.com, akpm@linux-foundation.org,
	penberg@kernel.org, hughd@google.com, rientjes@google.com,
	kosaki.motohiro@jp.fujitsu.com, n-horiguchi@ah.jp.nec.com,
	paul.gortmaker@windriver.com, trenn@suse.de, tj@kernel.org,
	oleg@redhat.com, axboe@kernel.dk, a.p.zijlstra@chello.nl,
	kamezawa.hiroyu@jp.fujitsu.com, viro@zeniv.linux.org.uk,
	linux-kernel@vger.kernel.org
Subject: [PATCH v4 6/7] x86/tlb: optimizing flush_tlb_mm
Date: Thu, 10 May 2012 13:00:12 +0800
Message-ID: <1336626013-28413-7-git-send-email-alex.shi@intel.com>
In-Reply-To: <1336626013-28413-1-git-send-email-alex.shi@intel.com>

Not every flush_tlb_mm() call really needs to evacuate all TLB entries.
In munmap, for example, a few 'invlpg' instructions are better for overall
process performance, since they leave most TLB entries in place for later
accesses.

This patch changes flush_tlb_mm(mm) to flush_tlb_mm(mm, start, end) so
that callers which know the affected range can pass it; callers without a
meaningful range pass the full address space (0UL, -1UL).

The check for the performance balance point is left in __flush_tlb_range().
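
A minimal sketch of the two calling styles (illustration only, not part of
the patch; usage_sketch() is a hypothetical name):

	/*
	 * Illustration only: the two ways callers use the new
	 * flush_tlb_mm(mm, start, end) interface.
	 */
	static void usage_sketch(struct mm_struct *mm,
				 struct vm_area_struct *vma)
	{
		/*
		 * Ranged caller (the munmap path reaches this via
		 * tlb_flush_mmu(tlb, start, end)): only the torn-down
		 * range needs flushing, so small ranges are handled
		 * with a few 'invlpg' and unrelated entries survive.
		 */
		flush_tlb_mm(mm, vma->vm_start, vma->vm_end);

		/*
		 * Full-flush caller (fork(), PAE pgd update): no
		 * meaningful range, so callers in this patch pass
		 * 0UL and -1UL, i.e. TLB_FLUSH_ALL.
		 */
		flush_tlb_mm(mm, 0UL, -1UL);
	}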

Signed-off-by: Alex Shi <alex.shi@intel.com>
---
 arch/x86/include/asm/tlb.h      |    2 +-
 arch/x86/include/asm/tlbflush.h |    5 ++-
 arch/x86/mm/pgtable.c           |    2 +-
 arch/x86/mm/tlb.c               |   64 ++++++++++++++++++--------------------
 fs/proc/task_mmu.c              |    2 +-
 include/asm-generic/tlb.h       |    4 +-
 include/asm-generic/tlbflush.h  |    3 +-
 kernel/fork.c                   |    2 +-
 mm/memory.c                     |    9 +++--
 9 files changed, 46 insertions(+), 47 deletions(-)

diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 829215f..505fdfe 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -4,7 +4,7 @@
 #define tlb_start_vma(tlb, vma) do { } while (0)
 #define tlb_end_vma(tlb, vma) do { } while (0)
 #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+#define tlb_flush(tlb, start, end) flush_tlb_mm((tlb)->mm, start, end)
 
 #include <asm-generic/tlb.h>
 
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index c39c94e..1d07cf1 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -85,7 +85,8 @@ static inline void __flush_tlb_one(unsigned long addr)
 #define flush_tlb_all() __flush_tlb_all()
 #define local_flush_tlb() __flush_tlb()
 
-static inline void flush_tlb_mm(struct mm_struct *mm)
+static inline void flush_tlb_mm(struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	if (mm == current->active_mm)
 		__flush_tlb();
@@ -124,7 +125,7 @@ static inline void reset_lazy_tlbstate(void)
 
 extern void flush_tlb_all(void);
 extern void flush_tlb_current_task(void);
-extern void flush_tlb_mm(struct mm_struct *);
+extern void flush_tlb_mm(struct mm_struct *, unsigned long, unsigned long);
 extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
 extern void flush_tlb_range(struct vm_area_struct *vma,
 				   unsigned long start, unsigned long end);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 8573b83..5aea5b0 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -168,7 +168,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 	 * section 8.1: in PAE mode we explicitly have to flush the
 	 * TLB via cr3 if the top-level pgd is changed...
 	 */
-	flush_tlb_mm(mm);
+	flush_tlb_mm(mm, 0UL, -1UL);
 }
 #else  /* !CONFIG_X86_PAE */
 
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 4d8fb09..f32dc1e 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -298,22 +298,6 @@ void flush_tlb_current_task(void)
 	preempt_enable();
 }
 
-void flush_tlb_mm(struct mm_struct *mm)
-{
-	preempt_disable();
-
-	if (current->active_mm == mm) {
-		if (current->mm)
-			local_flush_tlb();
-		else
-			leave_mm(smp_processor_id());
-	}
-	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
-
-	preempt_enable();
-}
-
 static inline int has_large_page(struct mm_struct *mm,
 				 unsigned long start, unsigned long end)
 {
@@ -336,39 +320,32 @@ static inline int has_large_page(struct mm_struct *mm,
 	return 0;
 }
 
-void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
+void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
+				unsigned long end, unsigned long vmflag)
 {
-	struct mm_struct *mm;
-
-	if (!cpu_has_invlpg || vma->vm_flags & VM_HUGETLB
-			|| tlb_flushall_factor == (u16)TLB_FLUSH_ALL) {
-flush_all:
-		flush_tlb_mm(vma->vm_mm);
-		return;
-	}
-
 	preempt_disable();
-	mm = vma->vm_mm;
 	if (current->active_mm == mm) {
 		if (current->mm) {
-			unsigned long addr, vmflag = vma->vm_flags;
-			unsigned act_entries, tlb_entries = 0;
+			unsigned long addr;
+			unsigned long act_entries, tlb_entries = 0;
 
+			if (end == TLB_FLUSH_ALL ||
+				tlb_flushall_factor == (u16)TLB_FLUSH_ALL) {
+				local_flush_tlb();
+				goto flush_all;
+			}
 			if (vmflag & VM_EXEC)
 				tlb_entries = tlb_lli_4k[ENTRIES];
 			else
 				tlb_entries = tlb_lld_4k[ENTRIES];
-
-			act_entries = tlb_entries > mm->total_vm ?
-					mm->total_vm : tlb_entries;
+			act_entries = min(mm->total_vm, tlb_entries);
 
 			if ((end - start) >> PAGE_SHIFT >
 					act_entries >> tlb_flushall_factor)
 				local_flush_tlb();
 			else {
 				if (has_large_page(mm, start, end)) {
-					preempt_enable();
+					local_flush_tlb();
 					goto flush_all;
 				}
 				for (addr = start; addr <= end;
@@ -386,11 +363,30 @@ flush_all:
 			leave_mm(smp_processor_id());
 		}
 	}
+
+flush_all:
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
 		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
+void flush_tlb_mm(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+	__flush_tlb_range(mm, start, end, 0UL);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long vmflag = vma->vm_flags;
+
+	if (!cpu_has_invlpg || vma->vm_flags & VM_HUGETLB)
+		__flush_tlb_range(mm, 0UL, TLB_FLUSH_ALL, 0);
+	else
+		__flush_tlb_range(mm, start, end, vmflag);
+}
+
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
 {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2d60492..5728c8f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -660,7 +660,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 			walk_page_range(vma->vm_start, vma->vm_end,
 					&clear_refs_walk);
 		}
-		flush_tlb_mm(mm);
+		flush_tlb_mm(mm, 0UL, -1UL);
 		up_read(&mm->mmap_sem);
 		mmput(mm);
 	}
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index f96a5b5..24e205d 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -112,7 +112,7 @@ static inline int tlb_fast_mode(struct mmu_gather *tlb)
 }
 
 void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm);
-void tlb_flush_mmu(struct mmu_gather *tlb);
+void tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end);
 void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end);
 int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
 
@@ -123,7 +123,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	if (!__tlb_remove_page(tlb, page))
-		tlb_flush_mmu(tlb);
+		tlb_flush_mmu(tlb, 0UL, -1UL);
 }
 
 /**
diff --git a/include/asm-generic/tlbflush.h b/include/asm-generic/tlbflush.h
index d6d0a88..db1d4bb 100644
--- a/include/asm-generic/tlbflush.h
+++ b/include/asm-generic/tlbflush.h
@@ -11,7 +11,8 @@
 
 #include <linux/bug.h>
 
-static inline void flush_tlb_mm(struct mm_struct *mm)
+static inline void flush_tlb_mm(struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	BUG();
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index b9372a0..a4f0c64 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -427,7 +427,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	retval = 0;
 out:
 	up_write(&mm->mmap_sem);
-	flush_tlb_mm(oldmm);
+	flush_tlb_mm(oldmm, 0UL, -1UL);
 	up_write(&oldmm->mmap_sem);
 	return retval;
 fail_nomem_anon_vma_fork:
diff --git a/mm/memory.c b/mm/memory.c
index 6105f47..05e2c2e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -218,14 +218,15 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
 #endif
 }
 
-void tlb_flush_mmu(struct mmu_gather *tlb)
+void tlb_flush_mmu(struct mmu_gather *tlb,
+			unsigned long start, unsigned long end)
 {
 	struct mmu_gather_batch *batch;
 
 	if (!tlb->need_flush)
 		return;
 	tlb->need_flush = 0;
-	tlb_flush(tlb);
+	tlb_flush(tlb, start, end);
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb_table_flush(tlb);
 #endif
@@ -248,7 +249,7 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
 {
 	struct mmu_gather_batch *batch, *next;
 
-	tlb_flush_mmu(tlb);
+	tlb_flush_mmu(tlb, start, end);
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
@@ -1204,7 +1205,7 @@ again:
 	 */
 	if (force_flush) {
 		force_flush = 0;
-		tlb_flush_mmu(tlb);
+		tlb_flush_mmu(tlb, addr, end);
 		if (addr != end)
 			goto again;
 	}
-- 
1.7.5.4


Thread overview: 51+ messages
2012-05-10  5:00 [PATCH v4 0/7] tlb flush optimization Alex Shi
2012-05-10  5:00 ` [PATCH v4 1/7] x86/tlb: unify TLB_FLUSH_ALL definition Alex Shi
2012-05-10 18:46   ` Rob Landley
2012-05-11 18:33     ` H. Peter Anvin
2012-05-10  5:00 ` [PATCH v4 2/7] x86/tlb_info: get last level TLB entry number of CPU Alex Shi
2012-05-10 14:43   ` Borislav Petkov
2012-05-11  0:33     ` Alex Shi
2012-05-10 15:58   ` Borislav Petkov
2012-05-11  0:38     ` Alex Shi
2012-05-10  5:00 ` [PATCH v4 3/7] x86/flush_tlb: try flush_tlb_single one by one in flush_tlb_range Alex Shi
2012-05-10  7:53   ` Borislav Petkov
2012-05-10  8:50     ` Alex Shi
2012-05-10 21:42       ` Rob Landley
2012-05-10  8:42   ` Borislav Petkov
2012-05-10  9:04     ` Alex Shi
2012-05-12  8:01       ` Alex Shi
2012-05-13 11:13         ` Borislav Petkov
2012-05-15  1:06           ` Alex Shi
2012-05-15 10:33             ` Borislav Petkov
2012-05-15 11:16               ` Peter Zijlstra
2012-05-15 11:56                 ` Borislav Petkov
2012-05-15 12:00                   ` Peter Zijlstra
2012-05-15 13:58                     ` Alex Shi
2012-05-10  5:00 ` [PATCH v4 4/7] x86/tlb: fall back to flush all when meet a THP large page Alex Shi
2012-05-10  9:29   ` Peter Zijlstra
2012-05-10 10:40     ` Borislav Petkov
2012-05-11  0:44       ` Alex Shi
2012-05-11  9:03         ` Peter Zijlstra
2012-05-11 16:28   ` Andrea Arcangeli
2012-05-12  7:58     ` Alex Shi
2012-05-10  5:00 ` [PATCH v4 5/7] x86/tlb: add tlb flush all factor for specific CPU Alex Shi
2012-05-10  9:35   ` Peter Zijlstra
2012-05-11  0:47     ` Alex Shi
2012-05-10  9:37   ` Peter Zijlstra
2012-05-11  0:48     ` Alex Shi
2012-05-10  9:38   ` Peter Zijlstra
2012-05-10 10:42     ` Borislav Petkov
2012-05-11  0:50       ` Alex Shi
2012-05-11  0:49     ` Alex Shi
2012-05-11  9:04       ` Peter Zijlstra
2012-05-11  9:04         ` Peter Zijlstra
2012-05-11 12:51           ` Alex Shi
2012-05-10  5:00 ` Alex Shi [this message]
2012-05-10  8:27   ` [PATCH v4 6/7] x86/tlb: optimizing flush_tlb_mm Peter Zijlstra
2012-05-10  5:00 ` [PATCH v4 7/7] x86/tlb: add tlb_flushall_factor into sysfs for user testing/tuning Alex Shi
2012-05-10  8:27   ` Borislav Petkov
2012-05-11  0:52     ` Alex Shi
2012-05-11  9:51       ` Borislav Petkov
2012-05-11 12:53         ` Alex Shi
2012-05-10 15:13   ` Greg KH
2012-05-11  0:59     ` Alex Shi
