All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nicholas Piggin <npiggin@gmail.com>
To: linuxppc-dev@lists.ozlabs.org
Cc: Nicholas Piggin <npiggin@gmail.com>,
	"Aneesh Kumar K . V" <aneesh.kumar@linux.vnet.ibm.com>,
	Christophe Leroy <christophe.leroy@c-s.fr>
Subject: [PATCH 08/10] powerpc/mm/slice: Use const pointers to cached slice masks where possible
Date: Tue,  6 Mar 2018 23:25:05 +1000	[thread overview]
Message-ID: <20180306132507.10649-9-npiggin@gmail.com> (raw)
In-Reply-To: <20180306132507.10649-1-npiggin@gmail.com>

The slice_mask cache was a basic conversion which copied the slice
mask into caller's structures, because that's how the original code
worked. In most cases the pointer can be used directly instead, saving
a copy and an on-stack structure.

On POWER8, this increases vfork+exec+exit performance by 0.3%
and reduces time to mmap+munmap a 64kB page by 2%.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/mm/slice.c | 77 +++++++++++++++++++++----------------------------
 1 file changed, 33 insertions(+), 44 deletions(-)

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 46daa1d1794f..086c31b8b982 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -472,10 +472,10 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 				      unsigned long flags, unsigned int psize,
 				      int topdown)
 {
-	struct slice_mask mask;
 	struct slice_mask good_mask;
 	struct slice_mask potential_mask;
-	struct slice_mask compat_mask;
+	const struct slice_mask *maskp;
+	const struct slice_mask *compat_maskp = NULL;
 	int fixed = (flags & MAP_FIXED);
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
 	unsigned long page_size = 1UL << pshift;
@@ -509,22 +509,6 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 		on_each_cpu(slice_flush_segments, mm, 1);
 	}
 
-	/*
-	 * init different masks
-	 */
-	mask.low_slices = 0;
-
-	/* silence stupid warning */;
-	potential_mask.low_slices = 0;
-
-	compat_mask.low_slices = 0;
-
-	if (SLICE_NUM_HIGH) {
-		bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
-		bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
-		bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
-	}
-
 	/* Sanity checks */
 	BUG_ON(mm->task_size == 0);
 	BUG_ON(mm->context.slb_addr_limit == 0);
@@ -547,8 +531,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* First make up a "good" mask of slices that have the right size
 	 * already
 	 */
-	good_mask = *slice_mask_for_size(mm, psize);
-	slice_print_mask(" good_mask", &good_mask);
+	maskp = slice_mask_for_size(mm, psize);
 
 	/*
 	 * Here "good" means slices that are already the right page size,
@@ -572,11 +555,19 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 #ifdef CONFIG_PPC_64K_PAGES
 	/* If we support combo pages, we can allow 64k pages in 4k slices */
 	if (psize == MMU_PAGE_64K) {
-		compat_mask = *slice_mask_for_size(mm, MMU_PAGE_4K);
+		compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
 		if (fixed)
-			slice_or_mask(&good_mask, &good_mask, &compat_mask);
-	}
+			slice_or_mask(&good_mask, maskp, compat_maskp);
+		else
+			slice_copy_mask(&good_mask, maskp);
+	} else
 #endif
+	{
+		slice_copy_mask(&good_mask, maskp);
+	}
+	slice_print_mask(" good_mask", &good_mask);
+	if (compat_maskp)
+		slice_print_mask(" compat_mask", compat_maskp);
 
 	/* First check hint if it's valid or if we have MAP_FIXED */
 	if (addr || fixed) {
@@ -643,7 +634,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 #ifdef CONFIG_PPC_64K_PAGES
 	if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
 		/* retry the search with 4k-page slices included */
-		slice_or_mask(&potential_mask, &potential_mask, &compat_mask);
+		slice_or_mask(&potential_mask, &potential_mask, compat_maskp);
 		addr = slice_find_area(mm, len, &potential_mask,
 				       psize, topdown, high_limit);
 	}
@@ -652,17 +643,18 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	if (addr == -ENOMEM)
 		return -ENOMEM;
 
-	slice_range_to_mask(addr, len, &mask);
+	slice_range_to_mask(addr, len, &potential_mask);
 	slice_dbg(" found potential area at 0x%lx\n", addr);
-	slice_print_mask(" mask", &mask);
+	slice_print_mask(" mask", &potential_mask);
 
  convert:
-	slice_andnot_mask(&mask, &mask, &good_mask);
-	slice_andnot_mask(&mask, &mask, &compat_mask);
-	if (mask.low_slices ||
-	    (SLICE_NUM_HIGH &&
-	     !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH))) {
-		slice_convert(mm, &mask, psize);
+	slice_andnot_mask(&potential_mask, &potential_mask, &good_mask);
+	if (compat_maskp && !fixed)
+		slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp);
+	if (potential_mask.low_slices ||
+		(SLICE_NUM_HIGH &&
+		 !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) {
+		slice_convert(mm, &potential_mask, psize);
 		if (psize > MMU_PAGE_BASE)
 			on_each_cpu(slice_flush_segments, mm, 1);
 	}
@@ -786,28 +778,25 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 			   unsigned long len)
 {
-	struct slice_mask available;
+	const struct slice_mask *maskp;
 	unsigned int psize = mm->context.user_psize;
 
 	if (radix_enabled())
 		return 0;
 
-	available = *slice_mask_for_size(mm, psize);
+	maskp = slice_mask_for_size(mm, psize);
 #ifdef CONFIG_PPC_64K_PAGES
 	/* We need to account for 4k slices too */
 	if (psize == MMU_PAGE_64K) {
-		struct slice_mask compat_mask;
-		compat_mask = *slice_mask_for_size(mm, MMU_PAGE_4K);
-		slice_or_mask(&available, &available, &compat_mask);
+		const struct slice_mask *compat_maskp;
+		struct slice_mask available;
+
+		compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
+		slice_or_mask(&available, maskp, compat_maskp);
+		return !slice_check_range_fits(mm, &available, addr, len);
 	}
 #endif
 
-#if 0 /* too verbose */
-	slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
-		 mm, addr, len);
-	slice_print_mask(" mask", &mask);
-	slice_print_mask(" available", &available);
-#endif
-	return !slice_check_range_fits(mm, &available, addr, len);
+	return !slice_check_range_fits(mm, maskp, addr, len);
 }
 #endif
-- 
2.16.1

  parent reply	other threads:[~2018-03-06 13:25 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-06 13:24 [PATCH 00/10] powerpc/mm/slice: improve slice speed and stack use Nicholas Piggin
2018-03-06 13:24 ` [PATCH 01/10] selftests/powerpc: add process creation benchmark Nicholas Piggin
2018-03-19 22:23   ` [01/10] " Michael Ellerman
2018-03-20 10:15   ` Michael Ellerman
2018-03-06 13:24 ` [PATCH 02/10] powerpc/mm/slice: Simplify and optimise slice context initialisation Nicholas Piggin
2018-03-06 14:32   ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 03/10] powerpc/mm/slice: tidy lpsizes and hpsizes update loops Nicholas Piggin
2018-03-06 13:25 ` [PATCH 04/10] powerpc/mm/slice: pass pointers to struct slice_mask where possible Nicholas Piggin
2018-03-06 13:43   ` Christophe LEROY
2018-03-06 13:59     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 05/10] powerpc/mm/slice: implement a slice mask cache Nicholas Piggin
2018-03-06 13:49   ` Christophe LEROY
2018-03-06 14:01     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 06/10] powerpc/mm/slice: implement slice_check_range_fits Nicholas Piggin
2018-03-06 14:41   ` Christophe LEROY
2018-03-06 23:12     ` Nicholas Piggin
2018-03-07  6:12       ` Christophe LEROY
2018-03-07  7:16         ` Nicholas Piggin
2018-03-07 13:38           ` Christophe LEROY
2018-03-06 13:25 ` [PATCH 07/10] powerpc/mm/slice: Switch to 3-operand slice bitops helpers Nicholas Piggin
2018-03-06 14:44   ` Christophe LEROY
2018-03-06 23:19     ` Nicholas Piggin
2018-03-06 13:25 ` Nicholas Piggin [this message]
2018-03-06 14:55   ` [PATCH 08/10] powerpc/mm/slice: Use const pointers to cached slice masks where possible Christophe LEROY
2018-03-06 23:33     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 09/10] powerpc/mm/slice: use the dynamic high slice size to limit bitmap operations Nicholas Piggin
2018-03-06 15:02   ` Christophe LEROY
2018-03-06 23:32     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 10/10] powerpc/mm/slice: remove radix calls to the slice code Nicholas Piggin
2018-03-06 15:12   ` Christophe LEROY
2018-03-06 23:35     ` Nicholas Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180306132507.10649-9-npiggin@gmail.com \
    --to=npiggin@gmail.com \
    --cc=aneesh.kumar@linux.vnet.ibm.com \
    --cc=christophe.leroy@c-s.fr \
    --cc=linuxppc-dev@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.