linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Nicholas Piggin <npiggin@gmail.com>
To: linuxppc-dev@lists.ozlabs.org
Cc: Nicholas Piggin <npiggin@gmail.com>,
	"Aneesh Kumar K . V" <aneesh.kumar@linux.vnet.ibm.com>,
	Christophe Leroy <christophe.leroy@c-s.fr>
Subject: [PATCH 04/10] powerpc/mm/slice: pass pointers to struct slice_mask where possible
Date: Tue,  6 Mar 2018 23:25:01 +1000	[thread overview]
Message-ID: <20180306132507.10649-5-npiggin@gmail.com> (raw)
In-Reply-To: <20180306132507.10649-1-npiggin@gmail.com>

Pass around const pointers to struct slice_mask where possible, rather
than copies of slice_mask, to reduce stack and call overhead.

checkstack.pl gives, before:
0x00000d1c slice_get_unmapped_area [slice.o]:		592
0x00001864 is_hugepage_only_range [slice.o]:		448
0x00000754 slice_find_area_topdown [slice.o]:		400
0x00000484 slice_find_area_bottomup.isra.1 [slice.o]:	272
0x000017b4 slice_set_range_psize [slice.o]:		224
0x00000a4c slice_find_area [slice.o]:			128
0x00000160 slice_check_fit [slice.o]:			112

after:
0x00000ad0 slice_get_unmapped_area [slice.o]:		448
0x00001464 is_hugepage_only_range [slice.o]:		288
0x000006c0 slice_find_area [slice.o]:			144
0x0000016c slice_check_fit [slice.o]:			128
0x00000528 slice_find_area_bottomup.isra.2 [slice.o]:	128
0x000013e4 slice_set_range_psize [slice.o]:		128

This increases vfork+exec+exit performance by 1.5%.

Reduces time to mmap+munmap a 64kB page by 17%.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/mm/slice.c | 87 ++++++++++++++++++++++++++-----------------------
 1 file changed, 47 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 9625ceb35685..233c42d593dc 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -50,19 +50,21 @@ struct slice_mask {
 #ifdef DEBUG
 int _slice_debug = 1;
 
-static void slice_print_mask(const char *label, struct slice_mask mask)
+static void slice_print_mask(const char *label, const struct slice_mask *mask)
 {
 	if (!_slice_debug)
 		return;
-	pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices);
-	pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices);
+	pr_devel("%s low_slice: %*pbl\n", label,
+			(int)SLICE_NUM_LOW, &mask->low_slices);
+	pr_devel("%s high_slice: %*pbl\n", label,
+			(int)SLICE_NUM_HIGH, mask->high_slices);
 }
 
 #define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0)
 
 #else
 
-static void slice_print_mask(const char *label, struct slice_mask mask) {}
+static void slice_print_mask(const char *label, const struct slice_mask *mask) {}
 #define slice_dbg(fmt...)
 
 #endif
@@ -147,7 +149,8 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
 			__set_bit(i, ret->high_slices);
 }
 
-static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
+static void slice_mask_for_size(struct mm_struct *mm, int psize,
+				struct slice_mask *ret,
 				unsigned long high_limit)
 {
 	unsigned char *hpsizes, *lpsizes;
@@ -179,7 +182,8 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
 }
 
 static int slice_check_fit(struct mm_struct *mm,
-			   struct slice_mask mask, struct slice_mask available)
+			   const struct slice_mask *mask,
+			   const struct slice_mask *available)
 {
 	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
 	/*
@@ -189,14 +193,14 @@ static int slice_check_fit(struct mm_struct *mm,
 	unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
 
 	if (!SLICE_NUM_HIGH)
-		return (mask.low_slices & available.low_slices) ==
-		       mask.low_slices;
+		return (mask->low_slices & available->low_slices) ==
+		       mask->low_slices;
 
-	bitmap_and(result, mask.high_slices,
-		   available.high_slices, slice_count);
+	bitmap_and(result, mask->high_slices,
+		   available->high_slices, slice_count);
 
-	return (mask.low_slices & available.low_slices) == mask.low_slices &&
-		bitmap_equal(result, mask.high_slices, slice_count);
+	return (mask->low_slices & available->low_slices) == mask->low_slices &&
+		bitmap_equal(result, mask->high_slices, slice_count);
 }
 
 static void slice_flush_segments(void *parm)
@@ -216,7 +220,8 @@ static void slice_flush_segments(void *parm)
 #endif
 }
 
-static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
+static void slice_convert(struct mm_struct *mm,
+				const struct slice_mask *mask, int psize)
 {
 	int index, mask_index;
 	/* Write the new slice psize bits */
@@ -233,7 +238,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 
 	lpsizes = mm->context.low_slices_psize;
 	for (i = 0; i < SLICE_NUM_LOW; i++) {
-		if (!(mask.low_slices & (1u << i)))
+		if (!(mask->low_slices & (1u << i)))
 			continue;
 
 		mask_index = i & 0x1;
@@ -244,7 +249,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 
 	hpsizes = mm->context.high_slices_psize;
 	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
-		if (!test_bit(i, mask.high_slices))
+		if (!test_bit(i, mask->high_slices))
 			continue;
 
 		mask_index = i & 0x1;
@@ -270,26 +275,25 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
  * 'available' slice_mark.
  */
 static bool slice_scan_available(unsigned long addr,
-				 struct slice_mask available,
-				 int end,
-				 unsigned long *boundary_addr)
+				 const struct slice_mask *available,
+				 int end, unsigned long *boundary_addr)
 {
 	unsigned long slice;
 	if (addr < SLICE_LOW_TOP) {
 		slice = GET_LOW_SLICE_INDEX(addr);
 		*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
-		return !!(available.low_slices & (1u << slice));
+		return !!(available->low_slices & (1u << slice));
 	} else {
 		slice = GET_HIGH_SLICE_INDEX(addr);
 		*boundary_addr = (slice + end) ?
 			((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
-		return !!test_bit(slice, available.high_slices);
+		return !!test_bit(slice, available->high_slices);
 	}
 }
 
 static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 					      unsigned long len,
-					      struct slice_mask available,
+					      const struct slice_mask *available,
 					      int psize, unsigned long high_limit)
 {
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -335,7 +339,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 
 static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 					     unsigned long len,
-					     struct slice_mask available,
+					     const struct slice_mask *available,
 					     int psize, unsigned long high_limit)
 {
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -393,7 +397,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 
 
 static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
-				     struct slice_mask mask, int psize,
+				     const struct slice_mask *mask, int psize,
 				     int topdown, unsigned long high_limit)
 {
 	if (topdown)
@@ -402,7 +406,8 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
 		return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
 }
 
-static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_or_mask(struct slice_mask *dst,
+					const struct slice_mask *src)
 {
 	dst->low_slices |= src->low_slices;
 	if (!SLICE_NUM_HIGH)
@@ -411,7 +416,8 @@ static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
 		  SLICE_NUM_HIGH);
 }
 
-static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_andnot_mask(struct slice_mask *dst,
+					const struct slice_mask *src)
 {
 	dst->low_slices &= ~src->low_slices;
 
@@ -501,7 +507,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * already
 	 */
 	slice_mask_for_size(mm, psize, &good_mask, high_limit);
-	slice_print_mask(" good_mask", good_mask);
+	slice_print_mask(" good_mask", &good_mask);
 
 	/*
 	 * Here "good" means slices that are already the right page size,
@@ -535,12 +541,12 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	if (addr != 0 || fixed) {
 		/* Build a mask for the requested range */
 		slice_range_to_mask(addr, len, &mask);
-		slice_print_mask(" mask", mask);
+		slice_print_mask(" mask", &mask);
 
 		/* Check if we fit in the good mask. If we do, we just return,
 		 * nothing else to do
 		 */
-		if (slice_check_fit(mm, mask, good_mask)) {
+		if (slice_check_fit(mm, &mask, &good_mask)) {
 			slice_dbg(" fits good !\n");
 			return addr;
 		}
@@ -548,7 +554,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 		/* Now let's see if we can find something in the existing
 		 * slices for that size
 		 */
-		newaddr = slice_find_area(mm, len, good_mask,
+		newaddr = slice_find_area(mm, len, &good_mask,
 					  psize, topdown, high_limit);
 		if (newaddr != -ENOMEM) {
 			/* Found within the good mask, we don't have to setup,
@@ -564,9 +570,10 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 */
 	slice_mask_for_free(mm, &potential_mask, high_limit);
 	slice_or_mask(&potential_mask, &good_mask);
-	slice_print_mask(" potential", potential_mask);
+	slice_print_mask(" potential", &potential_mask);
 
-	if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) {
+	if ((addr != 0 || fixed) &&
+			slice_check_fit(mm, &mask, &potential_mask)) {
 		slice_dbg(" fits potential !\n");
 		goto convert;
 	}
@@ -581,7 +588,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * anywhere in the good area.
 	 */
 	if (addr) {
-		addr = slice_find_area(mm, len, good_mask,
+		addr = slice_find_area(mm, len, &good_mask,
 				       psize, topdown, high_limit);
 		if (addr != -ENOMEM) {
 			slice_dbg(" found area at 0x%lx\n", addr);
@@ -592,14 +599,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* Now let's see if we can find something in the existing slices
 	 * for that size plus free slices
 	 */
-	addr = slice_find_area(mm, len, potential_mask,
+	addr = slice_find_area(mm, len, &potential_mask,
 			       psize, topdown, high_limit);
 
 #ifdef CONFIG_PPC_64K_PAGES
 	if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
 		/* retry the search with 4k-page slices included */
 		slice_or_mask(&potential_mask, &compat_mask);
-		addr = slice_find_area(mm, len, potential_mask,
+		addr = slice_find_area(mm, len, &potential_mask,
 				       psize, topdown, high_limit);
 	}
 #endif
@@ -609,7 +616,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 
 	slice_range_to_mask(addr, len, &mask);
 	slice_dbg(" found potential area at 0x%lx\n", addr);
-	slice_print_mask(" mask", mask);
+	slice_print_mask(" mask", &mask);
 
  convert:
 	slice_andnot_mask(&mask, &good_mask);
@@ -617,7 +624,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	if (mask.low_slices ||
 	    (SLICE_NUM_HIGH &&
 	     !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH))) {
-		slice_convert(mm, mask, psize);
+		slice_convert(mm, &mask, psize);
 		if (psize > MMU_PAGE_BASE)
 			on_each_cpu(slice_flush_segments, mm, 1);
 	}
@@ -706,7 +713,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 	VM_BUG_ON(radix_enabled());
 
 	slice_range_to_mask(start, len, &mask);
-	slice_convert(mm, mask, psize);
+	slice_convert(mm, &mask, psize);
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -753,9 +760,9 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 #if 0 /* too verbose */
 	slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
 		 mm, addr, len);
-	slice_print_mask(" mask", mask);
-	slice_print_mask(" available", available);
+	slice_print_mask(" mask", &mask);
+	slice_print_mask(" available", &available);
 #endif
-	return !slice_check_fit(mm, mask, available);
+	return !slice_check_fit(mm, &mask, &available);
 }
 #endif
-- 
2.16.1

  parent reply	other threads:[~2018-03-06 13:25 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-06 13:24 [PATCH 00/10] powerpc/mm/slice: improve slice speed and stack use Nicholas Piggin
2018-03-06 13:24 ` [PATCH 01/10] selftests/powerpc: add process creation benchmark Nicholas Piggin
2018-03-19 22:23   ` [01/10] " Michael Ellerman
2018-03-20 10:15   ` Michael Ellerman
2018-03-06 13:24 ` [PATCH 02/10] powerpc/mm/slice: Simplify and optimise slice context initialisation Nicholas Piggin
2018-03-06 14:32   ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 03/10] powerpc/mm/slice: tidy lpsizes and hpsizes update loops Nicholas Piggin
2018-03-06 13:25 ` Nicholas Piggin [this message]
2018-03-06 13:43   ` [PATCH 04/10] powerpc/mm/slice: pass pointers to struct slice_mask where possible Christophe LEROY
2018-03-06 13:59     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 05/10] powerpc/mm/slice: implement a slice mask cache Nicholas Piggin
2018-03-06 13:49   ` Christophe LEROY
2018-03-06 14:01     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 06/10] powerpc/mm/slice: implement slice_check_range_fits Nicholas Piggin
2018-03-06 14:41   ` Christophe LEROY
2018-03-06 23:12     ` Nicholas Piggin
2018-03-07  6:12       ` Christophe LEROY
2018-03-07  7:16         ` Nicholas Piggin
2018-03-07 13:38           ` Christophe LEROY
2018-03-06 13:25 ` [PATCH 07/10] powerpc/mm/slice: Switch to 3-operand slice bitops helpers Nicholas Piggin
2018-03-06 14:44   ` Christophe LEROY
2018-03-06 23:19     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 08/10] powerpc/mm/slice: Use const pointers to cached slice masks where possible Nicholas Piggin
2018-03-06 14:55   ` Christophe LEROY
2018-03-06 23:33     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 09/10] powerpc/mm/slice: use the dynamic high slice size to limit bitmap operations Nicholas Piggin
2018-03-06 15:02   ` Christophe LEROY
2018-03-06 23:32     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 10/10] powerpc/mm/slice: remove radix calls to the slice code Nicholas Piggin
2018-03-06 15:12   ` Christophe LEROY
2018-03-06 23:35     ` Nicholas Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180306132507.10649-5-npiggin@gmail.com \
    --to=npiggin@gmail.com \
    --cc=aneesh.kumar@linux.vnet.ibm.com \
    --cc=christophe.leroy@c-s.fr \
    --cc=linuxppc-dev@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).