All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nicholas Piggin <npiggin@gmail.com>
To: linuxppc-dev@lists.ozlabs.org
Cc: Nicholas Piggin <npiggin@gmail.com>,
	"Aneesh Kumar K . V" <aneesh.kumar@linux.vnet.ibm.com>,
	Christophe Leroy <christophe.leroy@c-s.fr>
Subject: [PATCH 04/10] powerpc/mm/slice: pass pointers to struct slice_mask where possible
Date: Tue,  6 Mar 2018 23:25:01 +1000	[thread overview]
Message-ID: <20180306132507.10649-5-npiggin@gmail.com> (raw)
In-Reply-To: <20180306132507.10649-1-npiggin@gmail.com>

Pass around const pointers to struct slice_mask where possible, rather
than copies of slice_mask, to reduce stack and call overhead.

checkstack.pl gives, before:
0x00000d1c slice_get_unmapped_area [slice.o]:		592
0x00001864 is_hugepage_only_range [slice.o]:		448
0x00000754 slice_find_area_topdown [slice.o]:		400
0x00000484 slice_find_area_bottomup.isra.1 [slice.o]:	272
0x000017b4 slice_set_range_psize [slice.o]:		224
0x00000a4c slice_find_area [slice.o]:			128
0x00000160 slice_check_fit [slice.o]:			112

after:
0x00000ad0 slice_get_unmapped_area [slice.o]:		448
0x00001464 is_hugepage_only_range [slice.o]:		288
0x000006c0 slice_find_area [slice.o]:			144
0x0000016c slice_check_fit [slice.o]:			128
0x00000528 slice_find_area_bottomup.isra.2 [slice.o]:	128
0x000013e4 slice_set_range_psize [slice.o]:		128

This increases vfork+exec+exit performance by 1.5%.

Reduces time to mmap+munmap a 64kB page by 17%.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/mm/slice.c | 87 ++++++++++++++++++++++++++-----------------------
 1 file changed, 47 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 9625ceb35685..233c42d593dc 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -50,19 +50,21 @@ struct slice_mask {
 #ifdef DEBUG
 int _slice_debug = 1;
 
-static void slice_print_mask(const char *label, struct slice_mask mask)
+static void slice_print_mask(const char *label, const struct slice_mask *mask)
 {
 	if (!_slice_debug)
 		return;
-	pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices);
-	pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices);
+	pr_devel("%s low_slice: %*pbl\n", label,
+			(int)SLICE_NUM_LOW, &mask->low_slices);
+	pr_devel("%s high_slice: %*pbl\n", label,
+			(int)SLICE_NUM_HIGH, mask->high_slices);
 }
 
 #define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0)
 
 #else
 
-static void slice_print_mask(const char *label, struct slice_mask mask) {}
+static void slice_print_mask(const char *label, const struct slice_mask *mask) {}
 #define slice_dbg(fmt...)
 
 #endif
@@ -147,7 +149,8 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
 			__set_bit(i, ret->high_slices);
 }
 
-static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
+static void slice_mask_for_size(struct mm_struct *mm, int psize,
+				struct slice_mask *ret,
 				unsigned long high_limit)
 {
 	unsigned char *hpsizes, *lpsizes;
@@ -179,7 +182,8 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
 }
 
 static int slice_check_fit(struct mm_struct *mm,
-			   struct slice_mask mask, struct slice_mask available)
+			   const struct slice_mask *mask,
+			   const struct slice_mask *available)
 {
 	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
 	/*
@@ -189,14 +193,14 @@ static int slice_check_fit(struct mm_struct *mm,
 	unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
 
 	if (!SLICE_NUM_HIGH)
-		return (mask.low_slices & available.low_slices) ==
-		       mask.low_slices;
+		return (mask->low_slices & available->low_slices) ==
+		       mask->low_slices;
 
-	bitmap_and(result, mask.high_slices,
-		   available.high_slices, slice_count);
+	bitmap_and(result, mask->high_slices,
+		   available->high_slices, slice_count);
 
-	return (mask.low_slices & available.low_slices) == mask.low_slices &&
-		bitmap_equal(result, mask.high_slices, slice_count);
+	return (mask->low_slices & available->low_slices) == mask->low_slices &&
+		bitmap_equal(result, mask->high_slices, slice_count);
 }
 
 static void slice_flush_segments(void *parm)
@@ -216,7 +220,8 @@ static void slice_flush_segments(void *parm)
 #endif
 }
 
-static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
+static void slice_convert(struct mm_struct *mm,
+				const struct slice_mask *mask, int psize)
 {
 	int index, mask_index;
 	/* Write the new slice psize bits */
@@ -233,7 +238,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 
 	lpsizes = mm->context.low_slices_psize;
 	for (i = 0; i < SLICE_NUM_LOW; i++) {
-		if (!(mask.low_slices & (1u << i)))
+		if (!(mask->low_slices & (1u << i)))
 			continue;
 
 		mask_index = i & 0x1;
@@ -244,7 +249,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 
 	hpsizes = mm->context.high_slices_psize;
 	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
-		if (!test_bit(i, mask.high_slices))
+		if (!test_bit(i, mask->high_slices))
 			continue;
 
 		mask_index = i & 0x1;
@@ -270,26 +275,25 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
  * 'available' slice_mark.
  */
 static bool slice_scan_available(unsigned long addr,
-				 struct slice_mask available,
-				 int end,
-				 unsigned long *boundary_addr)
+				 const struct slice_mask *available,
+				 int end, unsigned long *boundary_addr)
 {
 	unsigned long slice;
 	if (addr < SLICE_LOW_TOP) {
 		slice = GET_LOW_SLICE_INDEX(addr);
 		*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
-		return !!(available.low_slices & (1u << slice));
+		return !!(available->low_slices & (1u << slice));
 	} else {
 		slice = GET_HIGH_SLICE_INDEX(addr);
 		*boundary_addr = (slice + end) ?
 			((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
-		return !!test_bit(slice, available.high_slices);
+		return !!test_bit(slice, available->high_slices);
 	}
 }
 
 static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 					      unsigned long len,
-					      struct slice_mask available,
+					      const struct slice_mask *available,
 					      int psize, unsigned long high_limit)
 {
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -335,7 +339,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 
 static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 					     unsigned long len,
-					     struct slice_mask available,
+					     const struct slice_mask *available,
 					     int psize, unsigned long high_limit)
 {
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -393,7 +397,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 
 
 static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
-				     struct slice_mask mask, int psize,
+				     const struct slice_mask *mask, int psize,
 				     int topdown, unsigned long high_limit)
 {
 	if (topdown)
@@ -402,7 +406,8 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
 		return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
 }
 
-static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_or_mask(struct slice_mask *dst,
+					const struct slice_mask *src)
 {
 	dst->low_slices |= src->low_slices;
 	if (!SLICE_NUM_HIGH)
@@ -411,7 +416,8 @@ static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
 		  SLICE_NUM_HIGH);
 }
 
-static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_andnot_mask(struct slice_mask *dst,
+					const struct slice_mask *src)
 {
 	dst->low_slices &= ~src->low_slices;
 
@@ -501,7 +507,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * already
 	 */
 	slice_mask_for_size(mm, psize, &good_mask, high_limit);
-	slice_print_mask(" good_mask", good_mask);
+	slice_print_mask(" good_mask", &good_mask);
 
 	/*
 	 * Here "good" means slices that are already the right page size,
@@ -535,12 +541,12 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	if (addr != 0 || fixed) {
 		/* Build a mask for the requested range */
 		slice_range_to_mask(addr, len, &mask);
-		slice_print_mask(" mask", mask);
+		slice_print_mask(" mask", &mask);
 
 		/* Check if we fit in the good mask. If we do, we just return,
 		 * nothing else to do
 		 */
-		if (slice_check_fit(mm, mask, good_mask)) {
+		if (slice_check_fit(mm, &mask, &good_mask)) {
 			slice_dbg(" fits good !\n");
 			return addr;
 		}
@@ -548,7 +554,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 		/* Now let's see if we can find something in the existing
 		 * slices for that size
 		 */
-		newaddr = slice_find_area(mm, len, good_mask,
+		newaddr = slice_find_area(mm, len, &good_mask,
 					  psize, topdown, high_limit);
 		if (newaddr != -ENOMEM) {
 			/* Found within the good mask, we don't have to setup,
@@ -564,9 +570,10 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 */
 	slice_mask_for_free(mm, &potential_mask, high_limit);
 	slice_or_mask(&potential_mask, &good_mask);
-	slice_print_mask(" potential", potential_mask);
+	slice_print_mask(" potential", &potential_mask);
 
-	if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) {
+	if ((addr != 0 || fixed) &&
+			slice_check_fit(mm, &mask, &potential_mask)) {
 		slice_dbg(" fits potential !\n");
 		goto convert;
 	}
@@ -581,7 +588,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * anywhere in the good area.
 	 */
 	if (addr) {
-		addr = slice_find_area(mm, len, good_mask,
+		addr = slice_find_area(mm, len, &good_mask,
 				       psize, topdown, high_limit);
 		if (addr != -ENOMEM) {
 			slice_dbg(" found area at 0x%lx\n", addr);
@@ -592,14 +599,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* Now let's see if we can find something in the existing slices
 	 * for that size plus free slices
 	 */
-	addr = slice_find_area(mm, len, potential_mask,
+	addr = slice_find_area(mm, len, &potential_mask,
 			       psize, topdown, high_limit);
 
 #ifdef CONFIG_PPC_64K_PAGES
 	if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
 		/* retry the search with 4k-page slices included */
 		slice_or_mask(&potential_mask, &compat_mask);
-		addr = slice_find_area(mm, len, potential_mask,
+		addr = slice_find_area(mm, len, &potential_mask,
 				       psize, topdown, high_limit);
 	}
 #endif
@@ -609,7 +616,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 
 	slice_range_to_mask(addr, len, &mask);
 	slice_dbg(" found potential area at 0x%lx\n", addr);
-	slice_print_mask(" mask", mask);
+	slice_print_mask(" mask", &mask);
 
  convert:
 	slice_andnot_mask(&mask, &good_mask);
@@ -617,7 +624,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	if (mask.low_slices ||
 	    (SLICE_NUM_HIGH &&
 	     !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH))) {
-		slice_convert(mm, mask, psize);
+		slice_convert(mm, &mask, psize);
 		if (psize > MMU_PAGE_BASE)
 			on_each_cpu(slice_flush_segments, mm, 1);
 	}
@@ -706,7 +713,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 	VM_BUG_ON(radix_enabled());
 
 	slice_range_to_mask(start, len, &mask);
-	slice_convert(mm, mask, psize);
+	slice_convert(mm, &mask, psize);
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -753,9 +760,9 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 #if 0 /* too verbose */
 	slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
 		 mm, addr, len);
-	slice_print_mask(" mask", mask);
-	slice_print_mask(" available", available);
+	slice_print_mask(" mask", &mask);
+	slice_print_mask(" available", &available);
 #endif
-	return !slice_check_fit(mm, mask, available);
+	return !slice_check_fit(mm, &mask, &available);
 }
 #endif
-- 
2.16.1

  parent reply	other threads:[~2018-03-06 13:25 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-06 13:24 [PATCH 00/10] powerpc/mm/slice: improve slice speed and stack use Nicholas Piggin
2018-03-06 13:24 ` [PATCH 01/10] selftests/powerpc: add process creation benchmark Nicholas Piggin
2018-03-19 22:23   ` [01/10] " Michael Ellerman
2018-03-20 10:15   ` Michael Ellerman
2018-03-06 13:24 ` [PATCH 02/10] powerpc/mm/slice: Simplify and optimise slice context initialisation Nicholas Piggin
2018-03-06 14:32   ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 03/10] powerpc/mm/slice: tidy lpsizes and hpsizes update loops Nicholas Piggin
2018-03-06 13:25 ` Nicholas Piggin [this message]
2018-03-06 13:43   ` [PATCH 04/10] powerpc/mm/slice: pass pointers to struct slice_mask where possible Christophe LEROY
2018-03-06 13:59     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 05/10] powerpc/mm/slice: implement a slice mask cache Nicholas Piggin
2018-03-06 13:49   ` Christophe LEROY
2018-03-06 14:01     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 06/10] powerpc/mm/slice: implement slice_check_range_fits Nicholas Piggin
2018-03-06 14:41   ` Christophe LEROY
2018-03-06 23:12     ` Nicholas Piggin
2018-03-07  6:12       ` Christophe LEROY
2018-03-07  7:16         ` Nicholas Piggin
2018-03-07 13:38           ` Christophe LEROY
2018-03-06 13:25 ` [PATCH 07/10] powerpc/mm/slice: Switch to 3-operand slice bitops helpers Nicholas Piggin
2018-03-06 14:44   ` Christophe LEROY
2018-03-06 23:19     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 08/10] powerpc/mm/slice: Use const pointers to cached slice masks where possible Nicholas Piggin
2018-03-06 14:55   ` Christophe LEROY
2018-03-06 23:33     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 09/10] powerpc/mm/slice: use the dynamic high slice size to limit bitmap operations Nicholas Piggin
2018-03-06 15:02   ` Christophe LEROY
2018-03-06 23:32     ` Nicholas Piggin
2018-03-06 13:25 ` [PATCH 10/10] powerpc/mm/slice: remove radix calls to the slice code Nicholas Piggin
2018-03-06 15:12   ` Christophe LEROY
2018-03-06 23:35     ` Nicholas Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180306132507.10649-5-npiggin@gmail.com \
    --to=npiggin@gmail.com \
    --cc=aneesh.kumar@linux.vnet.ibm.com \
    --cc=christophe.leroy@c-s.fr \
    --cc=linuxppc-dev@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.