public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf bench mem: Always memset source before memcpy
@ 2020-08-10 13:34 Vincent Whitchurch
  2020-08-12 12:25 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 2+ messages in thread
From: Vincent Whitchurch @ 2020-08-10 13:34 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim
  Cc: kernel, linux-kernel, Vincent Whitchurch

For memcpy, the source pages are memset to zero only when --cycles is
used.  This leads to wildly different results with or without --cycles,
since all source pages are likely to be mapped to the same zero page
without explicit writes.

Before this fix:

$ export cmd="./perf stat -e LLC-loads -- ./perf bench \
  mem memcpy -s 1024MB -l 100 -f default"
$ $cmd

         2,935,826      LLC-loads
       3.821677452 seconds time elapsed

$ $cmd --cycles

       217,533,436      LLC-loads
       8.616725985 seconds time elapsed

After this fix:

$ $cmd

       214,459,686      LLC-loads
       8.674301124 seconds time elapsed

$ $cmd --cycles

       214,758,651      LLC-loads
       8.644480006 seconds time elapsed

Fixes: 47b5757bac03c3387c ("perf bench mem: Move boilerplate memory allocation to the infrastructure")
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
 tools/perf/bench/mem-functions.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 9235b76501be..19d45c377ac1 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
 	return 0;
 }
 
-static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
 {
-	u64 cycle_start = 0ULL, cycle_end = 0ULL;
-	memcpy_t fn = r->fn.memcpy;
-	int i;
-
 	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
 	memset(src, 0, size);
 
@@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo
 	 * to not measure page fault overhead:
 	 */
 	fn(dst, src, size);
+}
+
+static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+{
+	u64 cycle_start = 0ULL, cycle_end = 0ULL;
+	memcpy_t fn = r->fn.memcpy;
+	int i;
+
+	memcpy_prefault(fn, size, src, dst);
 
 	cycle_start = get_cycles();
 	for (i = 0; i < nr_loops; ++i)
@@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void
 	memcpy_t fn = r->fn.memcpy;
 	int i;
 
-	/*
-	 * We prefault the freshly allocated memory range here,
-	 * to not measure page fault overhead:
-	 */
-	fn(dst, src, size);
+	memcpy_prefault(fn, size, src, dst);
 
 	BUG_ON(gettimeofday(&tv_start, NULL));
 	for (i = 0; i < nr_loops; ++i)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] perf bench mem: Always memset source before memcpy
  2020-08-10 13:34 [PATCH] perf bench mem: Always memset source before memcpy Vincent Whitchurch
@ 2020-08-12 12:25 ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 2+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-08-12 12:25 UTC (permalink / raw)
  To: Vincent Whitchurch
  Cc: Peter Zijlstra, Ingo Molnar, Mark Rutland, Alexander Shishkin,
	Jiri Olsa, Namhyung Kim, kernel, linux-kernel

Em Mon, Aug 10, 2020 at 03:34:04PM +0200, Vincent Whitchurch escreveu:
> For memcpy, the source pages are memset to zero only when --cycles is
> used.  This leads to wildly different results with or without --cycles,
> since all sources pages are likely to be mapped to the same zero page
> without explicit writes.

Thanks, applied.

- Arnaldo
 
> Before this fix:
> 
> $ export cmd="./perf stat -e LLC-loads -- ./perf bench \
>   mem memcpy -s 1024MB -l 100 -f default"
> $ $cmd
> 
>          2,935,826      LLC-loads
>        3.821677452 seconds time elapsed
> 
> $ $cmd --cycles
> 
>        217,533,436      LLC-loads
>        8.616725985 seconds time elapsed
> 
> After this fix:
> 
> $ $cmd
> 
>        214,459,686      LLC-loads
>        8.674301124 seconds time elapsed
> 
> $ $cmd --cycles
> 
>        214,758,651      LLC-loads
>        8.644480006 seconds time elapsed
> 
> Fixes: 47b5757bac03c3387c ("perf bench mem: Move boilerplate memory allocation to the infrastructure")
> Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
> ---
>  tools/perf/bench/mem-functions.c | 21 +++++++++++----------
>  1 file changed, 11 insertions(+), 10 deletions(-)
> 
> diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
> index 9235b76501be..19d45c377ac1 100644
> --- a/tools/perf/bench/mem-functions.c
> +++ b/tools/perf/bench/mem-functions.c
> @@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
>  	return 0;
>  }
>  
> -static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
> +static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
>  {
> -	u64 cycle_start = 0ULL, cycle_end = 0ULL;
> -	memcpy_t fn = r->fn.memcpy;
> -	int i;
> -
>  	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
>  	memset(src, 0, size);
>  
> @@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo
>  	 * to not measure page fault overhead:
>  	 */
>  	fn(dst, src, size);
> +}
> +
> +static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
> +{
> +	u64 cycle_start = 0ULL, cycle_end = 0ULL;
> +	memcpy_t fn = r->fn.memcpy;
> +	int i;
> +
> +	memcpy_prefault(fn, size, src, dst);
>  
>  	cycle_start = get_cycles();
>  	for (i = 0; i < nr_loops; ++i)
> @@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void
>  	memcpy_t fn = r->fn.memcpy;
>  	int i;
>  
> -	/*
> -	 * We prefault the freshly allocated memory range here,
> -	 * to not measure page fault overhead:
> -	 */
> -	fn(dst, src, size);
> +	memcpy_prefault(fn, size, src, dst);
>  
>  	BUG_ON(gettimeofday(&tv_start, NULL));
>  	for (i = 0; i < nr_loops; ++i)
> -- 
> 2.25.1
> 

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-08-12 12:25 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-10 13:34 [PATCH] perf bench mem: Always memset source before memcpy Vincent Whitchurch
2020-08-12 12:25 ` Arnaldo Carvalho de Melo

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.