All of lore.kernel.org
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Ankur Arora <ankur.a.arora@oracle.com>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org,
	akpm@linux-foundation.org, david@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, hpa@zytor.com, mingo@redhat.com,
	mjguzik@gmail.com, luto@kernel.org, peterz@infradead.org,
	acme@kernel.org, tglx@linutronix.de, willy@infradead.org,
	raghavendra.kt@amd.com, boris.ostrovsky@oracle.com,
	konrad.wilk@oracle.com
Subject: Re: [PATCH v5 09/14] perf bench mem: Add mmap() workloads
Date: Tue, 15 Jul 2025 13:20:15 -0700	[thread overview]
Message-ID: <aHa3_9ijsm3FC6_8@google.com> (raw)
In-Reply-To: <20250710005926.1159009-10-ankur.a.arora@oracle.com>

On Wed, Jul 09, 2025 at 05:59:21PM -0700, Ankur Arora wrote:
> Add two mmap() workloads: one that eagerly populates a region and
> another that demand faults it in.
> 
> The intent is to probe the memory subsystem performance incurred
> by mmap().

Maybe it would be better to name this 'mmap', since the other tests are
named after the actual function.  Also, please update the documentation.

Thanks,
Namhyung


> 
>   $ perf bench mem map -s 4gb -p 4kb -l 10 -f populate
>   # Running 'mem/map' benchmark:
>   # function 'populate' (Eagerly populated map)
>   # Copying 4gb bytes ...
> 
>        1.811691 GB/sec
> 
>   $ perf bench mem map -s 4gb -p 2mb -l 10 -f populate
>   # Running 'mem/map' benchmark:
>   # function 'populate' (Eagerly populated map)
>   # Copying 4gb bytes ...
> 
>       12.272017 GB/sec
> 
>   $ perf bench mem map -s 4gb -p 1gb -l 10 -f populate
>   # Running 'mem/map' benchmark:
>   # function 'populate' (Eagerly populated map)
>   # Copying 4gb bytes ...
> 
>       17.085927 GB/sec
> 
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
>  tools/perf/bench/bench.h         |  1 +
>  tools/perf/bench/mem-functions.c | 96 ++++++++++++++++++++++++++++++++
>  tools/perf/builtin-bench.c       |  1 +
>  3 files changed, 98 insertions(+)
> 
> diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
> index 9f736423af53..46484bb0eefb 100644
> --- a/tools/perf/bench/bench.h
> +++ b/tools/perf/bench/bench.h
> @@ -28,6 +28,7 @@ int bench_syscall_fork(int argc, const char **argv);
>  int bench_syscall_execve(int argc, const char **argv);
>  int bench_mem_memcpy(int argc, const char **argv);
>  int bench_mem_memset(int argc, const char **argv);
> +int bench_mem_map(int argc, const char **argv);
>  int bench_mem_find_bit(int argc, const char **argv);
>  int bench_futex_hash(int argc, const char **argv);
>  int bench_futex_wake(int argc, const char **argv);
> diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
> index 8a37da149327..ea62e3583a70 100644
> --- a/tools/perf/bench/mem-functions.c
> +++ b/tools/perf/bench/mem-functions.c
> @@ -40,6 +40,7 @@ static const char	*chunk_size_str	= "0";
>  static unsigned int	nr_loops	= 1;
>  static bool		use_cycles;
>  static int		cycles_fd;
> +static unsigned int	seed;
>  
>  static const struct option bench_common_options[] = {
>  	OPT_STRING('s', "size", &size_str, "1MB",
> @@ -81,6 +82,7 @@ struct bench_params {
>  	size_t		chunk_size;
>  	unsigned int	nr_loops;
>  	unsigned int	page_shift;
> +	unsigned int	seed;
>  };
>  
>  struct bench_mem_info {
> @@ -98,6 +100,7 @@ typedef void (*mem_fini_t)(struct bench_mem_info *, struct bench_params *,
>  			   void **, void **);
>  typedef void *(*memcpy_t)(void *, const void *, size_t);
>  typedef void *(*memset_t)(void *, int, size_t);
> +typedef void (*map_op_t)(void *, size_t, unsigned int, bool);
>  
>  struct function {
>  	const char *name;
> @@ -108,6 +111,7 @@ struct function {
>  		union {
>  			memcpy_t memcpy;
>  			memset_t memset;
> +			map_op_t map_op;
>  		};
>  	} fn;
>  };
> @@ -160,6 +164,14 @@ static union bench_clock clock_diff(union bench_clock *s, union bench_clock *e)
>  	return t;
>  }
>  
> +static void clock_accum(union bench_clock *a, union bench_clock *b)
> +{
> +	if (use_cycles)
> +		a->cycles += b->cycles;
> +	else
> +		timeradd(&a->tv, &b->tv, &a->tv);
> +}
> +
>  static double timeval2double(struct timeval *ts)
>  {
>  	return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
> @@ -270,6 +282,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
>  	}
>  	p.page_shift = ilog2(page_size);
>  
> +	p.seed = seed;
> +
>  	if (!strncmp(function_str, "all", 3)) {
>  		for (i = 0; info->functions[i].name; i++)
>  			__bench_mem_function(info, &p, i);
> @@ -464,3 +478,85 @@ int bench_mem_memset(int argc, const char **argv)
>  
>  	return bench_mem_common(argc, argv, &info);
>  }
> +
> +static void map_page_touch(void *dst, size_t size, unsigned int page_shift, bool random)
> +{
> +	unsigned long npages = size / (1 << page_shift);
> +	unsigned long offset = 0, r = 0;
> +
> +	for (unsigned long i = 0; i < npages; i++) {
> +		if (random)
> +			r = rand() % (1 << page_shift);
> +
> +		*((char *)dst + offset + r) = *(char *)(dst + offset + r) + i;
> +		offset += 1 << page_shift;
> +	}
> +}
> +
> +static int do_map(const struct function *r, struct bench_params *p,
> +		  void *src __maybe_unused, void *dst __maybe_unused,
> +		  union bench_clock *accum)
> +{
> +	union bench_clock start, end, diff;
> +	map_op_t fn = r->fn.map_op;
> +	bool populate = strcmp(r->name, "populate") == 0;
> +
> +	if (p->seed)
> +		srand(p->seed);
> +
> +	for (unsigned int i = 0; i < p->nr_loops; i++) {
> +		clock_get(&start);
> +		dst = bench_mmap(p->size, populate, p->page_shift);
> +		if (!dst)
> +			goto out;
> +
> +		fn(dst, p->size, p->page_shift, p->seed);
> +		clock_get(&end);
> +		diff = clock_diff(&start, &end);
> +		clock_accum(accum, &diff);
> +
> +		bench_munmap(dst, p->size);
> +	}
> +
> +	return 0;
> +out:
> +	printf("# Memory allocation failed - maybe size (%s) %s?\n", size_str,
> +			p->page_shift != PAGE_SHIFT_4KB ? "has insufficient hugepages" : "is too large");
> +	return -1;
> +}
> +
> +static const char * const bench_mem_map_usage[] = {
> +	"perf bench mem map <options>",
> +	NULL
> +};
> +
> +static const struct function map_functions[] = {
> +	{ .name		= "populate",
> +	  .desc		= "Eagerly populated map",
> +	  .fn.map_op	= map_page_touch },
> +
> +	{ .name		= "demand",
> +	  .desc		= "Demand loaded map",
> +	  .fn.map_op	= map_page_touch },
> +
> +	{ .name = NULL, }
> +};
> +
> +int bench_mem_map(int argc, const char **argv)
> +{
> +	static const struct option bench_map_options[] = {
> +		OPT_UINTEGER('r', "randomize", &seed,
> +			    "Seed to randomize page RW offset with."),
> +		OPT_PARENT(bench_common_options),
> +		OPT_END()
> +	};
> +
> +	struct bench_mem_info info = {
> +		.functions		= map_functions,
> +		.do_op			= do_map,
> +		.usage			= bench_mem_map_usage,
> +		.options		= bench_map_options,
> +	};
> +
> +	return bench_mem_common(argc, argv, &info);
> +}
> diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
> index 2c1a9f3d847a..a20bd9882f0a 100644
> --- a/tools/perf/builtin-bench.c
> +++ b/tools/perf/builtin-bench.c
> @@ -65,6 +65,7 @@ static struct bench mem_benchmarks[] = {
>  	{ "memcpy",	"Benchmark for memcpy() functions",		bench_mem_memcpy	},
>  	{ "memset",	"Benchmark for memset() functions",		bench_mem_memset	},
>  	{ "find_bit",	"Benchmark for find_bit() functions",		bench_mem_find_bit	},
> +	{ "map",	"Benchmark for mmap() mappings",		bench_mem_map		},
>  	{ "all",	"Run all memory access benchmarks",		NULL			},
>  	{ NULL,		NULL,						NULL			}
>  };
> -- 
> 2.43.5
> 


  reply	other threads:[~2025-07-15 20:20 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-10  0:59 [PATCH v5 00/14] mm: folio_zero_user: clearing of page-extents Ankur Arora
2025-07-10  0:59 ` [PATCH v5 01/14] perf bench mem: Remove repetition around time measurement Ankur Arora
2025-07-15 20:04   ` Namhyung Kim
2025-07-10  0:59 ` [PATCH v5 02/14] perf bench mem: Defer type munging of size to float Ankur Arora
2025-07-15 20:05   ` Namhyung Kim
2025-07-16  2:17     ` Ankur Arora
2025-07-10  0:59 ` [PATCH v5 03/14] perf bench mem: Move mem op parameters into a structure Ankur Arora
2025-07-15 20:06   ` Namhyung Kim
2025-07-10  0:59 ` [PATCH v5 04/14] perf bench mem: Pull out init/fini logic Ankur Arora
2025-07-15 20:09   ` Namhyung Kim
2025-07-10  0:59 ` [PATCH v5 05/14] perf bench mem: Switch from zalloc() to mmap() Ankur Arora
2025-07-15 20:09   ` Namhyung Kim
2025-07-10  0:59 ` [PATCH v5 06/14] perf bench mem: Allow mapping of hugepages Ankur Arora
2025-07-15 20:12   ` Namhyung Kim
2025-07-16  2:32     ` Ankur Arora
2025-07-10  0:59 ` [PATCH v5 07/14] perf bench mem: Allow chunking on a memory region Ankur Arora
2025-07-15 20:17   ` Namhyung Kim
2025-07-16  2:34     ` Ankur Arora
2025-07-10  0:59 ` [PATCH v5 08/14] perf bench mem: Refactor mem_options Ankur Arora
2025-07-15 20:18   ` Namhyung Kim
2025-07-10  0:59 ` [PATCH v5 09/14] perf bench mem: Add mmap() workloads Ankur Arora
2025-07-15 20:20   ` Namhyung Kim [this message]
2025-07-16  2:40     ` Ankur Arora
2025-07-10  0:59 ` [PATCH v5 10/14] x86/mm: Simplify clear_page_* Ankur Arora
2025-07-11 11:47   ` David Hildenbrand
2025-07-11 17:26     ` Ankur Arora
2025-07-11 19:03       ` David Hildenbrand
2025-07-11 19:24         ` Ankur Arora
2025-07-11 19:27           ` David Hildenbrand
2025-07-10  0:59 ` [PATCH v5 11/14] x86/clear_page: Introduce clear_pages() Ankur Arora
2025-07-10  0:59 ` [PATCH v5 12/14] mm: add config option for clearing page-extents Ankur Arora
2025-07-10  7:58   ` Andrew Morton
2025-07-10 16:31     ` Ankur Arora
2025-07-11 11:39   ` David Hildenbrand
2025-07-11 17:25     ` Ankur Arora
2025-07-11 19:14       ` David Hildenbrand
2025-07-11 19:35         ` Ankur Arora
2025-07-11 11:40   ` David Hildenbrand
2025-07-11 17:32     ` Ankur Arora
2025-07-11 19:26       ` David Hildenbrand
2025-07-11 19:42         ` Ankur Arora
2025-07-14 20:35         ` Ankur Arora
2025-07-15 20:59           ` David Hildenbrand
2025-07-10  0:59 ` [PATCH v5 13/14] mm: memory: support " Ankur Arora
2025-07-11 11:44   ` David Hildenbrand
2025-07-11 13:27     ` Raghavendra K T
2025-07-11 17:39     ` Ankur Arora
2025-07-15 22:08   ` David Hildenbrand
2025-07-16  3:19     ` Ankur Arora
2025-07-16  8:03       ` David Hildenbrand
2025-07-16 17:54         ` Ankur Arora
2025-07-10  0:59 ` [PATCH v5 14/14] x86/clear_pages: Support clearing of page-extents Ankur Arora

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aHa3_9ijsm3FC6_8@google.com \
    --to=namhyung@kernel.org \
    --cc=acme@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=ankur.a.arora@oracle.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=hpa@zytor.com \
    --cc=konrad.wilk@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=mjguzik@gmail.com \
    --cc=peterz@infradead.org \
    --cc=raghavendra.kt@amd.com \
    --cc=tglx@linutronix.de \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.