All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <arnaldo.melo@gmail.com>
To: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>, Andi Kleen <ak@linux.intel.com>,
	linux-kernel <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v5 3/3] perf record: adapt affinity to machines with #CPUs > 1K
Date: Wed, 4 Dec 2019 10:48:36 -0300	[thread overview]
Message-ID: <20191204134836.GA31283@kernel.org> (raw)
In-Reply-To: <96d7e2ff-ce8b-c1e0-d52c-aa59ea96f0ea@linux.intel.com>

Em Tue, Dec 03, 2019 at 02:45:27PM +0300, Alexey Budankov escreveu:
> 
> Use the struct mmap_cpu_mask type for the tool's thread and mmap data
> buffers to overcome the current 1024-CPU mask size limitation of the
> cpu_set_t type.
> 
> Currently the glibc cpu_set_t type has an internal mask size limit
> of 1024 CPUs. Moving to the struct mmap_cpu_mask type allows
> overcoming that limit. The tools bitmap API is used to manipulate
> objects of the struct mmap_cpu_mask type.

I had to apply this to fix the build on some toolchains/arches (it
switches the format specifier used for affinity_mask.nbits from %ld to %zd):

[acme@quaco perf]$ git diff
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7bc83755ef8c..4c301466101b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2508,10 +2508,10 @@ int cmd_record(int argc, const char **argv)
                rec->affinity_mask.nbits = cpu__max_cpu();
                rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
                if (!rec->affinity_mask.bits) {
-                       pr_err("Failed to allocate thread mask for %ld cpus\n", rec->affinity_mask.nbits);
+                       pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
                        return -ENOMEM;
                }
-               pr_debug2("thread mask[%ld]: empty\n", rec->affinity_mask.nbits);
+               pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
        }

        err = record__auxtrace_init(rec);


 
> Reported-by: Andi Kleen <ak@linux.intel.com>
> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
> ---
>  tools/perf/builtin-record.c | 28 ++++++++++++++++++++++------
>  tools/perf/util/mmap.c      | 28 ++++++++++++++++++++++------
>  tools/perf/util/mmap.h      |  2 +-
>  3 files changed, 45 insertions(+), 13 deletions(-)
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index fb19ef63cc35..7bc83755ef8c 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -62,6 +62,7 @@
>  #include <linux/string.h>
>  #include <linux/time64.h>
>  #include <linux/zalloc.h>
> +#include <linux/bitmap.h>
>  
>  struct switch_output {
>  	bool		 enabled;
> @@ -93,7 +94,7 @@ struct record {
>  	bool			timestamp_boundary;
>  	struct switch_output	switch_output;
>  	unsigned long long	samples;
> -	cpu_set_t		affinity_mask;
> +	struct mmap_cpu_mask	affinity_mask;
>  	unsigned long		output_max_size;	/* = 0: unlimited */
>  };
>  
> @@ -961,10 +962,15 @@ static struct perf_event_header finished_round_event = {
>  static void record__adjust_affinity(struct record *rec, struct mmap *map)
>  {
>  	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
> -	    !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
> -		CPU_ZERO(&rec->affinity_mask);
> -		CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
> -		sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
> +	    !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
> +			  rec->affinity_mask.nbits)) {
> +		bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
> +		bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
> +			  map->affinity_mask.bits, rec->affinity_mask.nbits);
> +		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
> +				  (cpu_set_t *)rec->affinity_mask.bits);
> +		if (verbose == 2)
> +			mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
>  	}
>  }
>  
> @@ -2433,7 +2439,6 @@ int cmd_record(int argc, const char **argv)
>  # undef REASON
>  #endif
>  
> -	CPU_ZERO(&rec->affinity_mask);
>  	rec->opts.affinity = PERF_AFFINITY_SYS;
>  
>  	rec->evlist = evlist__new();
> @@ -2499,6 +2504,16 @@ int cmd_record(int argc, const char **argv)
>  
>  	symbol__init(NULL);
>  
> +	if (rec->opts.affinity != PERF_AFFINITY_SYS) {
> +		rec->affinity_mask.nbits = cpu__max_cpu();
> +		rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
> +		if (!rec->affinity_mask.bits) {
> +			pr_err("Failed to allocate thread mask for %ld cpus\n", rec->affinity_mask.nbits);
> +			return -ENOMEM;
> +		}
> +		pr_debug2("thread mask[%ld]: empty\n", rec->affinity_mask.nbits);
> +	}
> +
>  	err = record__auxtrace_init(rec);
>  	if (err)
>  		goto out;
> @@ -2613,6 +2628,7 @@ int cmd_record(int argc, const char **argv)
>  
>  	err = __cmd_record(&record, argc, argv);
>  out:
> +	bitmap_free(rec->affinity_mask.bits);
>  	evlist__delete(rec->evlist);
>  	symbol__exit();
>  	auxtrace_record__free(rec->itr);
> diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
> index 43c12b4a3e17..832d2cb94b2c 100644
> --- a/tools/perf/util/mmap.c
> +++ b/tools/perf/util/mmap.c
> @@ -219,6 +219,8 @@ static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused)
>  
>  void mmap__munmap(struct mmap *map)
>  {
> +	bitmap_free(map->affinity_mask.bits);
> +
>  	perf_mmap__aio_munmap(map);
>  	if (map->data != NULL) {
>  		munmap(map->data, mmap__mmap_len(map));
> @@ -227,7 +229,7 @@ void mmap__munmap(struct mmap *map)
>  	auxtrace_mmap__munmap(&map->auxtrace_mmap);
>  }
>  
> -static void build_node_mask(int node, cpu_set_t *mask)
> +static void build_node_mask(int node, struct mmap_cpu_mask *mask)
>  {
>  	int c, cpu, nr_cpus;
>  	const struct perf_cpu_map *cpu_map = NULL;
> @@ -240,17 +242,23 @@ static void build_node_mask(int node, cpu_set_t *mask)
>  	for (c = 0; c < nr_cpus; c++) {
>  		cpu = cpu_map->map[c]; /* map c index to online cpu index */
>  		if (cpu__get_node(cpu) == node)
> -			CPU_SET(cpu, mask);
> +			set_bit(cpu, mask->bits);
>  	}
>  }
>  
> -static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
> +static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp)
>  {
> -	CPU_ZERO(&map->affinity_mask);
> +	map->affinity_mask.nbits = cpu__max_cpu();
> +	map->affinity_mask.bits = bitmap_alloc(map->affinity_mask.nbits);
> +	if (!map->affinity_mask.bits)
> +		return -1;
> +
>  	if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
>  		build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask);
>  	else if (mp->affinity == PERF_AFFINITY_CPU)
> -		CPU_SET(map->core.cpu, &map->affinity_mask);
> +		set_bit(map->core.cpu, map->affinity_mask.bits);
> +
> +	return 0;
>  }
>  
>  int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
> @@ -261,7 +269,15 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
>  		return -1;
>  	}
>  
> -	perf_mmap__setup_affinity_mask(map, mp);
> +	if (mp->affinity != PERF_AFFINITY_SYS &&
> +		perf_mmap__setup_affinity_mask(map, mp)) {
> +		pr_debug2("failed to alloc mmap affinity mask, error %d\n",
> +			  errno);
> +		return -1;
> +	}
> +
> +	if (verbose == 2)
> +		mmap_cpu_mask__scnprintf(&map->affinity_mask, "mmap");
>  
>  	map->core.flush = mp->flush;
>  
> diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
> index ef51667fabcb..9d5f589f02ae 100644
> --- a/tools/perf/util/mmap.h
> +++ b/tools/perf/util/mmap.h
> @@ -40,7 +40,7 @@ struct mmap {
>  		int		 nr_cblocks;
>  	} aio;
>  #endif
> -	cpu_set_t	affinity_mask;
> +	struct mmap_cpu_mask	affinity_mask;
>  	void		*data;
>  	int		comp_level;
>  };
> -- 
> 2.20.1
> 

-- 

- Arnaldo

  reply	other threads:[~2019-12-04 13:48 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-03 11:41 [PATCH v5 0/3] perf record: adapt NUMA awareness to machines with #CPUs > 1K Alexey Budankov
2019-12-03 11:43 ` [PATCH v5 1/3] tools bitmap: implement bitmap_equal() operation at bitmap API Alexey Budankov
2020-01-10 17:53   ` [tip: perf/core] tools bitmap: Implement " tip-bot2 for Alexey Budankov
2019-12-03 11:44 ` [PATCH v5 2/3] perf mmap: declare type for cpu mask of arbitrary length Alexey Budankov
2019-12-04 13:49   ` Arnaldo Carvalho de Melo
2019-12-05  7:31     ` Alexey Budankov
2020-01-10 17:53   ` [tip: perf/core] perf mmap: Declare " tip-bot2 for Alexey Budankov
2019-12-03 11:45 ` [PATCH v5 3/3] perf record: adapt affinity to machines with #CPUs > 1K Alexey Budankov
2019-12-04 13:48   ` Arnaldo Carvalho de Melo [this message]
2019-12-05  7:30     ` Alexey Budankov
2020-01-10 17:53   ` [tip: perf/core] perf record: Adapt " tip-bot2 for Alexey Budankov
2019-12-03 12:17 ` [PATCH v5 0/3] perf record: adapt NUMA awareness " Jiri Olsa
2019-12-03 18:36   ` Arnaldo Carvalho de Melo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191204134836.GA31283@kernel.org \
    --to=arnaldo.melo@gmail.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=alexey.budankov@linux.intel.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.