From: "Yan, Zheng" <zheng.z.yan@intel.com>
To: Stephane Eranian <eranian@google.com>
Cc: a.p.zijlstra@chello.nl, mingo@elte.hu, andi@firstfloor.org,
jolsa@redhat.com, linux-kernel@vger.kernel.org
Subject: Re: [RFC PATCH] perf/x86: Add cpumask for uncore PMU.
Date: Fri, 17 Aug 2012 16:55:38 +0800 [thread overview]
Message-ID: <502E070A.9010604@intel.com> (raw)
In-Reply-To: <CABPqkBTKwk1wqV0YWGRn1sHXBM0yix7jrGRFsW-JT4_=Y+F+wg@mail.gmail.com>
On 08/17/2012 04:00 PM, Stephane Eranian wrote:
> On Mon, Aug 6, 2012 at 9:35 AM, Yan, Zheng <zheng.z.yan@intel.com> wrote:
>>
>> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>>
>> This RFC patch adds a cpumask file to the uncore pmu sysfs directory.
>> If user doesn't explicitly specify CPU list, perf-stat only collects
>> uncore events on CPUs listed in the cpumask file.
>>
>> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
>> ---
>> arch/x86/kernel/cpu/perf_event_intel_uncore.c | 28 ++++++++++++++++++++++---
>> arch/x86/kernel/cpu/perf_event_intel_uncore.h | 6 ++++--
>> tools/perf/builtin-stat.c | 30 ++++++++++++++++++---------
>> tools/perf/util/cpumap.c | 22 +++++++++++++-------
>> tools/perf/util/cpumap.h | 2 +-
>> tools/perf/util/evsel.h | 1 +
>> tools/perf/util/parse-events.c | 14 +++++++------
>> tools/perf/util/pmu.c | 30 +++++++++++++++++++++++++++
>> tools/perf/util/pmu.h | 1 +
>> 9 files changed, 105 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
>> index 0a55710..62ec3e6 100644
>> --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
>> +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
>> @@ -2341,6 +2341,27 @@ int uncore_pmu_event_init(struct perf_event *event)
>> return ret;
>> }
>>
>> +static ssize_t uncore_get_attr_cpumask(struct device *dev,
>> + struct device_attribute *attr, char *buf)
>> +{
>> + int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask);
>> +
>> + buf[n++] = '\n';
>
> Why the \n. I thought there were no \n in sysfs files.
>
>> + buf[n] = '\0';
>> + return n;
>> +}
>> +
>> +static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
>> +
>> +static struct attribute *uncore_pmu_attrs[] = {
>> + &dev_attr_cpumask.attr,
>> + NULL,
>> +};
>> +
>> +static struct attribute_group uncore_pmu_attr_group = {
>> + .attrs = uncore_pmu_attrs,
>> +};
>> +
>> static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
>> {
>> int ret;
>> @@ -2378,8 +2399,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
>> free_percpu(type->pmus[i].box);
>> kfree(type->pmus);
>> type->pmus = NULL;
>> - kfree(type->attr_groups[1]);
>> - type->attr_groups[1] = NULL;
>> + kfree(type->events_group);
>> + type->events_group = NULL;
>> }
>>
>> static void __init uncore_types_exit(struct intel_uncore_type **types)
>> @@ -2431,9 +2452,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
>> for (j = 0; j < i; j++)
>> attrs[j] = &type->event_descs[j].attr.attr;
>>
>> - type->attr_groups[1] = events_group;
>> + type->events_group = events_group;
>> }
>>
>> + type->pmu_group = &uncore_pmu_attr_group;
>> type->pmus = pmus;
>> return 0;
>> fail:
>> diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
>> index 5b81c18..e68a455 100644
>> --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
>> +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
>> @@ -369,10 +369,12 @@ struct intel_uncore_type {
>> struct intel_uncore_pmu *pmus;
>> struct intel_uncore_ops *ops;
>> struct uncore_event_desc *event_descs;
>> - const struct attribute_group *attr_groups[3];
>> + const struct attribute_group *attr_groups[4];
>> };
>>
>> -#define format_group attr_groups[0]
>> +#define pmu_group attr_groups[0]
>> +#define format_group attr_groups[1]
>> +#define events_group attr_groups[2]
>>
>> struct intel_uncore_ops {
>> void (*init_box)(struct intel_uncore_box *);
>> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
>> index 861f0ae..8b4275c 100644
>> --- a/tools/perf/builtin-stat.c
>> +++ b/tools/perf/builtin-stat.c
>> @@ -220,6 +220,16 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
>> evsel->priv = NULL;
>> }
>>
>> +static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
>> +{
>> + return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
>> +}
>> +
>> +static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
>> +{
>> + return perf_evsel__cpus(evsel)->nr;
>> +}
>> +
>> static void update_stats(struct stats *stats, u64 val)
>> {
>> double delta;
>> @@ -299,7 +309,7 @@ retry:
>> evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
>>
>> if (perf_target__has_cpu(&target)) {
>> - ret = perf_evsel__open_per_cpu(evsel, evsel_list->cpus,
>> + ret = perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel),
>> group, group_fd);
>> if (ret)
>> goto check_ret;
>> @@ -382,7 +392,7 @@ static int read_counter_aggr(struct perf_evsel *counter)
>> u64 *count = counter->counts->aggr.values;
>> int i;
>>
>> - if (__perf_evsel__read(counter, evsel_list->cpus->nr,
>> + if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
>> evsel_list->threads->nr, scale) < 0)
>> return -1;
>>
>> @@ -411,7 +421,7 @@ static int read_counter(struct perf_evsel *counter)
>> u64 *count;
>> int cpu;
>>
>> - for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
>> + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
>> if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
>> return -1;
>>
>> @@ -546,12 +556,12 @@ static int run_perf_stat(int argc __used, const char **argv)
>> if (no_aggr) {
>> list_for_each_entry(counter, &evsel_list->entries, node) {
>> read_counter(counter);
>> - perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1);
>> + perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
>> }
>> } else {
>> list_for_each_entry(counter, &evsel_list->entries, node) {
>> read_counter_aggr(counter);
>> - perf_evsel__close_fd(counter, evsel_list->cpus->nr,
>> + perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
>> evsel_list->threads->nr);
>> }
>> }
>> @@ -592,7 +602,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
>> if (no_aggr)
>> sprintf(cpustr, "CPU%*d%s",
>> csv_output ? 0 : -4,
>> - evsel_list->cpus->map[cpu], csv_sep);
>> + perf_evsel__cpus(evsel)->map[cpu], csv_sep);
>>
>> fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel));
>>
>> @@ -788,7 +798,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
>> if (no_aggr)
>> sprintf(cpustr, "CPU%*d%s",
>> csv_output ? 0 : -4,
>> - evsel_list->cpus->map[cpu], csv_sep);
>> + perf_evsel__cpus(evsel)->map[cpu], csv_sep);
>> else
>> cpu = 0;
>>
>> @@ -949,14 +959,14 @@ static void print_counter(struct perf_evsel *counter)
>> u64 ena, run, val;
>> int cpu;
>>
>> - for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
>> + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
>> val = counter->counts->cpu[cpu].val;
>> ena = counter->counts->cpu[cpu].ena;
>> run = counter->counts->cpu[cpu].run;
>> if (run == 0 || ena == 0) {
>> fprintf(output, "CPU%*d%s%*s%s%*s",
>> csv_output ? 0 : -4,
>> - evsel_list->cpus->map[cpu], csv_sep,
>> + perf_evsel__cpus(counter)->map[cpu], csv_sep,
>> csv_output ? 0 : 18,
>> counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
>> csv_sep,
>> @@ -1255,7 +1265,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
>>
>> list_for_each_entry(pos, &evsel_list->entries, node) {
>> if (perf_evsel__alloc_stat_priv(pos) < 0 ||
>> - perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0)
>> + perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0)
>> goto out_free_fd;
>> }
>>
>> diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
>> index adc72f0..2b32ffa 100644
>> --- a/tools/perf/util/cpumap.c
>> +++ b/tools/perf/util/cpumap.c
>> @@ -38,24 +38,19 @@ static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
>> return cpus;
>> }
>>
>> -static struct cpu_map *cpu_map__read_all_cpu_map(void)
>> +struct cpu_map *cpu_map__read(FILE *file)
>> {
>> struct cpu_map *cpus = NULL;
>> - FILE *onlnf;
>> int nr_cpus = 0;
>> int *tmp_cpus = NULL, *tmp;
>> int max_entries = 0;
>> int n, cpu, prev;
>> char sep;
>>
>> - onlnf = fopen("/sys/devices/system/cpu/online", "r");
>> - if (!onlnf)
>> - return cpu_map__default_new();
>> -
>> sep = 0;
>> prev = -1;
>> for (;;) {
>> - n = fscanf(onlnf, "%u%c", &cpu, &sep);
>> + n = fscanf(file, "%u%c", &cpu, &sep);
>> if (n <= 0)
>> break;
>> if (prev >= 0) {
>> @@ -95,6 +90,19 @@ static struct cpu_map *cpu_map__read_all_cpu_map(void)
>> cpus = cpu_map__default_new();
>> out_free_tmp:
>> free(tmp_cpus);
>> + return cpus;
>> +}
>> +
>> +static struct cpu_map *cpu_map__read_all_cpu_map(void)
>> +{
>> + struct cpu_map *cpus = NULL;
>> + FILE *onlnf;
>> +
>> + onlnf = fopen("/sys/devices/system/cpu/online", "r");
>> + if (!onlnf)
>> + return cpu_map__default_new();
>> +
>> + cpus = cpu_map__read(onlnf);
>> fclose(onlnf);
>> return cpus;
>> }
>> diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
>> index c415185..17b5264 100644
>> --- a/tools/perf/util/cpumap.h
>> +++ b/tools/perf/util/cpumap.h
>> @@ -11,7 +11,7 @@ struct cpu_map {
>> struct cpu_map *cpu_map__new(const char *cpu_list);
>> struct cpu_map *cpu_map__dummy_new(void);
>> void cpu_map__delete(struct cpu_map *map);
>> -
>> +struct cpu_map *cpu_map__read(FILE *file);
>> size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
>>
>> #endif /* __PERF_CPUMAP_H */
>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>> index b559929..26c0095 100644
>> --- a/tools/perf/util/evsel.h
>> +++ b/tools/perf/util/evsel.h
>> @@ -65,6 +65,7 @@ struct perf_evsel {
>> void *func;
>> void *data;
>> } handler;
>> + struct cpu_map *cpus;
>> unsigned int sample_size;
>> bool supported;
>> };
>> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
>> index 74a5af4..af07c4f 100644
>> --- a/tools/perf/util/parse-events.c
>> +++ b/tools/perf/util/parse-events.c
>> @@ -240,7 +240,8 @@ const char *event_type(int type)
>> }
>>
>> static int add_event(struct list_head **_list, int *idx,
>> - struct perf_event_attr *attr, char *name)
>> + struct perf_event_attr *attr,
>> + struct cpu_map *cpus, char *name)
>> {
>> struct perf_evsel *evsel;
>> struct list_head *list = *_list;
>> @@ -260,6 +261,7 @@ static int add_event(struct list_head **_list, int *idx,
>> return -ENOMEM;
>> }
>>
>> + evsel->cpus = cpus;
>> if (name)
>> evsel->name = strdup(name);
>> list_add_tail(&evsel->node, list);
>> @@ -343,7 +345,7 @@ int parse_events_add_cache(struct list_head **list, int *idx,
>> memset(&attr, 0, sizeof(attr));
>> attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
>> attr.type = PERF_TYPE_HW_CACHE;
>> - return add_event(list, idx, &attr, name);
>> + return add_event(list, idx, &attr, NULL, name);
>> }
>>
>> static int add_tracepoint(struct list_head **list, int *idx,
>> @@ -381,7 +383,7 @@ static int add_tracepoint(struct list_head **list, int *idx,
>> attr.sample_period = 1;
>>
>> snprintf(name, MAX_NAME_LEN, "%s:%s", sys_name, evt_name);
>> - return add_event(list, idx, &attr, name);
>> + return add_event(list, idx, &attr, NULL, name);
>> }
>>
>> static int add_tracepoint_multi(struct list_head **list, int *idx,
>> @@ -492,7 +494,7 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx,
>> attr.type = PERF_TYPE_BREAKPOINT;
>> attr.sample_period = 1;
>>
>> - return add_event(list, idx, &attr, NULL);
>> + return add_event(list, idx, &attr, NULL, NULL);
>> }
>>
>> static int config_term(struct perf_event_attr *attr,
>> @@ -564,7 +566,7 @@ int parse_events_add_numeric(struct list_head **list, int *idx,
>> config_attr(&attr, head_config, 1))
>> return -EINVAL;
>>
>> - return add_event(list, idx, &attr, NULL);
>> + return add_event(list, idx, &attr, NULL, NULL);
>> }
>>
>> static int parse_events__is_name_term(struct parse_events__term *term)
>> @@ -607,7 +609,7 @@ int parse_events_add_pmu(struct list_head **list, int *idx,
>> if (perf_pmu__config(pmu, &attr, head_config))
>> return -EINVAL;
>>
>> - return add_event(list, idx, &attr,
>> + return add_event(list, idx, &attr, pmu->cpus,
>> pmu_event_name(head_config));
>> }
>>
>> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
>> index 67715a4..0b9aca6 100644
>> --- a/tools/perf/util/pmu.c
>> +++ b/tools/perf/util/pmu.c
>> @@ -9,6 +9,7 @@
>> #include "util.h"
>> #include "pmu.h"
>> #include "parse-events.h"
>> +#include "cpumap.h"
>>
>> int perf_pmu_parse(struct list_head *list, char *name);
>> extern FILE *perf_pmu_in;
>> @@ -222,6 +223,33 @@ static int pmu_type(char *name, __u32 *type)
>> return ret;
>> }
>>
>> +static struct cpu_map *pmu_cpumask(char *name)
>> +{
>> + struct stat st;
>> + char path[PATH_MAX];
>> + const char *sysfs;
>> + FILE *file;
>> + struct cpu_map *cpus;
>> +
>> + sysfs = sysfs_find_mountpoint();
>> + if (!sysfs)
>> + return NULL;
>> +
>> + snprintf(path, PATH_MAX,
>> + "%s/bus/event_source/devices/%s/cpumask", sysfs, name);
>> +
>> + if (stat(path, &st) < 0)
>> + return NULL;
>> +
>> + file = fopen(path, "r");
>> + if (!file)
>> + return NULL;
>> +
>> + cpus = cpu_map__read(file);
>> + fclose(file);
>> + return cpus;
>> +}
>> +
>> static struct perf_pmu *pmu_lookup(char *name)
>> {
>> struct perf_pmu *pmu;
>> @@ -244,6 +272,8 @@ static struct perf_pmu *pmu_lookup(char *name)
>> if (!pmu)
>> return NULL;
>>
>> + pmu->cpus = pmu_cpumask(name);
>> +
>> pmu_aliases(name, &aliases);
>>
>> INIT_LIST_HEAD(&pmu->format);
>> diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
>> index 535f2c5..277c874 100644
>> --- a/tools/perf/util/pmu.h
>> +++ b/tools/perf/util/pmu.h
>> @@ -28,6 +28,7 @@ struct perf_pmu__alias {
>> struct perf_pmu {
>> char *name;
>> __u32 type;
>> + struct cpu_map *cpus;
>> struct list_head format;
>> struct list_head aliases;
>> struct list_head list;
>> --
>> 1.7.11.2
>>
>
> The key here is to ensure that perf stat -a picks one CPU on each socket
> and that from that CPU we can measure events on ALL the uncore PMU
> boxes on the socket. I think this is the case today. For instance, it is does
> not really make sense to measure one C-Box at a time. For any C-box-level
> metric, you need to cover all 8 of them.
>
how about making perf-stat recognize wildcard character in the pmu name and
aggregate counts across all PMUs? For example:
# perf stat -a uncore_cbox_*/event=0/ sleep 1
> How does this play with the -C option of perf stat? I assume -C overrides.
>
yes
Regards
Yan, Zheng
> In case perf stat picks up the CPUs from cpumask, then I'd like to keep
> a trace of which CPUs were actually used. In other words, I think it would
> be useful to have this printed somewhere in the output. Unless this shows
> up only when you use the -A option.
>
prev parent reply other threads:[~2012-08-17 8:56 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-08-06 7:35 [RFC PATCH] perf/x86: Add cpumask for uncore PMU Yan, Zheng
2012-08-16 11:11 ` Peter Zijlstra
2012-08-17 8:00 ` Stephane Eranian
2012-08-17 8:55 ` Yan, Zheng [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=502E070A.9010604@intel.com \
--to=zheng.z.yan@intel.com \
--cc=a.p.zijlstra@chello.nl \
--cc=andi@firstfloor.org \
--cc=eranian@google.com \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.