From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754025AbdHXUjS (ORCPT ); Thu, 24 Aug 2017 16:39:18 -0400 Received: from smtp.codeaurora.org ([198.145.29.96]:59832 "EHLO smtp.codeaurora.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753593AbdHXUjP (ORCPT ); Thu, 24 Aug 2017 16:39:15 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 smtp.codeaurora.org 502D46072C Authentication-Results: pdx-caf-mail.web.codeaurora.org; dmarc=none (p=none dis=none) header.from=codeaurora.org Authentication-Results: pdx-caf-mail.web.codeaurora.org; spf=none smtp.mailfrom=alindsay@codeaurora.org Date: Thu, 24 Aug 2017 16:39:02 -0400 From: Aaron Lindsay To: Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Alexander Shishkin , linux-kernel@vger.kernel.org, Andi Kleen Cc: Digant Desai , timur@codeaurora.org Subject: Re: [PATCH] perf stat: Add event-interval-print option Message-ID: <20170824203902.GA5620@codeaurora.org> References: <1502896117-19420-1-git-send-email-alindsay@codeaurora.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1502896117-19420-1-git-send-email-alindsay@codeaurora.org> User-Agent: Mutt/1.5.23 (2014-03-12) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Does anyone have any feedback on this patch or the idea in general? Also, (for our bookkeeping - not trying to rush things) is there any chance this will still make it in for the 4.14 merge window, or is it 4.15 material at the earliest? -Aaron On Aug 16 11:08, Aaron Lindsay wrote: > This allows for printing counts at regular intervals based on > instances of an arbitrary event (currently the first event provided). > > This can be useful when comparing `perf stat` runs of fixed-work > workloads. For instance, you may want to compare interval-by-interval > stats between two runs based on instruction count rather than time > intervals to ensure the same sections of the userspace component of code > are aligned when comparing them (which wouldn't necessarily be the case > if the kernel was misbehaving in only one of the compared runs). > > Signed-off-by: Aaron Lindsay > Tested-by: Digant Desai > --- > tools/perf/builtin-stat.c | 114 ++++++++++++++++++++++++++++++++++++++++++---- > tools/perf/util/evsel.h | 1 + > tools/perf/util/stat.h | 1 + > 3 files changed, 107 insertions(+), 9 deletions(-) > > diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c > index 48ac53b..e400cd3 100644 > --- a/tools/perf/builtin-stat.c > +++ b/tools/perf/builtin-stat.c > @@ -71,7 +71,9 @@ > #include > #include > #include > +#include > #include > +#include > #include > #include > #include > @@ -224,7 +226,8 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) > * Some events get initialized with sample_(period/type) set, > * like tracepoints. Clear it up for counting. > */ > - attr->sample_period = 0; > + if (!evsel->event_interval) > + attr->sample_period = 0; > > /* > * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless > @@ -562,6 +565,7 @@ static int store_counter_ids(struct perf_evsel *counter) > static int __run_perf_stat(int argc, const char **argv) > { > int interval = stat_config.interval; > + int event_interval = stat_config.event_interval; > char msg[BUFSIZ]; > unsigned long long t0, t1; > struct perf_evsel *counter; > @@ -571,6 +575,9 @@ static int __run_perf_stat(int argc, const char **argv) > const bool forks = (argc > 0); > bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false; > struct perf_evsel_config_term *err_term; > + bool first_counter = true; > + > + struct pollfd interval_pollfd = {.fd = 0, .events = POLLIN}; > > if (interval) { > ts.tv_sec = interval / USEC_PER_MSEC; > @@ -594,6 +601,32 @@ static int __run_perf_stat(int argc, const char **argv) > > evlist__for_each_entry(evsel_list, counter) { > try_again: > + if (first_counter && event_interval) { > + /* > + * Setup this counter as an event interval by pinning > + * it, setting it to sample based on the user-specified > + * period, and to return from polling on expiration > + * (i.e. when any data is written to the buffer) > + */ > + int nthreads = thread_map__nr(evsel_list->threads); > + int ncpus = perf_evsel__nr_cpus(counter); > + > + if (nthreads > 1 || ncpus > 1) { > + perror("Failed to setup event-interval-print " > + "because the event has more than one " > + "CPU or thread."); > + return -1; > + } > + > + counter->attr.pinned = 1; > + counter->attr.watermark = 1; > + counter->attr.wakeup_watermark = 1; > + counter->attr.sample_period = event_interval; > + counter->attr.freq = 0; > + counter->event_interval = true; > + > + pr_debug2("Polling on %s\n", perf_evsel__name(counter)); > + } > if (create_perf_stat_counter(counter) < 0) { > /* > * PPC returns ENXIO for HW counters until 2.6.37 > @@ -633,6 +666,33 @@ static int __run_perf_stat(int argc, const char **argv) > > if (STAT_RECORD && store_counter_ids(counter)) > return -1; > + > + if (first_counter && event_interval) { > + void *m; > + > + interval_pollfd.fd = > + *(int *)xyarray__entry(counter->fd, 0, 0); > + > + /* > + * Do not set PROT_WRITE since the kernel will stop > + * writing to the mmap buffer when it fills if > + * data_tail is not set. Map 2 pages since the kernel > + * requires the first for metadata. > + */ > + m = mmap(NULL, 2 * getpagesize(), PROT_READ, MAP_SHARED, > + interval_pollfd.fd, 0); > + if (m == (void *) -1) { > + perror("Failed to mmap print-event-interval " > + "fd"); > + return -1; > + } > + } > + > + first_counter = false; > + } > + if (event_interval && !interval_pollfd.fd) { > + perror("Failed to initialize print-event-interval fd"); > + return -1; > } > > if (perf_evlist__apply_filters(evsel_list, &counter)) { > @@ -677,7 +737,18 @@ static int __run_perf_stat(int argc, const char **argv) > perf_evlist__start_workload(evsel_list); > enable_counters(); > > - if (interval) { > + if (event_interval) { > + while (!waitpid(child_pid, &status, WNOHANG)) { > + /* > + * Wait for the event to expire for at most 1 > + * second > + */ > + if (poll(&interval_pollfd, 1, 1000) > 0) { > + if (interval_pollfd.revents & (POLLIN)) > + process_interval(); > + } > + } > + } else if (interval) { > while (!waitpid(child_pid, &status, WNOHANG)) { > nanosleep(&ts, NULL); > process_interval(); > @@ -696,9 +767,20 @@ static int __run_perf_stat(int argc, const char **argv) > } else { > enable_counters(); > while (!done) { > - nanosleep(&ts, NULL); > - if (interval) > - process_interval(); > + if (event_interval) { > + /* > + * Wait for the event to expire for at most 1 > + * second > + */ > + if (poll(&interval_pollfd, 1, 1000) > 0) { > + if (interval_pollfd.revents & (POLLIN)) > + process_interval(); > + } > + } else { > + nanosleep(&ts, NULL); > + if (interval) > + process_interval(); > + } > } > } > > @@ -1614,7 +1696,7 @@ static void print_footer(void) > > static void print_counters(struct timespec *ts, int argc, const char **argv) > { > - int interval = stat_config.interval; > + int interval = stat_config.interval || stat_config.event_interval; > struct perf_evsel *counter; > char buf[64], *prefix = NULL; > > @@ -1781,6 +1863,8 @@ static int enable_metric_only(const struct option *opt __maybe_unused, > "command to run after to the measured command"), > OPT_UINTEGER('I', "interval-print", &stat_config.interval, > "print counts at regular interval in ms (>= 10)"), > + OPT_UINTEGER('E', "event-interval-print", &stat_config.event_interval, > + "print counts every of the first event specified"), > OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, > "aggregate counts per processor socket", AGGR_SOCKET), > OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, > @@ -2546,7 +2630,7 @@ int cmd_stat(int argc, const char **argv) > int status = -EINVAL, run_idx; > const char *mode; > FILE *output = stderr; > - unsigned int interval; > + unsigned int interval, event_interval; > const char * const stat_subcommands[] = { "record", "report" }; > > setlocale(LC_ALL, ""); > @@ -2577,6 +2661,7 @@ int cmd_stat(int argc, const char **argv) > return __cmd_report(argc, argv); > > interval = stat_config.interval; > + event_interval = stat_config.event_interval; > > /* > * For record command the -o is already taken care of. > @@ -2704,6 +2789,17 @@ int cmd_stat(int argc, const char **argv) > if (stat_config.aggr_mode == AGGR_THREAD) > thread_map__read_comms(evsel_list->threads); > > + if (interval && event_interval) { > + pr_err("interval-print and event-interval-print options " > + "cannot be used together\n"); > + goto out; > + } > + > + if (event_interval && !no_inherit) { > + pr_err("The event-interval-print option requires no-inherit\n"); > + goto out; > + } > + > if (interval && interval < 100) { > if (interval < 10) { > pr_err("print interval must be >= 10ms\n"); > @@ -2715,7 +2811,7 @@ int cmd_stat(int argc, const char **argv) > "Please proceed with caution.\n"); > } > > - if (perf_evlist__alloc_stats(evsel_list, interval)) > + if (perf_evlist__alloc_stats(evsel_list, interval || event_interval)) > goto out; > > if (perf_stat_init_aggr_mode()) > @@ -2747,7 +2843,7 @@ int cmd_stat(int argc, const char **argv) > } > } > > - if (!forever && status != -1 && !interval) > + if (!forever && status != -1 && !interval && !event_interval) > print_counters(NULL, argc, argv); > > if (STAT_RECORD) { > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h > index d101695..44a65e6 100644 > --- a/tools/perf/util/evsel.h > +++ b/tools/perf/util/evsel.h > @@ -121,6 +121,7 @@ struct perf_evsel { > bool per_pkg; > bool precise_max; > bool ignore_missing_thread; > + bool event_interval; > /* parse modifier helper */ > int exclude_GH; > int nr_members; > diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h > index 7522bf1..db4dda4 100644 > --- a/tools/perf/util/stat.h > +++ b/tools/perf/util/stat.h > @@ -46,6 +46,7 @@ struct perf_stat_config { > bool scale; > FILE *output; > unsigned int interval; > + unsigned int event_interval; > }; > > void update_stats(struct stats *stats, u64 val); > -- > Qualcomm Datacenter Technologies as an affiliate of Qualcomm Technologies, Inc. > Qualcomm Technologies, Inc. is a member of the > Code Aurora Forum, a Linux Foundation Collaborative Project. > -- Qualcomm Datacenter Technologies as an affiliate of Qualcomm Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.