* [PATCH 1/2 -tip] perf_counter: parse-events.c define separate declarations for H/W and S/W events @ 2009-06-22 11:13 Jaswinder Singh Rajput 2009-06-22 11:14 ` [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol Jaswinder Singh Rajput 2009-06-22 11:38 ` [tip:perfcounters/urgent] perf_counter tools: Define separate declarations for H/W and S/W events tip-bot for Jaswinder Singh Rajput 0 siblings, 2 replies; 14+ messages in thread From: Jaswinder Singh Rajput @ 2009-06-22 11:13 UTC (permalink / raw) To: Ingo Molnar, Thomas Gleixner, Peter Zijlstra, LKML Define separate declarations for H/W and S/W events to : 1. shorten name to save some space so that we can add more members 2. Fix alignment 3. Avoid declaring HARDWARE/SOFTWARE again and again. Removed unused CR(x, y) Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com> --- tools/perf/util/parse-events.c | 42 ++++++++++++++++++++-------------------- 1 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 35d04da..12abab3 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -18,30 +18,30 @@ struct event_symbol { char *symbol; }; -#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y -#define CR(x, y) .type = PERF_TYPE_##x, .config = y +#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x +#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x static struct event_symbol event_symbols[] = { - { C(HARDWARE, HW_CPU_CYCLES), "cpu-cycles", }, - { C(HARDWARE, HW_CPU_CYCLES), "cycles", }, - { C(HARDWARE, HW_INSTRUCTIONS), "instructions", }, - { C(HARDWARE, HW_CACHE_REFERENCES), "cache-references", }, - { C(HARDWARE, HW_CACHE_MISSES), "cache-misses", }, - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions", }, - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches", }, - { C(HARDWARE, HW_BRANCH_MISSES), "branch-misses", }, - { 
C(HARDWARE, HW_BUS_CYCLES), "bus-cycles", }, - - { C(SOFTWARE, SW_CPU_CLOCK), "cpu-clock", }, - { C(SOFTWARE, SW_TASK_CLOCK), "task-clock", }, - { C(SOFTWARE, SW_PAGE_FAULTS), "page-faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS), "faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS_MIN), "minor-faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS_MAJ), "major-faults", }, - { C(SOFTWARE, SW_CONTEXT_SWITCHES), "context-switches", }, - { C(SOFTWARE, SW_CONTEXT_SWITCHES), "cs", }, - { C(SOFTWARE, SW_CPU_MIGRATIONS), "cpu-migrations", }, - { C(SOFTWARE, SW_CPU_MIGRATIONS), "migrations", }, + { CHW(CPU_CYCLES), "cpu-cycles", }, + { CHW(CPU_CYCLES), "cycles", }, + { CHW(INSTRUCTIONS), "instructions", }, + { CHW(CACHE_REFERENCES), "cache-references", }, + { CHW(CACHE_MISSES), "cache-misses", }, + { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", }, + { CHW(BRANCH_INSTRUCTIONS), "branches", }, + { CHW(BRANCH_MISSES), "branch-misses", }, + { CHW(BUS_CYCLES), "bus-cycles", }, + + { CSW(CPU_CLOCK), "cpu-clock", }, + { CSW(TASK_CLOCK), "task-clock", }, + { CSW(PAGE_FAULTS), "page-faults", }, + { CSW(PAGE_FAULTS), "faults", }, + { CSW(PAGE_FAULTS_MIN), "minor-faults", }, + { CSW(PAGE_FAULTS_MAJ), "major-faults", }, + { CSW(CONTEXT_SWITCHES), "context-switches", }, + { CSW(CONTEXT_SWITCHES), "cs", }, + { CSW(CPU_MIGRATIONS), "cpu-migrations", }, + { CSW(CPU_MIGRATIONS), "migrations", }, }; #define __PERF_COUNTER_FIELD(config, name) \ -- 1.6.0.6 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol 2009-06-22 11:13 [PATCH 1/2 -tip] perf_counter: parse-events.c define separate declarations for H/W and S/W events Jaswinder Singh Rajput @ 2009-06-22 11:14 ` Jaswinder Singh Rajput 2009-06-22 11:32 ` Ingo Molnar 2009-06-22 11:38 ` [tip:perfcounters/urgent] perf_counter tools: Introduce " tip-bot for Jaswinder Singh Rajput 2009-06-22 11:38 ` [tip:perfcounters/urgent] perf_counter tools: Define separate declarations for H/W and S/W events tip-bot for Jaswinder Singh Rajput 1 sibling, 2 replies; 14+ messages in thread From: Jaswinder Singh Rajput @ 2009-06-22 11:14 UTC (permalink / raw) To: Ingo Molnar; +Cc: Thomas Gleixner, Peter Zijlstra, LKML By introducing alias member in event_symbol : 1. duplicate lines are removed, like: cpu-cycles and cycles branch-instructions and branches context-switches and cs cpu-migrations and migrations 2. We can also add alias for another events. Now ./perf list looks like : List of pre-defined events (to be used in -e): cpu-cycles OR cycles [Hardware event] instructions [Hardware event] cache-references [Hardware event] cache-misses [Hardware event] branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] cpu-clock [Software event] task-clock [Software event] page-faults [Software event] faults [Software event] minor-faults [Software event] major-faults [Software event] context-switches OR cs [Software event] cpu-migrations OR migrations [Software event] rNNN [raw hardware event descriptor] Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com> --- tools/perf/util/parse-events.c | 63 ++++++++++++++++++++++++---------------- 1 files changed, 38 insertions(+), 25 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 12abab3..f569548 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -16,32 +16,29 @@ struct 
event_symbol { u8 type; u64 config; char *symbol; + char *alias; }; #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x static struct event_symbol event_symbols[] = { - { CHW(CPU_CYCLES), "cpu-cycles", }, - { CHW(CPU_CYCLES), "cycles", }, - { CHW(INSTRUCTIONS), "instructions", }, - { CHW(CACHE_REFERENCES), "cache-references", }, - { CHW(CACHE_MISSES), "cache-misses", }, - { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", }, - { CHW(BRANCH_INSTRUCTIONS), "branches", }, - { CHW(BRANCH_MISSES), "branch-misses", }, - { CHW(BUS_CYCLES), "bus-cycles", }, - - { CSW(CPU_CLOCK), "cpu-clock", }, - { CSW(TASK_CLOCK), "task-clock", }, - { CSW(PAGE_FAULTS), "page-faults", }, - { CSW(PAGE_FAULTS), "faults", }, - { CSW(PAGE_FAULTS_MIN), "minor-faults", }, - { CSW(PAGE_FAULTS_MAJ), "major-faults", }, - { CSW(CONTEXT_SWITCHES), "context-switches", }, - { CSW(CONTEXT_SWITCHES), "cs", }, - { CSW(CPU_MIGRATIONS), "cpu-migrations", }, - { CSW(CPU_MIGRATIONS), "migrations", }, + { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, + { CHW(INSTRUCTIONS), "instructions", "" }, + { CHW(CACHE_REFERENCES), "cache-references", "" }, + { CHW(CACHE_MISSES), "cache-misses", "" }, + { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, + { CHW(BRANCH_MISSES), "branch-misses", "" }, + { CHW(BUS_CYCLES), "bus-cycles", "" }, + + { CSW(CPU_CLOCK), "cpu-clock", "" }, + { CSW(TASK_CLOCK), "task-clock", "" }, + { CSW(PAGE_FAULTS), "page-faults", "" }, + { CSW(PAGE_FAULTS), "faults", "" }, + { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, + { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, + { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, + { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, }; #define __PERF_COUNTER_FIELD(config, name) \ @@ -196,6 +193,19 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a return 0; } +static int check_events(const char *str, unsigned int i) 
+{ + if (!strncmp(str, event_symbols[i].symbol, + strlen(event_symbols[i].symbol))) + return 1; + + if (strlen(event_symbols[i].alias)) + if (!strncmp(str, event_symbols[i].alias, + strlen(event_symbols[i].alias))) + return 1; + return 0; +} + /* * Each event can have multiple symbolic names. * Symbolic names are (almost) exactly matched. @@ -235,9 +245,7 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) } for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) { - + if (check_events(str, i)) { attr->type = event_symbols[i].type; attr->config = event_symbols[i].config; @@ -289,6 +297,7 @@ void print_events(void) { struct event_symbol *syms = event_symbols; unsigned int i, type, prev_type = -1; + char name[40]; fprintf(stderr, "\n"); fprintf(stderr, "List of pre-defined events (to be used in -e):\n"); @@ -301,14 +310,18 @@ void print_events(void) if (type != prev_type) fprintf(stderr, "\n"); - fprintf(stderr, " %-30s [%s]\n", syms->symbol, + if (strlen(syms->alias)) + sprintf(name, "%s OR %s", syms->symbol, syms->alias); + else + strcpy(name, syms->symbol); + fprintf(stderr, " %-40s [%s]\n", name, event_type_descriptors[type]); prev_type = type; } fprintf(stderr, "\n"); - fprintf(stderr, " %-30s [raw hardware event descriptor]\n", + fprintf(stderr, " %-40s [raw hardware event descriptor]\n", "rNNN"); fprintf(stderr, "\n"); -- 1.6.0.6 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol 2009-06-22 11:14 ` [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol Jaswinder Singh Rajput @ 2009-06-22 11:32 ` Ingo Molnar 2009-06-22 13:00 ` Jaswinder Singh Rajput 2009-06-22 11:38 ` [tip:perfcounters/urgent] perf_counter tools: Introduce " tip-bot for Jaswinder Singh Rajput 1 sibling, 1 reply; 14+ messages in thread From: Ingo Molnar @ 2009-06-22 11:32 UTC (permalink / raw) To: Jaswinder Singh Rajput; +Cc: Thomas Gleixner, Peter Zijlstra, LKML * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote: > > By introducing alias member in event_symbol : > > 1. duplicate lines are removed, like: > cpu-cycles and cycles > branch-instructions and branches > context-switches and cs > cpu-migrations and migrations > > 2. We can also add alias for another events. > > Now ./perf list looks like : > > List of pre-defined events (to be used in -e): > > cpu-cycles OR cycles [Hardware event] > instructions [Hardware event] > cache-references [Hardware event] > cache-misses [Hardware event] > branch-instructions OR branches [Hardware event] > branch-misses [Hardware event] > bus-cycles [Hardware event] > > cpu-clock [Software event] > task-clock [Software event] > page-faults [Software event] > faults [Software event] > minor-faults [Software event] > major-faults [Software event] > context-switches OR cs [Software event] > cpu-migrations OR migrations [Software event] > > rNNN [raw hardware event descriptor] > > Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com> > --- > tools/perf/util/parse-events.c | 63 ++++++++++++++++++++++++---------------- > 1 files changed, 38 insertions(+), 25 deletions(-) That looks useful - applied, thanks! Another nice thing would be for 'perf list' to actually start each counter and stop it - and see whether it ticks. Perhaps that could be a new sub-command: 'perf test' ? 
New 'perf' subcommands are added easily: create a new tools/perf/builtin-foo.c file, add it to command-list.txt and to the Makefile - add it to perf.c's array of built-in commands and add a Documentation/perf-foo.txt file to generate manpages and usage strings for it. Ingo ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol 2009-06-22 11:32 ` Ingo Molnar @ 2009-06-22 13:00 ` Jaswinder Singh Rajput 2009-06-22 13:23 ` Jaswinder Singh Rajput 2009-06-22 14:10 ` Ingo Molnar 0 siblings, 2 replies; 14+ messages in thread From: Jaswinder Singh Rajput @ 2009-06-22 13:00 UTC (permalink / raw) To: Ingo Molnar; +Cc: Thomas Gleixner, Peter Zijlstra, LKML On Mon, 2009-06-22 at 13:32 +0200, Ingo Molnar wrote: > Another nice thing would be for 'perf list' to actually start each > counter and stop it - and see whether it ticks. Perhaps that could > be a new sub-command: 'perf test' ? > > New 'perf' subcommands are added easily: > > create a new tools/perf/builtin-foo.c file, add it to > command-list.txt and to the Makefile - add it to perf.c's array of > built-in commands and add a Documentation/perf-foo.txt file to > generate manpages and usage strings for it. > Ok this is just a ugly quick hack to get the idea what actually you are looking for : [RFC] perf_counter tools: introduce perf test to test each event for ticks perf test to Test all events for whether it ticks $ perf test Performance counter stats for 'test': task-clock-msecs Tick context-switches Tick CPU-migrations Tick page-faults Tick cycles Tick instructions Tick cache-references Tick cache-misses Tick 0.007693869 seconds time elapsed. 
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com> --- tools/perf/Documentation/perf-test.txt | 24 ++ tools/perf/Makefile | 1 + tools/perf/builtin-test.c | 399 ++++++++++++++++++++++++++++++++ tools/perf/builtin.h | 1 + tools/perf/command-list.txt | 1 + tools/perf/perf.c | 1 + 6 files changed, 427 insertions(+), 0 deletions(-) create mode 100644 tools/perf/Documentation/perf-test.txt create mode 100644 tools/perf/builtin-test.c diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt new file mode 100644 index 0000000..44cf495 --- /dev/null +++ b/tools/perf/Documentation/perf-test.txt @@ -0,0 +1,24 @@ +perf-test(1) +============ + +NAME +---- +perf-test - Test all events for whether it ticks + +SYNOPSIS +-------- +[verse] +'perf test' + +DESCRIPTION +----------- +This command test all events whether it ticks. + +OPTIONS +------- +None + +SEE ALSO +-------- +linkperf:perf-stat[1], linkperf:perf-top[1], +linkperf:perf-record[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 36d7eef..f5ac83f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -335,6 +335,7 @@ BUILTIN_OBJS += builtin-list.o BUILTIN_OBJS += builtin-record.o BUILTIN_OBJS += builtin-report.o BUILTIN_OBJS += builtin-stat.o +BUILTIN_OBJS += builtin-test.o BUILTIN_OBJS += builtin-top.o PERFLIBS = $(LIB_FILE) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c new file mode 100644 index 0000000..0b8b25a --- /dev/null +++ b/tools/perf/builtin-test.c @@ -0,0 +1,399 @@ +/* + * builtin-test.c + * + * Builtin test command: Tests each event and see whether it ticks + * + * Sample output: + + $ perf test + + Performance counter stats for 'test': + + task-clock-msecs Tick + context-switches Tick + CPU-migrations Tick + page-faults Tick + cycles Tick + instructions Tick + cache-references Tick + cache-misses Tick + + 0.007693869 seconds time elapsed. 
+ + * (based on builtin-stat.c) + * + * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2009, Jaswinder Singh Rajput <jaswinder@kernel.org> + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include "perf.h" +#include "builtin.h" +#include "util/util.h" +#include "util/parse-options.h" +#include "util/parse-events.h" + +#include <sys/prctl.h> +#include <math.h> + +static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { + + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, + + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, + +}; + +static int system_wide = 0; +static int inherit = 1; +static int verbose = 0; + +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; + +static int nr_cpus = 0; +static unsigned int page_size; + +static int scale = 1; + +#define MAX_RUN 100 + +static int run_count = 1; +static int run_idx = 0; + +static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; +static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; + +static u64 runtime_nsecs[MAX_RUN]; +static u64 walltime_nsecs[MAX_RUN]; +static u64 runtime_cycles[MAX_RUN]; + +static u64 event_res_avg[MAX_COUNTERS][3]; +static u64 event_res_noise[MAX_COUNTERS][3]; + +static u64 event_scaled_avg[MAX_COUNTERS]; + +static u64 runtime_nsecs_avg; +static u64 runtime_nsecs_noise; + +static u64 walltime_nsecs_avg; +static u64 walltime_nsecs_noise; + +static u64 runtime_cycles_avg; +static u64 runtime_cycles_noise; + +static void create_perf_stat_counter(int counter) +{ + struct 
perf_counter_attr *attr = attrs + counter; + + if (scale) + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + + if (system_wide) { + int cpu; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); + if (fd[cpu][counter] < 0 && verbose) { + printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); + } + } + } else { + attr->inherit = inherit; + attr->disabled = 1; + + fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); + if (fd[0][counter] < 0 && verbose) { + printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); + } + } +} + +/* + * Does the counter have nsecs as a unit? + */ +static inline int nsec_counter(int counter) +{ + if (attrs[counter].type != PERF_TYPE_SOFTWARE) + return 0; + + if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK) + return 1; + + if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) + return 1; + + return 0; +} + +/* + * Read out the results of a single counter: + */ +static void read_counter(int counter) +{ + u64 *count, single_count[3]; + ssize_t res; + int cpu, nv; + int scaled; + + count = event_res[run_idx][counter]; + + count[0] = count[1] = count[2] = 0; + + nv = scale ? 
3 : 1; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + if (fd[cpu][counter] < 0) + continue; + + res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); + assert(res == nv * sizeof(u64)); + close(fd[cpu][counter]); + fd[cpu][counter] = -1; + + count[0] += single_count[0]; + if (scale) { + count[1] += single_count[1]; + count[2] += single_count[2]; + } + } + + scaled = 0; + if (scale) { + if (count[2] == 0) { + event_scaled[run_idx][counter] = -1; + count[0] = 0; + return; + } + + if (count[2] < count[1]) { + event_scaled[run_idx][counter] = 1; + count[0] = (unsigned long long) + ((double)count[0] * count[1] / count[2] + 0.5); + } + } + /* + * Save the full runtime - to allow normalization during printout: + */ + if (attrs[counter].type == PERF_TYPE_SOFTWARE && + attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) + runtime_nsecs[run_idx] = count[0]; + if (attrs[counter].type == PERF_TYPE_HARDWARE && + attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) + runtime_cycles[run_idx] = count[0]; +} + +static int run_perf_stat(int argc, const char **argv) +{ + unsigned long long t0, t1; + int status = 0; + int counter; + int pid; + + if (!system_wide) + nr_cpus = 1; + + for (counter = 0; counter < nr_counters; counter++) + create_perf_stat_counter(counter); + + /* + * Enable counters and exec the command: + */ + t0 = rdclock(); + prctl(PR_TASK_PERF_COUNTERS_ENABLE); + + if ((pid = fork()) < 0) + perror("failed to fork"); + + if (!pid) { + if (execvp(argv[0], (char **)argv)) { + perror(argv[0]); + exit(-1); + } + } + + wait(&status); + + prctl(PR_TASK_PERF_COUNTERS_DISABLE); + t1 = rdclock(); + + walltime_nsecs[run_idx] = t1 - t0; + + for (counter = 0; counter < nr_counters; counter++) + read_counter(counter); + + return WEXITSTATUS(status); +} + +static void test_printout(int counter, u64 *count) +{ + fprintf(stderr, " %-20s", event_name(counter)); + + if (count[0]) + fprintf(stderr, " Tick"); + else + fprintf(stderr, " No Tick"); +} + +/* + * Print out the results of a 
single counter: + */ +static void print_counter(int counter) +{ + u64 *count; + + count = event_res_avg[counter]; + + test_printout(counter, count); + + fprintf(stderr, "\n"); +} + +static void update_avg(const char *name, int idx, u64 *avg, u64 *val) +{ + *avg += *val; + + if (verbose > 1) + fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); +} +/* + * Calculate the averages and noises: + */ +static void calc_avg(void) +{ + int i, j; + + if (verbose > 1) + fprintf(stderr, "\n"); + + for (i = 0; i < run_count; i++) { + update_avg("runtime", 0, &runtime_nsecs_avg, runtime_nsecs + i); + update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i); + update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i); + for (j = 0; j < nr_counters; j++) { + update_avg("counter/0", j, + event_res_avg[j]+0, event_res[i][j]+0); + update_avg("counter/1", j, + event_res_avg[j]+1, event_res[i][j]+1); + update_avg("counter/2", j, + event_res_avg[j]+2, event_res[i][j]+2); + update_avg("scaled", j, + event_scaled_avg + j, event_scaled[i]+j); + } + } + runtime_nsecs_avg /= run_count; + walltime_nsecs_avg /= run_count; + runtime_cycles_avg /= run_count; + + for (j = 0; j < nr_counters; j++) { + event_res_avg[j][0] /= run_count; + event_res_avg[j][1] /= run_count; + event_res_avg[j][2] /= run_count; + } + + for (i = 0; i < run_count; i++) { + runtime_nsecs_noise += + abs((s64)(runtime_nsecs[i] - runtime_nsecs_avg)); + walltime_nsecs_noise += + abs((s64)(walltime_nsecs[i] - walltime_nsecs_avg)); + runtime_cycles_noise += + abs((s64)(runtime_cycles[i] - runtime_cycles_avg)); + + for (j = 0; j < nr_counters; j++) { + event_res_noise[j][0] += + abs((s64)(event_res[i][j][0] - event_res_avg[j][0])); + event_res_noise[j][1] += + abs((s64)(event_res[i][j][1] - event_res_avg[j][1])); + event_res_noise[j][2] += + abs((s64)(event_res[i][j][2] - event_res_avg[j][2])); + } + } +} + +static void print_stat(int argc, const char **argv) +{ + int i, counter; + + calc_avg(); + 
+ fflush(stdout); + + fprintf(stderr, "\n"); + fprintf(stderr, " Performance counter stats for \'%s", argv[0]); + + for (i = 1; i < argc; i++) + fprintf(stderr, " %s", argv[i]); + + fprintf(stderr, "\'"); + if (run_count > 1) + fprintf(stderr, " (%d runs)", run_count); + fprintf(stderr, ":\n\n"); + + for (counter = 0; counter < nr_counters; counter++) + print_counter(counter); + + + fprintf(stderr, "\n"); + fprintf(stderr, " %14.9f seconds time elapsed.\n", + (double)walltime_nsecs_avg/1e9); + fprintf(stderr, "\n"); +} + +static volatile int signr = -1; + +static void skip_signal(int signo) +{ + signr = signo; +} + +static void sig_atexit(void) +{ + if (signr == -1) + return; + + signal(signr, SIG_DFL); + kill(getpid(), signr); +} + +int cmd_test(int argc, const char **argv, const char *prefix) +{ + int status; + + page_size = sysconf(_SC_PAGE_SIZE); + + memcpy(attrs, default_attrs, sizeof(attrs)); + + if (!nr_counters) + nr_counters = 8; + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + assert(nr_cpus <= MAX_NR_CPUS); + assert(nr_cpus >= 0); + + /* + * We dont want to block the signals - that would cause + * child tasks to inherit that and Ctrl-C would not work. + * What we want is for Ctrl-C to work in the exec()-ed + * task, but being ignored by perf stat itself: + */ + atexit(sig_atexit); + signal(SIGINT, skip_signal); + signal(SIGALRM, skip_signal); + signal(SIGABRT, skip_signal); + + status = 0; + for (run_idx = 0; run_idx < run_count; run_idx++) { + if (run_count != 1 && verbose) + fprintf(stderr, "[ perf stat: executing run #%d ... 
]\n", run_idx+1); + status = run_perf_stat(argc, argv); + } + + print_stat(argc, argv); + + return status; +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 51d1682..3ed0362 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -22,5 +22,6 @@ extern int cmd_stat(int argc, const char **argv, const char *prefix); extern int cmd_top(int argc, const char **argv, const char *prefix); extern int cmd_version(int argc, const char **argv, const char *prefix); extern int cmd_list(int argc, const char **argv, const char *prefix); +extern int cmd_test(int argc, const char **argv, const char *prefix); #endif diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index eebce30..f53544c 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -7,4 +7,5 @@ perf-list mainporcelain common perf-record mainporcelain common perf-report mainporcelain common perf-stat mainporcelain common +perf-test mainporcelain common perf-top mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 4eb7259..9f98f5e 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -262,6 +262,7 @@ static void handle_internal_command(int argc, const char **argv) { "record", cmd_record, 0 }, { "report", cmd_report, 0 }, { "stat", cmd_stat, 0 }, + { "test", cmd_test, 0 }, { "top", cmd_top, 0 }, { "annotate", cmd_annotate, 0 }, { "version", cmd_version, 0 }, -- 1.6.0.6 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol 2009-06-22 13:00 ` Jaswinder Singh Rajput @ 2009-06-22 13:23 ` Jaswinder Singh Rajput 2009-06-22 14:10 ` Ingo Molnar 1 sibling, 0 replies; 14+ messages in thread From: Jaswinder Singh Rajput @ 2009-06-22 13:23 UTC (permalink / raw) To: Ingo Molnar; +Cc: Thomas Gleixner, Peter Zijlstra, LKML On Mon, 2009-06-22 at 18:30 +0530, Jaswinder Singh Rajput wrote: > On Mon, 2009-06-22 at 13:32 +0200, Ingo Molnar wrote: > > > Another nice thing would be for 'perf list' to actually start each > > counter and stop it - and see whether it ticks. Perhaps that could > > be a new sub-command: 'perf test' ? > > > > New 'perf' subcommands are added easily: > > > > create a new tools/perf/builtin-foo.c file, add it to > > command-list.txt and to the Makefile - add it to perf.c's array of > > built-in commands and add a Documentation/perf-foo.txt file to > > generate manpages and usage strings for it. > > > > Ok this is just a ugly quick hack to get the idea what actually you are > looking for : > > [RFC] perf_counter tools: introduce perf test to test each event for ticks > > perf test to Test all events for whether it ticks > > $ perf test > > Performance counter stats for 'test': > > task-clock-msecs Tick > context-switches Tick > CPU-migrations Tick > page-faults Tick > cycles Tick > instructions Tick > cache-references Tick > cache-misses Tick > > 0.007693869 seconds time elapsed. Please let me know whether I should test all events (Hardware + Software) and/or only selected events based on user request, and what the output should look like. Thanks, -- JSR ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol 2009-06-22 13:00 ` Jaswinder Singh Rajput 2009-06-22 13:23 ` Jaswinder Singh Rajput @ 2009-06-22 14:10 ` Ingo Molnar 2009-06-22 19:55 ` Jaswinder Singh Rajput 1 sibling, 1 reply; 14+ messages in thread From: Ingo Molnar @ 2009-06-22 14:10 UTC (permalink / raw) To: Jaswinder Singh Rajput; +Cc: Thomas Gleixner, Peter Zijlstra, LKML * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote: > On Mon, 2009-06-22 at 13:32 +0200, Ingo Molnar wrote: > > > Another nice thing would be for 'perf list' to actually start each > > counter and stop it - and see whether it ticks. Perhaps that could > > be a new sub-command: 'perf test' ? > > > > New 'perf' subcommands are added easily: > > > > create a new tools/perf/builtin-foo.c file, add it to > > command-list.txt and to the Makefile - add it to perf.c's array of > > built-in commands and add a Documentation/perf-foo.txt file to > > generate manpages and usage strings for it. > > > > Ok this is just a ugly quick hack to get the idea what actually you are > looking for : > > [RFC] perf_counter tools: introduce perf test to test each event for ticks > > perf test to Test all events for whether it ticks > > $ perf test > > Performance counter stats for 'test': > > task-clock-msecs Tick > context-switches Tick > CPU-migrations Tick > page-faults Tick > cycles Tick > instructions Tick > cache-references Tick > cache-misses Tick > > 0.007693869 seconds time elapsed. yeah, something like that. I'd suggest printing out the actual measured events: cache-references 10123 events cache-misses 15 events and if something does not appear to be ticking then do something like: cache-misses <inactive> I.e. 'perf test' could be a quick way for both users and developers to see all possible hw and sw events. Perhaps builtin-test.c should also do specific testcases for certain counters - say intentionally migrate to a CPU and back to see the CPU-migration count. 
Also, you seem to have copied builtin-stat.c, right? Try to librarize as much of the functionality (into util/*) to make the resulting linecount increase as small as possible. Ingo ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol 2009-06-22 14:10 ` Ingo Molnar @ 2009-06-22 19:55 ` Jaswinder Singh Rajput 2009-06-22 20:07 ` Jaswinder Singh Rajput 2009-06-23 8:24 ` Ingo Molnar 0 siblings, 2 replies; 14+ messages in thread From: Jaswinder Singh Rajput @ 2009-06-22 19:55 UTC (permalink / raw) To: Ingo Molnar; +Cc: Thomas Gleixner, Peter Zijlstra, LKML On Mon, 2009-06-22 at 16:10 +0200, Ingo Molnar wrote: > yeah, somethig like that. I'd suggest to print out the actual > measured events: > > cache-references 10123 events > cache-misses 15 events > > and if something does not appear to be ticking then do something > like: > > cache-misses <inactive> > > I.e. 'perf test' could be a quick way both to users and to > developers to see all possible hw and sw events. > > Perhaps builtin-test.c should also do specific testcases for certain > counters - say intentionally migrate to a CPU and back to see the > CPU-migration count. > > Also, you seem to have copied builtin-stat.c, right? Try to > librarize as much of the functionality (into util/*) to make the > resulting linecount increase as small as possible. > perf test also needs some command to execute; otherwise it will show a long list of <inactive>. I think it would be better for me to support all events in perf stat, so the user can get better information from it, and we can also add some other testing options to it. 
Anyway currently it looks like this : [RFC][PATCH] perf_counter tools: introduce perf test to test event for ticks perf test to Test performance counter events, its output on AMD box : ./perf test -a -- ls -lR > /dev/null Performance counter stats for 'ls' -lR: cycles 1226819954 instructions 283680441 cache-references 144893559 cache-misses 3268438 branches 37488241 branch-misses 2464027 bus-cycles <inactive> cpu-clock-msecs 17175506056 task-clock-msecs 17175086665 page-faults 488 minor-faults 488 major-faults <inactive> context-switches 7956 CPU-migrations 7 L1-data-Cache-Load-Referencees 398303881 L1-data-Cache-Load-Misses 3552374 L1-data-Cache-Store-Referencees 270178 L1-data-Cache-Store-Misses <inactive> L1-data-Cache-Prefetch-Referencees 611622 L1-data-Cache-Prefetch-Misses 399730 L1-instruction-Cache-Load-Referencees 124696447 L1-instruction-Cache-Load-Misses 2912802 L1-instruction-Cache-Store-Referencees <inactive> L1-instruction-Cache-Store-Misses <inactive> L1-instruction-Cache-Prefetch-Referencees 156576 L1-instruction-Cache-Prefetch-Misses <inactive> L2-Cache-Load-Referencees 4312353 L2-Cache-Load-Misses 470382 L2-Cache-Store-Referencees 4392945 L2-Cache-Store-Misses <inactive> L2-Cache-Prefetch-Referencees <inactive> L2-Cache-Prefetch-Misses <inactive> Data-TLB-Cache-Load-Referencees 127076487 Data-TLB-Cache-Load-Misses 1930048 Data-TLB-Cache-Store-Referencees <inactive> Data-TLB-Cache-Store-Misses <inactive> Data-TLB-Cache-Prefetch-Referencees <inactive> Data-TLB-Cache-Prefetch-Misses <inactive> Instruction-TLB-Cache-Load-Referencees 132768077 Instruction-TLB-Cache-Load-Misses 6406 Instruction-TLB-Cache-Store-Referencees <inactive> Instruction-TLB-Cache-Store-Misses <inactive> Instruction-TLB-Cache-Prefetch-Referencees <inactive> Instruction-TLB-Cache-Prefetch-Misses <inactive> Branch-Cache-Load-Referencees 58030210 Branch-Cache-Load-Misses 3257804 Branch-Cache-Store-Referencees <inactive> Branch-Cache-Store-Misses <inactive> 
Branch-Cache-Prefetch-Referencees <inactive> Branch-Cache-Prefetch-Misses <inactive> 8.681671511 seconds time elapsed. Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com> --- tools/perf/Documentation/perf-test.txt | 44 ++++ tools/perf/Makefile | 1 + tools/perf/builtin-test.c | 436 ++++++++++++++++++++++++++++++++ tools/perf/builtin.h | 1 + tools/perf/command-list.txt | 1 + tools/perf/perf.c | 1 + 6 files changed, 484 insertions(+), 0 deletions(-) create mode 100644 tools/perf/Documentation/perf-test.txt create mode 100644 tools/perf/builtin-test.c diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt new file mode 100644 index 0000000..6233769 --- /dev/null +++ b/tools/perf/Documentation/perf-test.txt @@ -0,0 +1,44 @@ +perf-test(1) +============ + +NAME +---- +perf-test - Run a command and gather performance counter event count if any + +SYNOPSIS +-------- +[verse] +'perf test' [-e <EVENT> | --event=EVENT] [-a] <command> +'perf test' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>] + +DESCRIPTION +----------- +This command runs a command and gathers performance counter event count +from it. + + +OPTIONS +------- +<command>...:: + Any command you can specify in a shell. + + +-e:: +--event=:: + Select the PMU event. Selection can be a symbolic event name + (use 'perf list' to list all events) or a raw PMU + event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. 
+ +-a:: + system-wide collection + +EXAMPLES +-------- + +$ perf test -- make -j + + +SEE ALSO +-------- +linkperf:perf-stat[1], perf-top[1], linkperf:perf-list[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 36d7eef..f5ac83f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -335,6 +335,7 @@ BUILTIN_OBJS += builtin-list.o BUILTIN_OBJS += builtin-record.o BUILTIN_OBJS += builtin-report.o BUILTIN_OBJS += builtin-stat.o +BUILTIN_OBJS += builtin-test.o BUILTIN_OBJS += builtin-top.o PERFLIBS = $(LIB_FILE) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c new file mode 100644 index 0000000..4ae1efe --- /dev/null +++ b/tools/perf/builtin-test.c @@ -0,0 +1,436 @@ +/* + * builtin-test.c + * + * Builtin test command: Test performace counter events + * + * Sample output on AMD box: + + $ perf test -a -- ls -lR > /dev/null + + Performance counter stats for 'ls' -lR: + + cycles 1226819954 + instructions 283680441 + cache-references 144893559 + cache-misses 3268438 + branches 37488241 + branch-misses 2464027 + bus-cycles <inactive> + cpu-clock-msecs 17175506056 + task-clock-msecs 17175086665 + page-faults 488 + minor-faults 488 + major-faults <inactive> + context-switches 7956 + CPU-migrations 7 + L1-data-Cache-Load-Referencees 398303881 + L1-data-Cache-Load-Misses 3552374 + L1-data-Cache-Store-Referencees 270178 + L1-data-Cache-Store-Misses <inactive> + L1-data-Cache-Prefetch-Referencees 611622 + L1-data-Cache-Prefetch-Misses 399730 + L1-instruction-Cache-Load-Referencees 124696447 + L1-instruction-Cache-Load-Misses 2912802 + L1-instruction-Cache-Store-Referencees <inactive> + L1-instruction-Cache-Store-Misses <inactive> + L1-instruction-Cache-Prefetch-Referencees 156576 + L1-instruction-Cache-Prefetch-Misses <inactive> + L2-Cache-Load-Referencees 4312353 + L2-Cache-Load-Misses 470382 + L2-Cache-Store-Referencees 4392945 + L2-Cache-Store-Misses <inactive> + L2-Cache-Prefetch-Referencees <inactive> + L2-Cache-Prefetch-Misses 
<inactive> + Data-TLB-Cache-Load-Referencees 127076487 + Data-TLB-Cache-Load-Misses 1930048 + Data-TLB-Cache-Store-Referencees <inactive> + Data-TLB-Cache-Store-Misses <inactive> + Data-TLB-Cache-Prefetch-Referencees <inactive> + Data-TLB-Cache-Prefetch-Misses <inactive> + Instruction-TLB-Cache-Load-Referencees 132768077 + Instruction-TLB-Cache-Load-Misses 6406 + Instruction-TLB-Cache-Store-Referencees <inactive> + Instruction-TLB-Cache-Store-Misses <inactive> + Instruction-TLB-Cache-Prefetch-Referencees <inactive> + Instruction-TLB-Cache-Prefetch-Misses <inactive> + Branch-Cache-Load-Referencees 58030210 + Branch-Cache-Load-Misses 3257804 + Branch-Cache-Store-Referencees <inactive> + Branch-Cache-Store-Misses <inactive> + Branch-Cache-Prefetch-Referencees <inactive> + Branch-Cache-Prefetch-Misses <inactive> + + 8.681671511 seconds time elapsed. + + * (based on builtin-stat.c) + * + * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2009, Jaswinder Singh Rajput <jaswinder@kernel.org> + * + * Released under the GPL v2. 
(and only v2, not any later version) + */ + +#include "perf.h" +#include "builtin.h" +#include "util/util.h" +#include "util/parse-options.h" +#include "util/parse-events.h" + +#include <sys/prctl.h> +#include <math.h> + +#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x +#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x +#define CHCACHE(x, y, z) \ +.type = PERF_TYPE_HW_CACHE, \ +.config = (PERF_COUNT_HW_CACHE_##x | (PERF_COUNT_HW_CACHE_OP_##y << 8) |\ + (PERF_COUNT_HW_CACHE_RESULT_##z << 16)) + +static struct perf_counter_attr default_attrs[] = { +/* Generalized Hardware events */ + { CHW(CPU_CYCLES) }, + { CHW(INSTRUCTIONS) }, + { CHW(CACHE_REFERENCES) }, + { CHW(CACHE_MISSES) }, + { CHW(BRANCH_INSTRUCTIONS) }, + { CHW(BRANCH_MISSES) }, + { CHW(BUS_CYCLES) }, + +/* Generalized Software events */ + { CSW(CPU_CLOCK) }, + { CSW(TASK_CLOCK) }, + { CSW(PAGE_FAULTS) }, + { CSW(PAGE_FAULTS_MIN) }, + { CSW(PAGE_FAULTS_MAJ) }, + { CSW(CONTEXT_SWITCHES) }, + { CSW(CPU_MIGRATIONS) }, + +/* Generalized Hardware cache counters events */ + { CHCACHE(L1D, READ, ACCESS) }, + { CHCACHE(L1D, READ, MISS) }, + { CHCACHE(L1D, WRITE, ACCESS) }, + { CHCACHE(L1D, WRITE, MISS) }, + { CHCACHE(L1D, PREFETCH, ACCESS) }, + { CHCACHE(L1D, PREFETCH, MISS) }, + + { CHCACHE(L1I, READ, ACCESS) }, + { CHCACHE(L1I, READ, MISS) }, + { CHCACHE(L1I, WRITE, ACCESS) }, + { CHCACHE(L1I, WRITE, MISS) }, + { CHCACHE(L1I, PREFETCH, ACCESS) }, + { CHCACHE(L1I, PREFETCH, MISS) }, + + { CHCACHE(LL, READ, ACCESS) }, + { CHCACHE(LL, READ, MISS) }, + { CHCACHE(LL, WRITE, ACCESS) }, + { CHCACHE(LL, WRITE, MISS) }, + { CHCACHE(LL, PREFETCH, ACCESS) }, + { CHCACHE(LL, PREFETCH, MISS) }, + + { CHCACHE(DTLB, READ, ACCESS) }, + { CHCACHE(DTLB, READ, MISS) }, + { CHCACHE(DTLB, WRITE, ACCESS) }, + { CHCACHE(DTLB, WRITE, MISS) }, + { CHCACHE(DTLB, PREFETCH, ACCESS) }, + { CHCACHE(DTLB, PREFETCH, MISS) }, + + { CHCACHE(ITLB, READ, ACCESS) }, + { CHCACHE(ITLB, READ, MISS) }, + { 
CHCACHE(ITLB, WRITE, ACCESS) }, + { CHCACHE(ITLB, WRITE, MISS) }, + { CHCACHE(ITLB, PREFETCH, ACCESS) }, + { CHCACHE(ITLB, PREFETCH, MISS) }, + + { CHCACHE(BPU, READ, ACCESS) }, + { CHCACHE(BPU, READ, MISS) }, + { CHCACHE(BPU, WRITE, ACCESS) }, + { CHCACHE(BPU, WRITE, MISS) }, + { CHCACHE(BPU, PREFETCH, ACCESS) }, + { CHCACHE(BPU, PREFETCH, MISS) }, + +}; + +#define MAX_RUN 100 + +static int system_wide = 0; +static int verbose = 0; + +static int nr_cpus = 0; + +static int run_count = 1; +static int run_idx = 0; + +static unsigned int page_size; + +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; + +static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; + +static u64 walltime_nsecs[MAX_RUN]; +static u64 runtime_cycles[MAX_RUN]; + +static u64 event_res_avg[MAX_COUNTERS][3]; + +static u64 walltime_nsecs_avg; + +static u64 runtime_cycles_avg; + +static void create_perf_stat_counter(int counter) +{ + struct perf_counter_attr *attr = attrs + counter; + + if (system_wide) { + int cpu; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); + if (fd[cpu][counter] < 0 && verbose) { + printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); + } + } + } else { + attr->disabled = 1; + + fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); + if (fd[0][counter] < 0 && verbose) { + printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); + } + } +} + +/* + * Read out the results of a single counter: + */ +static void read_counter(int counter) +{ + u64 *count, single_count[3]; + ssize_t res; + int cpu, nv; + + count = event_res[run_idx][counter]; + + count[0] = count[1] = count[2] = 0; + + nv = 1; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + if (fd[cpu][counter] < 0) + continue; + + res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); + assert(res == nv * sizeof(u64)); + 
close(fd[cpu][counter]); + fd[cpu][counter] = -1; + + count[0] += single_count[0]; + } + + /* + * Save the full runtime - to allow normalization during printout: + */ + runtime_cycles[run_idx] = count[0]; +} + +static int run_perf_test(int argc, const char **argv) +{ + unsigned long long t0, t1; + int status = 0; + int counter; + int pid; + + if (!system_wide) + nr_cpus = 1; + + for (counter = 0; counter < nr_counters; counter++) + create_perf_stat_counter(counter); + + /* + * Enable counters and exec the command: + */ + t0 = rdclock(); + prctl(PR_TASK_PERF_COUNTERS_ENABLE); + + if ((pid = fork()) < 0) + perror("failed to fork"); + + if (!pid) { + if (execvp(argv[0], (char **)argv)) { + perror(argv[0]); + exit(-1); + } + } + + wait(&status); + + prctl(PR_TASK_PERF_COUNTERS_DISABLE); + t1 = rdclock(); + + walltime_nsecs[run_idx] = t1 - t0; + + for (counter = 0; counter < nr_counters; counter++) + read_counter(counter); + + return WEXITSTATUS(status); +} + +static void test_printout(int counter, u64 *count) +{ + fprintf(stderr, " %-45s", event_name(counter)); + + if (count[0]) + fprintf(stderr, " %14Ld", count[0]); + else + fprintf(stderr, " <inactive>"); +} + +/* + * Print out the results of a single counter: + */ +static void print_counter(int counter) +{ + u64 *count; + + count = event_res_avg[counter]; + + test_printout(counter, count); + + fprintf(stderr, "\n"); +} + +static void update_avg(const char *name, int idx, u64 *avg, u64 *val) +{ + *avg += *val; + + if (verbose > 1) + fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); +} +/* + * Calculate the averages: + */ +static void calc_avg(void) +{ + int i, j; + + if (verbose > 1) + fprintf(stderr, "\n"); + + for (i = 0; i < run_count; i++) { + update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i); + update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i); + for (j = 0; j < nr_counters; j++) { + update_avg("counter/0", j, + event_res_avg[j]+0, event_res[i][j]+0); + 
update_avg("counter/1", j, + event_res_avg[j]+1, event_res[i][j]+1); + update_avg("counter/2", j, + event_res_avg[j]+2, event_res[i][j]+2); + } + } + walltime_nsecs_avg /= run_count; + runtime_cycles_avg /= run_count; + + for (j = 0; j < nr_counters; j++) { + event_res_avg[j][0] /= run_count; + event_res_avg[j][1] /= run_count; + event_res_avg[j][2] /= run_count; + } +} + +static void print_test(int argc, const char **argv) +{ + int i, counter; + + calc_avg(); + + fflush(stdout); + + fprintf(stderr, "\n"); + fprintf(stderr, " Performance counter stats for \'%s\'", argv[0]); + + for (i = 1; i < argc; i++) + fprintf(stderr, " %s", argv[i]); + + fprintf(stderr, ":\n\n"); + + for (counter = 0; counter < nr_counters; counter++) + print_counter(counter); + + fprintf(stderr, "\n"); + fprintf(stderr, " %14.9f seconds time elapsed.\n", + (double)walltime_nsecs_avg/1e9); + fprintf(stderr, "\n"); +} + +static volatile int signr = -1; + +static void skip_signal(int signo) +{ + signr = signo; +} + +static const char * const test_usage[] = { + "perf test [<options>] <command>", + NULL +}; + +static void sig_atexit(void) +{ + if (signr == -1) + return; + + signal(signr, SIG_DFL); + kill(getpid(), signr); +} + +static const struct option options[] = { + OPT_CALLBACK('e', "event", NULL, "event", + "event selector. 
use 'perf list' to list available events", + parse_events), + OPT_BOOLEAN('a', "all-cpus", &system_wide, + "system-wide collection from all CPUs"), + OPT_BOOLEAN('v', "verbose", &verbose, + "be more verbose (show counter open errors, etc)"), + OPT_END() +}; + +int cmd_test(int argc, const char **argv, const char *prefix) +{ + int status; + + page_size = sysconf(_SC_PAGE_SIZE); + + memcpy(attrs, default_attrs, sizeof(attrs)); + + argc = parse_options(argc, argv, options, test_usage, 0); + if (!argc) + usage_with_options(test_usage, options); + if (run_count <= 0 || run_count > MAX_RUN) + usage_with_options(test_usage, options); + + if (!nr_counters) + nr_counters = ARRAY_SIZE(default_attrs); + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + assert(nr_cpus <= MAX_NR_CPUS); + assert(nr_cpus >= 0); + + /* + * We dont want to block the signals - that would cause + * child tasks to inherit that and Ctrl-C would not work. + * What we want is for Ctrl-C to work in the exec()-ed + * task, but being ignored by perf test itself: + */ + atexit(sig_atexit); + signal(SIGINT, skip_signal); + signal(SIGALRM, skip_signal); + signal(SIGABRT, skip_signal); + + status = 0; + for (run_idx = 0; run_idx < run_count; run_idx++) { + if (run_count != 1 && verbose) + fprintf(stderr, "[ perf test: executing run #%d ... 
]\n", run_idx+1); + status = run_perf_test(argc, argv); + } + + print_test(argc, argv); + + return status; +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 51d1682..3ed0362 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -22,5 +22,6 @@ extern int cmd_stat(int argc, const char **argv, const char *prefix); extern int cmd_top(int argc, const char **argv, const char *prefix); extern int cmd_version(int argc, const char **argv, const char *prefix); extern int cmd_list(int argc, const char **argv, const char *prefix); +extern int cmd_test(int argc, const char **argv, const char *prefix); #endif diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index eebce30..f53544c 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -7,4 +7,5 @@ perf-list mainporcelain common perf-record mainporcelain common perf-report mainporcelain common perf-stat mainporcelain common +perf-test mainporcelain common perf-top mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 4eb7259..9f98f5e 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -262,6 +262,7 @@ static void handle_internal_command(int argc, const char **argv) { "record", cmd_record, 0 }, { "report", cmd_report, 0 }, { "stat", cmd_stat, 0 }, + { "test", cmd_test, 0 }, { "top", cmd_top, 0 }, { "annotate", cmd_annotate, 0 }, { "version", cmd_version, 0 }, -- 1.6.0.6 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol 2009-06-22 19:55 ` Jaswinder Singh Rajput @ 2009-06-22 20:07 ` Jaswinder Singh Rajput 2009-06-23 8:27 ` Ingo Molnar 2009-06-23 8:24 ` Ingo Molnar 1 sibling, 1 reply; 14+ messages in thread From: Jaswinder Singh Rajput @ 2009-06-22 20:07 UTC (permalink / raw) To: Ingo Molnar; +Cc: Thomas Gleixner, Peter Zijlstra, LKML On Tue, 2009-06-23 at 01:25 +0530, Jaswinder Singh Rajput wrote: > On Mon, 2009-06-22 at 16:10 +0200, Ingo Molnar wrote: > > yeah, somethig like that. I'd suggest to print out the actual > > measured events: > > > > cache-references 10123 events > > cache-misses 15 events > > > > and if something does not appear to be ticking then do something > > like: > > > > cache-misses <inactive> > > > > I.e. 'perf test' could be a quick way both to users and to > > developers to see all possible hw and sw events. > > > > Perhaps builtin-test.c should also do specific testcases for certain > > counters - say intentionally migrate to a CPU and back to see the > > CPU-migration count. > > > > Also, you seem to have copied builtin-stat.c, right? Try to > > librarize as much of the functionality (into util/*) to make the > > resulting linecount increase as small as possible. > > > > perf test also need some command to execute otherwise it will also show > long list of <inactive> > > I think better I should support all events in perf stat so user can get > better information from it and we can all add some other testing option > to it. 
> > Anyway currently it looks like this : > > [RFC][PATCH] perf_counter tools: introduce perf test to test event for ticks This fixes some style issues : [RFC][PATCH] perf_counter tools: introduce perf test to test event for ticks perf test to Test performance counter events, its output on AMD box : ./perf test -a -- ls -lR > /dev/null Performance counter stats for 'ls' -lR: cycles 1226819954 instructions 283680441 cache-references 144893559 cache-misses 3268438 branches 37488241 branch-misses 2464027 bus-cycles <inactive> cpu-clock-msecs 17175506056 task-clock-msecs 17175086665 page-faults 488 minor-faults 488 major-faults <inactive> context-switches 7956 CPU-migrations 7 L1-data-Cache-Load-Referencees 398303881 L1-data-Cache-Load-Misses 3552374 L1-data-Cache-Store-Referencees 270178 L1-data-Cache-Store-Misses <inactive> L1-data-Cache-Prefetch-Referencees 611622 L1-data-Cache-Prefetch-Misses 399730 L1-instruction-Cache-Load-Referencees 124696447 L1-instruction-Cache-Load-Misses 2912802 L1-instruction-Cache-Store-Referencees <inactive> L1-instruction-Cache-Store-Misses <inactive> L1-instruction-Cache-Prefetch-Referencees 156576 L1-instruction-Cache-Prefetch-Misses <inactive> L2-Cache-Load-Referencees 4312353 L2-Cache-Load-Misses 470382 L2-Cache-Store-Referencees 4392945 L2-Cache-Store-Misses <inactive> L2-Cache-Prefetch-Referencees <inactive> L2-Cache-Prefetch-Misses <inactive> Data-TLB-Cache-Load-Referencees 127076487 Data-TLB-Cache-Load-Misses 1930048 Data-TLB-Cache-Store-Referencees <inactive> Data-TLB-Cache-Store-Misses <inactive> Data-TLB-Cache-Prefetch-Referencees <inactive> Data-TLB-Cache-Prefetch-Misses <inactive> Instruction-TLB-Cache-Load-Referencees 132768077 Instruction-TLB-Cache-Load-Misses 6406 Instruction-TLB-Cache-Store-Referencees <inactive> Instruction-TLB-Cache-Store-Misses <inactive> Instruction-TLB-Cache-Prefetch-Referencees <inactive> Instruction-TLB-Cache-Prefetch-Misses <inactive> Branch-Cache-Load-Referencees 58030210 
Branch-Cache-Load-Misses 3257804 Branch-Cache-Store-Referencees <inactive> Branch-Cache-Store-Misses <inactive> Branch-Cache-Prefetch-Referencees <inactive> Branch-Cache-Prefetch-Misses <inactive> 8.681671511 seconds time elapsed. Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com> --- tools/perf/Documentation/perf-test.txt | 44 ++++ tools/perf/Makefile | 1 + tools/perf/builtin-test.c | 436 ++++++++++++++++++++++++++++++++ tools/perf/builtin.h | 1 + tools/perf/command-list.txt | 1 + tools/perf/perf.c | 1 + 6 files changed, 484 insertions(+), 0 deletions(-) create mode 100644 tools/perf/Documentation/perf-test.txt create mode 100644 tools/perf/builtin-test.c diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt new file mode 100644 index 0000000..6233769 --- /dev/null +++ b/tools/perf/Documentation/perf-test.txt @@ -0,0 +1,44 @@ +perf-test(1) +============ + +NAME +---- +perf-test - Run a command and gather performance counter event count if any + +SYNOPSIS +-------- +[verse] +'perf test' [-e <EVENT> | --event=EVENT] [-a] <command> +'perf test' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>] + +DESCRIPTION +----------- +This command runs a command and gathers performance counter event count +from it. + + +OPTIONS +------- +<command>...:: + Any command you can specify in a shell. + + +-e:: +--event=:: + Select the PMU event. Selection can be a symbolic event name + (use 'perf list' to list all events) or a raw PMU + event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. 
+ +-a:: + system-wide collection + +EXAMPLES +-------- + +$ perf test -- make -j + + +SEE ALSO +-------- +linkperf:perf-stat[1], perf-top[1], linkperf:perf-list[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 36d7eef..f5ac83f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -335,6 +335,7 @@ BUILTIN_OBJS += builtin-list.o BUILTIN_OBJS += builtin-record.o BUILTIN_OBJS += builtin-report.o BUILTIN_OBJS += builtin-stat.o +BUILTIN_OBJS += builtin-test.o BUILTIN_OBJS += builtin-top.o PERFLIBS = $(LIB_FILE) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c new file mode 100644 index 0000000..3b24b2d --- /dev/null +++ b/tools/perf/builtin-test.c @@ -0,0 +1,436 @@ +/* + * builtin-test.c + * + * Builtin test command: Test performace counter events + * + * Sample output on AMD box: + + $ perf test -a -- ls -lR > /dev/null + + Performance counter stats for 'ls' -lR: + + cycles 1226819954 + instructions 283680441 + cache-references 144893559 + cache-misses 3268438 + branches 37488241 + branch-misses 2464027 + bus-cycles <inactive> + cpu-clock-msecs 17175506056 + task-clock-msecs 17175086665 + page-faults 488 + minor-faults 488 + major-faults <inactive> + context-switches 7956 + CPU-migrations 7 + L1-data-Cache-Load-Referencees 398303881 + L1-data-Cache-Load-Misses 3552374 + L1-data-Cache-Store-Referencees 270178 + L1-data-Cache-Store-Misses <inactive> + L1-data-Cache-Prefetch-Referencees 611622 + L1-data-Cache-Prefetch-Misses 399730 + L1-instruction-Cache-Load-Referencees 124696447 + L1-instruction-Cache-Load-Misses 2912802 + L1-instruction-Cache-Store-Referencees <inactive> + L1-instruction-Cache-Store-Misses <inactive> + L1-instruction-Cache-Prefetch-Referencees 156576 + L1-instruction-Cache-Prefetch-Misses <inactive> + L2-Cache-Load-Referencees 4312353 + L2-Cache-Load-Misses 470382 + L2-Cache-Store-Referencees 4392945 + L2-Cache-Store-Misses <inactive> + L2-Cache-Prefetch-Referencees <inactive> + L2-Cache-Prefetch-Misses 
<inactive> + Data-TLB-Cache-Load-Referencees 127076487 + Data-TLB-Cache-Load-Misses 1930048 + Data-TLB-Cache-Store-Referencees <inactive> + Data-TLB-Cache-Store-Misses <inactive> + Data-TLB-Cache-Prefetch-Referencees <inactive> + Data-TLB-Cache-Prefetch-Misses <inactive> + Instruction-TLB-Cache-Load-Referencees 132768077 + Instruction-TLB-Cache-Load-Misses 6406 + Instruction-TLB-Cache-Store-Referencees <inactive> + Instruction-TLB-Cache-Store-Misses <inactive> + Instruction-TLB-Cache-Prefetch-Referencees <inactive> + Instruction-TLB-Cache-Prefetch-Misses <inactive> + Branch-Cache-Load-Referencees 58030210 + Branch-Cache-Load-Misses 3257804 + Branch-Cache-Store-Referencees <inactive> + Branch-Cache-Store-Misses <inactive> + Branch-Cache-Prefetch-Referencees <inactive> + Branch-Cache-Prefetch-Misses <inactive> + + 8.681671511 seconds time elapsed. + + * (based on builtin-stat.c) + * + * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2009, Jaswinder Singh Rajput <jaswinder@kernel.org> + * + * Released under the GPL v2. 
(and only v2, not any later version) + */ + +#include "perf.h" +#include "builtin.h" +#include "util/util.h" +#include "util/parse-options.h" +#include "util/parse-events.h" + +#include <sys/prctl.h> +#include <math.h> + +#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x +#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x +#define CHCACHE(x, y, z) \ +.type = PERF_TYPE_HW_CACHE, \ +.config = (PERF_COUNT_HW_CACHE_##x | (PERF_COUNT_HW_CACHE_OP_##y << 8) |\ + (PERF_COUNT_HW_CACHE_RESULT_##z << 16)) + +static struct perf_counter_attr default_attrs[] = { +/* Generalized Hardware events */ + { CHW(CPU_CYCLES) }, + { CHW(INSTRUCTIONS) }, + { CHW(CACHE_REFERENCES) }, + { CHW(CACHE_MISSES) }, + { CHW(BRANCH_INSTRUCTIONS) }, + { CHW(BRANCH_MISSES) }, + { CHW(BUS_CYCLES) }, + +/* Generalized Software events */ + { CSW(CPU_CLOCK) }, + { CSW(TASK_CLOCK) }, + { CSW(PAGE_FAULTS) }, + { CSW(PAGE_FAULTS_MIN) }, + { CSW(PAGE_FAULTS_MAJ) }, + { CSW(CONTEXT_SWITCHES) }, + { CSW(CPU_MIGRATIONS) }, + +/* Generalized Hardware cache counters events */ + { CHCACHE(L1D, READ, ACCESS) }, + { CHCACHE(L1D, READ, MISS) }, + { CHCACHE(L1D, WRITE, ACCESS) }, + { CHCACHE(L1D, WRITE, MISS) }, + { CHCACHE(L1D, PREFETCH, ACCESS) }, + { CHCACHE(L1D, PREFETCH, MISS) }, + + { CHCACHE(L1I, READ, ACCESS) }, + { CHCACHE(L1I, READ, MISS) }, + { CHCACHE(L1I, WRITE, ACCESS) }, + { CHCACHE(L1I, WRITE, MISS) }, + { CHCACHE(L1I, PREFETCH, ACCESS) }, + { CHCACHE(L1I, PREFETCH, MISS) }, + + { CHCACHE(LL, READ, ACCESS) }, + { CHCACHE(LL, READ, MISS) }, + { CHCACHE(LL, WRITE, ACCESS) }, + { CHCACHE(LL, WRITE, MISS) }, + { CHCACHE(LL, PREFETCH, ACCESS) }, + { CHCACHE(LL, PREFETCH, MISS) }, + + { CHCACHE(DTLB, READ, ACCESS) }, + { CHCACHE(DTLB, READ, MISS) }, + { CHCACHE(DTLB, WRITE, ACCESS) }, + { CHCACHE(DTLB, WRITE, MISS) }, + { CHCACHE(DTLB, PREFETCH, ACCESS) }, + { CHCACHE(DTLB, PREFETCH, MISS) }, + + { CHCACHE(ITLB, READ, ACCESS) }, + { CHCACHE(ITLB, READ, MISS) }, + { 
CHCACHE(ITLB, WRITE, ACCESS) }, + { CHCACHE(ITLB, WRITE, MISS) }, + { CHCACHE(ITLB, PREFETCH, ACCESS) }, + { CHCACHE(ITLB, PREFETCH, MISS) }, + + { CHCACHE(BPU, READ, ACCESS) }, + { CHCACHE(BPU, READ, MISS) }, + { CHCACHE(BPU, WRITE, ACCESS) }, + { CHCACHE(BPU, WRITE, MISS) }, + { CHCACHE(BPU, PREFETCH, ACCESS) }, + { CHCACHE(BPU, PREFETCH, MISS) }, + +}; + +#define MAX_RUN 100 + +static int system_wide = 0; +static int verbose = 0; + +static int nr_cpus = 0; + +static int run_count = 1; +static int run_idx = 0; + +static unsigned int page_size; + +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; + +static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; + +static u64 walltime_nsecs[MAX_RUN]; +static u64 runtime_cycles[MAX_RUN]; + +static u64 event_res_avg[MAX_COUNTERS][3]; + +static u64 walltime_nsecs_avg; + +static u64 runtime_cycles_avg; + +static void create_perf_stat_counter(int counter) +{ + struct perf_counter_attr *attr = attrs + counter; + + if (system_wide) { + int cpu; + for (cpu = 0; cpu < nr_cpus; cpu++) { + fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); + if (fd[cpu][counter] < 0 && verbose) { + printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); + } + } + } else { + attr->disabled = 1; + + fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); + if (fd[0][counter] < 0 && verbose) { + printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); + } + } +} + +/* + * Read out the results of a single counter: + */ +static void read_counter(int counter) +{ + u64 *count, single_count[3]; + ssize_t res; + int cpu, nv; + + count = event_res[run_idx][counter]; + + count[0] = count[1] = count[2] = 0; + + nv = 1; + for (cpu = 0; cpu < nr_cpus; cpu++) { + if (fd[cpu][counter] < 0) + continue; + + res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); + assert(res == nv * sizeof(u64)); + 
close(fd[cpu][counter]); + fd[cpu][counter] = -1; + + count[0] += single_count[0]; + } + + /* + * Save the full runtime - to allow normalization during printout: + */ + runtime_cycles[run_idx] = count[0]; +} + +static int run_perf_test(int argc, const char **argv) +{ + unsigned long long t0, t1; + int status = 0; + int counter; + int pid; + + if (!system_wide) + nr_cpus = 1; + + for (counter = 0; counter < nr_counters; counter++) + create_perf_stat_counter(counter); + + /* + * Enable counters and exec the command: + */ + t0 = rdclock(); + prctl(PR_TASK_PERF_COUNTERS_ENABLE); + + if ((pid = fork()) < 0) + perror("failed to fork"); + + if (!pid) { + if (execvp(argv[0], (char **)argv)) { + perror(argv[0]); + exit(-1); + } + } + + wait(&status); + + prctl(PR_TASK_PERF_COUNTERS_DISABLE); + t1 = rdclock(); + + walltime_nsecs[run_idx] = t1 - t0; + + for (counter = 0; counter < nr_counters; counter++) + read_counter(counter); + + return WEXITSTATUS(status); +} + +static void test_printout(int counter, u64 *count) +{ + fprintf(stderr, " %-45s", event_name(counter)); + + if (count[0]) + fprintf(stderr, " %14Ld", count[0]); + else + fprintf(stderr, " <inactive>"); +} + +/* + * Print out the results of a single counter: + */ +static void print_counter(int counter) +{ + u64 *count; + + count = event_res_avg[counter]; + + test_printout(counter, count); + + fprintf(stderr, "\n"); +} + +static void update_avg(const char *name, int idx, u64 *avg, u64 *val) +{ + *avg += *val; + + if (verbose > 1) + fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); +} +/* + * Calculate the averages: + */ +static void calc_avg(void) +{ + int i, j; + + if (verbose > 1) + fprintf(stderr, "\n"); + + for (i = 0; i < run_count; i++) { + update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i); + update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i); + for (j = 0; j < nr_counters; j++) { + update_avg("counter/0", j, + event_res_avg[j]+0, event_res[i][j]+0); + 
update_avg("counter/1", j, + event_res_avg[j]+1, event_res[i][j]+1); + update_avg("counter/2", j, + event_res_avg[j]+2, event_res[i][j]+2); + } + } + walltime_nsecs_avg /= run_count; + runtime_cycles_avg /= run_count; + + for (j = 0; j < nr_counters; j++) { + event_res_avg[j][0] /= run_count; + event_res_avg[j][1] /= run_count; + event_res_avg[j][2] /= run_count; + } +} + +static void print_test(int argc, const char **argv) +{ + int i, counter; + + calc_avg(); + + fflush(stdout); + + fprintf(stderr, "\n"); + fprintf(stderr, " Performance counter stats for \'%s\'", argv[0]); + + for (i = 1; i < argc; i++) + fprintf(stderr, " %s", argv[i]); + + fprintf(stderr, ":\n\n"); + + for (counter = 0; counter < nr_counters; counter++) + print_counter(counter); + + fprintf(stderr, "\n"); + fprintf(stderr, " %14.9f seconds time elapsed.\n", + (double)walltime_nsecs_avg/1e9); + fprintf(stderr, "\n"); +} + +static volatile int signr = -1; + +static void skip_signal(int signo) +{ + signr = signo; +} + +static const char * const test_usage[] = { + "perf test [<options>] <command>", + NULL +}; + +static void sig_atexit(void) +{ + if (signr == -1) + return; + + signal(signr, SIG_DFL); + kill(getpid(), signr); +} + +static const struct option options[] = { + OPT_CALLBACK('e', "event", NULL, "event", + "event selector. 
use 'perf list' to list available events", + parse_events), + OPT_BOOLEAN('a', "all-cpus", &system_wide, + "system-wide collection from all CPUs"), + OPT_BOOLEAN('v', "verbose", &verbose, + "be more verbose (show counter open errors, etc)"), + OPT_END() +}; + +int cmd_test(int argc, const char **argv, const char *prefix) +{ + int status; + + page_size = sysconf(_SC_PAGE_SIZE); + + memcpy(attrs, default_attrs, sizeof(attrs)); + + argc = parse_options(argc, argv, options, test_usage, 0); + if (!argc) + usage_with_options(test_usage, options); + if (run_count <= 0 || run_count > MAX_RUN) + usage_with_options(test_usage, options); + + if (!nr_counters) + nr_counters = ARRAY_SIZE(default_attrs); + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + assert(nr_cpus <= MAX_NR_CPUS); + assert(nr_cpus >= 0); + + /* + * We dont want to block the signals - that would cause + * child tasks to inherit that and Ctrl-C would not work. + * What we want is for Ctrl-C to work in the exec()-ed + * task, but being ignored by perf test itself: + */ + atexit(sig_atexit); + signal(SIGINT, skip_signal); + signal(SIGALRM, skip_signal); + signal(SIGABRT, skip_signal); + + status = 0; + for (run_idx = 0; run_idx < run_count; run_idx++) { + if (run_count != 1 && verbose) + fprintf(stderr, "[ perf test: executing run #%d ... 
]\n", run_idx+1); + status = run_perf_test(argc, argv); + } + + print_test(argc, argv); + + return status; +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 51d1682..3ed0362 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -22,5 +22,6 @@ extern int cmd_stat(int argc, const char **argv, const char *prefix); extern int cmd_top(int argc, const char **argv, const char *prefix); extern int cmd_version(int argc, const char **argv, const char *prefix); extern int cmd_list(int argc, const char **argv, const char *prefix); +extern int cmd_test(int argc, const char **argv, const char *prefix); #endif diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index eebce30..f53544c 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -7,4 +7,5 @@ perf-list mainporcelain common perf-record mainporcelain common perf-report mainporcelain common perf-stat mainporcelain common +perf-test mainporcelain common perf-top mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 4eb7259..9f98f5e 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -262,6 +262,7 @@ static void handle_internal_command(int argc, const char **argv) { "record", cmd_record, 0 }, { "report", cmd_report, 0 }, { "stat", cmd_stat, 0 }, + { "test", cmd_test, 0 }, { "top", cmd_top, 0 }, { "annotate", cmd_annotate, 0 }, { "version", cmd_version, 0 }, -- 1.6.0.6 ^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol 2009-06-22 20:07 ` Jaswinder Singh Rajput @ 2009-06-23 8:27 ` Ingo Molnar 0 siblings, 0 replies; 14+ messages in thread From: Ingo Molnar @ 2009-06-23 8:27 UTC (permalink / raw) To: Jaswinder Singh Rajput; +Cc: Thomas Gleixner, Peter Zijlstra, LKML * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote: > L1-data-Cache-Load-Referencees 398303881 > L1-data-Cache-Load-Misses 3552374 > L1-data-Cache-Store-Referencees 270178 > L1-data-Cache-Store-Misses <inactive> > L1-data-Cache-Prefetch-Referencees 611622 > L1-data-Cache-Prefetch-Misses 399730 > L1-instruction-Cache-Load-Referencees 124696447 > L1-instruction-Cache-Load-Misses 2912802 > L1-instruction-Cache-Store-Referencees <inactive> > L1-instruction-Cache-Store-Misses <inactive> > L1-instruction-Cache-Prefetch-Referencees 156576 > L1-instruction-Cache-Prefetch-Misses <inactive> > L2-Cache-Load-Referencees 4312353 > L2-Cache-Load-Misses 470382 > L2-Cache-Store-Referencees 4392945 > L2-Cache-Store-Misses <inactive> > L2-Cache-Prefetch-Referencees <inactive> > L2-Cache-Prefetch-Misses <inactive> > Data-TLB-Cache-Load-Referencees 127076487 > Data-TLB-Cache-Load-Misses 1930048 > Data-TLB-Cache-Store-Referencees <inactive> > Data-TLB-Cache-Store-Misses <inactive> > Data-TLB-Cache-Prefetch-Referencees <inactive> > Data-TLB-Cache-Prefetch-Misses <inactive> > Instruction-TLB-Cache-Load-Referencees 132768077 > Instruction-TLB-Cache-Load-Misses 6406 > Instruction-TLB-Cache-Store-Referencees <inactive> > Instruction-TLB-Cache-Store-Misses <inactive> > Instruction-TLB-Cache-Prefetch-Referencees <inactive> there's a typo somewhere: it's References, not Referencees. 
> Instruction-TLB-Cache-Prefetch-Misses <inactive> > Branch-Cache-Load-Referencees 58030210 > Branch-Cache-Load-Misses 3257804 > Branch-Cache-Store-Referencees <inactive> > Branch-Cache-Store-Misses <inactive> > Branch-Cache-Prefetch-Referencees <inactive> > Branch-Cache-Prefetch-Misses <inactive> btw., these names should be shortened somehow. Instruction-TLB-Cache-Prefetch-Referencees could become: iTLB-prefetch-refs Data-TLB-Cache-Store-Referencees could become: dTLB-store-refs etc. (You might want to send us a list of abbreviations first, before coding it, to avoid unnecessary work - it will need a few iterations.) The long versions should be recognized too, if someone prefers those (i.e. our alias detection should be extensive and permissive), but the names we _display_ should be short. Ingo ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol 2009-06-22 19:55 ` Jaswinder Singh Rajput 2009-06-22 20:07 ` Jaswinder Singh Rajput @ 2009-06-23 8:24 ` Ingo Molnar 2009-06-23 14:02 ` Jaswinder Singh Rajput 1 sibling, 1 reply; 14+ messages in thread From: Ingo Molnar @ 2009-06-23 8:24 UTC (permalink / raw) To: Jaswinder Singh Rajput; +Cc: Thomas Gleixner, Peter Zijlstra, LKML * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote: > On Mon, 2009-06-22 at 16:10 +0200, Ingo Molnar wrote: > > yeah, something like that. I'd suggest to print out the actual > > measured events: > > > > cache-references 10123 events > > cache-misses 15 events > > > > and if something does not appear to be ticking then do something > > like: > > > > cache-misses <inactive> > > > > I.e. 'perf test' could be a quick way both to users and to > > developers to see all possible hw and sw events. > > > > Perhaps builtin-test.c should also do specific testcases for certain > > counters - say intentionally migrate to a CPU and back to see the > > CPU-migration count. > > > > Also, you seem to have copied builtin-stat.c, right? Try to > > librarize as much of the functionality (into util/*) to make the > > resulting linecount increase as small as possible. > > perf test also need some command to execute otherwise it will also > show long list of <inactive> I think what it should do is to execute test-cases _internally_. Not just execute some random command on the system and hope for events. > I think better I should support all events in perf stat so user > can get better information from it and we can all add some other > testing option to it. I agree - see my previous mail about how to achieve this better: we should extend event string parsing with wildcards (regex) and with 'set of events' symbols that act as convenient specifiers for certain typical uses.
> Anyway currently it looks like this : > > [RFC][PATCH] perf_counter tools: introduce perf test to test event for ticks > > perf test to Test performance counter events, its output on AMD box : > > ./perf test -a -- ls -lR > /dev/null > > Performance counter stats for 'ls' -lR: > > cycles 1226819954 > instructions 283680441 > cache-references 144893559 > cache-misses 3268438 > branches 37488241 > branch-misses 2464027 > bus-cycles <inactive> > cpu-clock-msecs 17175506056 > task-clock-msecs 17175086665 > page-faults 488 > minor-faults 488 > major-faults <inactive> We should try to provoke a real major fault (i.e. a fault with IO) here. Not sure how though :-) > context-switches 7956 > CPU-migrations 7 this needs to be provoked intentionally via sched_setaffinity(): first migrate to cpu0, then to cpu1. > L1-data-Cache-Load-Referencees 398303881 > L1-data-Cache-Load-Misses 3552374 > L1-data-Cache-Store-Referencees 270178 > L1-data-Cache-Store-Misses <inactive> this is probably inactive due to AMD not having events for that and the generic cache event being 0 there, right? 
> L1-data-Cache-Prefetch-Referencees 611622 > L1-data-Cache-Prefetch-Misses 399730 > L1-instruction-Cache-Load-Referencees 124696447 > L1-instruction-Cache-Load-Misses 2912802 > L1-instruction-Cache-Store-Referencees <inactive> > L1-instruction-Cache-Store-Misses <inactive> > L1-instruction-Cache-Prefetch-Referencees 156576 > L1-instruction-Cache-Prefetch-Misses <inactive> > L2-Cache-Load-Referencees 4312353 > L2-Cache-Load-Misses 470382 > L2-Cache-Store-Referencees 4392945 > L2-Cache-Store-Misses <inactive> > L2-Cache-Prefetch-Referencees <inactive> > L2-Cache-Prefetch-Misses <inactive> > Data-TLB-Cache-Load-Referencees 127076487 > Data-TLB-Cache-Load-Misses 1930048 > Data-TLB-Cache-Store-Referencees <inactive> > Data-TLB-Cache-Store-Misses <inactive> > Data-TLB-Cache-Prefetch-Referencees <inactive> > Data-TLB-Cache-Prefetch-Misses <inactive> > Instruction-TLB-Cache-Load-Referencees 132768077 > Instruction-TLB-Cache-Load-Misses 6406 > Instruction-TLB-Cache-Store-Referencees <inactive> > Instruction-TLB-Cache-Store-Misses <inactive> > Instruction-TLB-Cache-Prefetch-Referencees <inactive> > Instruction-TLB-Cache-Prefetch-Misses <inactive> > Branch-Cache-Load-Referencees 58030210 > Branch-Cache-Load-Misses 3257804 > Branch-Cache-Store-Referencees <inactive> > Branch-Cache-Store-Misses <inactive> > Branch-Cache-Prefetch-Referencees <inactive> > Branch-Cache-Prefetch-Misses <inactive> > > 8.681671511 seconds time elapsed. looks nice. Ingo ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol 2009-06-23 8:24 ` Ingo Molnar @ 2009-06-23 14:02 ` Jaswinder Singh Rajput 2009-06-24 8:48 ` Ingo Molnar 0 siblings, 1 reply; 14+ messages in thread From: Jaswinder Singh Rajput @ 2009-06-23 14:02 UTC (permalink / raw) To: Ingo Molnar; +Cc: Thomas Gleixner, Peter Zijlstra, LKML On Tue, 2009-06-23 at 10:24 +0200, Ingo Molnar wrote: > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote: > > > > > perf test also need some command to execute otherwise it will also > > show long list of <inactive> > > I think what it should do is to execute test-cases _internally_. Not > just execute some random command on the system and hope for events. > Can you suggest some good test cases where we can get numbers for almost all the events on each and every run? > > context-switches 7956 > > CPU-migrations 7 > > this needs to be provoked intentionally via sched_setaffinity(): > first migrate to cpu0, then to cpu1. > There should be some option from user or we test it each time. > > L1-data-Cache-Load-Referencees 398303881 > > L1-data-Cache-Load-Misses 3552374 > > L1-data-Cache-Store-Referencees 270178 > > L1-data-Cache-Store-Misses <inactive> > > this is probably inactive due to AMD not having events for that and > the generic cache event being 0 there, right? > Yes, it is not set for AMD. Thanks, -- JSR ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol 2009-06-23 14:02 ` Jaswinder Singh Rajput @ 2009-06-24 8:48 ` Ingo Molnar 0 siblings, 0 replies; 14+ messages in thread From: Ingo Molnar @ 2009-06-24 8:48 UTC (permalink / raw) To: Jaswinder Singh Rajput; +Cc: Thomas Gleixner, Peter Zijlstra, LKML * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote: > On Tue, 2009-06-23 at 10:24 +0200, Ingo Molnar wrote: > > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote: > > > > > > > > perf test also need some command to execute otherwise it will also > > > show long list of <inactive> > > > > I think what it should do is to execute test-cases _internally_. Not > > just execute some random command on the system and hope for events. > > Can you suggest some good test cases where we can get numbers for > almost all the events on each and every run. the testcases should be specific to the counter type. Some counters are easy (such as cycles or instructions - you in fact cannot even get zero out of them even if you tried) - some are harder - such as migrations, or cache-misses or page-faults. Small functions that trigger them for sure would do the trick. For example, to provoke a minor page fault: static void trigger_minor_page_fault(void) { void *page; page = mmap(0, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS, -1, 0); /* Clearing the page will trigger a minor fault: */ memset(page, 0, PAGE_SIZE); munmap(page, PAGE_SIZE); } These small test-functions should be attached to the event array as function pointers - hence the generic testing code would just do a ->event.trigger_event() callback, it wouldn't need to know much about the event itself. > > > context-switches 7956 > > > CPU-migrations 7 > > > > this needs to be provoked intentionally via sched_setaffinity(): > > first migrate to cpu0, then to cpu1. > > There should be some option from user or we test it each time.
If this is builtin-test.c - then yes, testing it all the time would be a good default. (there might be other aspects of testing that would require options to this command in the future.) Ingo ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tip:perfcounters/urgent] perf_counter tools: Introduce alias member in event_symbol 2009-06-22 11:14 ` [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol Jaswinder Singh Rajput 2009-06-22 11:32 ` Ingo Molnar @ 2009-06-22 11:38 ` tip-bot for Jaswinder Singh Rajput 1 sibling, 0 replies; 14+ messages in thread From: tip-bot for Jaswinder Singh Rajput @ 2009-06-22 11:38 UTC (permalink / raw) To: linux-tip-commits Cc: linux-kernel, hpa, mingo, peterz, jaswinder, jaswinderrajput, tglx, mingo Commit-ID: 74d5b5889ea71a95d8924c08f8a7c6e2bdcbc0ba Gitweb: http://git.kernel.org/tip/74d5b5889ea71a95d8924c08f8a7c6e2bdcbc0ba Author: Jaswinder Singh Rajput <jaswinder@kernel.org> AuthorDate: Mon, 22 Jun 2009 16:44:28 +0530 Committer: Ingo Molnar <mingo@elte.hu> CommitDate: Mon, 22 Jun 2009 13:29:58 +0200 perf_counter tools: Introduce alias member in event_symbol By introducing alias member in event_symbol : 1. duplicate lines are removed, like: cpu-cycles and cycles branch-instructions and branches context-switches and cs cpu-migrations and migrations 2. We can also add aliases for other events.
Now ./perf list looks like : List of pre-defined events (to be used in -e): cpu-cycles OR cycles [Hardware event] instructions [Hardware event] cache-references [Hardware event] cache-misses [Hardware event] branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] cpu-clock [Software event] task-clock [Software event] page-faults [Software event] faults [Software event] minor-faults [Software event] major-faults [Software event] context-switches OR cs [Software event] cpu-migrations OR migrations [Software event] rNNN [raw hardware event descriptor] Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> LKML-Reference: <1245669268.17153.8.camel@localhost.localdomain> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- tools/perf/util/parse-events.c | 63 ++++++++++++++++++++++++---------------- 1 files changed, 38 insertions(+), 25 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 12abab3..f569548 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -16,32 +16,29 @@ struct event_symbol { u8 type; u64 config; char *symbol; + char *alias; }; #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x static struct event_symbol event_symbols[] = { - { CHW(CPU_CYCLES), "cpu-cycles", }, - { CHW(CPU_CYCLES), "cycles", }, - { CHW(INSTRUCTIONS), "instructions", }, - { CHW(CACHE_REFERENCES), "cache-references", }, - { CHW(CACHE_MISSES), "cache-misses", }, - { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", }, - { CHW(BRANCH_INSTRUCTIONS), "branches", }, - { CHW(BRANCH_MISSES), "branch-misses", }, - { CHW(BUS_CYCLES), "bus-cycles", }, - - { CSW(CPU_CLOCK), "cpu-clock", }, - { CSW(TASK_CLOCK), "task-clock", }, - { CSW(PAGE_FAULTS), "page-faults", }, - { CSW(PAGE_FAULTS), "faults", }, - { CSW(PAGE_FAULTS_MIN), 
"minor-faults", }, - { CSW(PAGE_FAULTS_MAJ), "major-faults", }, - { CSW(CONTEXT_SWITCHES), "context-switches", }, - { CSW(CONTEXT_SWITCHES), "cs", }, - { CSW(CPU_MIGRATIONS), "cpu-migrations", }, - { CSW(CPU_MIGRATIONS), "migrations", }, + { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, + { CHW(INSTRUCTIONS), "instructions", "" }, + { CHW(CACHE_REFERENCES), "cache-references", "" }, + { CHW(CACHE_MISSES), "cache-misses", "" }, + { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, + { CHW(BRANCH_MISSES), "branch-misses", "" }, + { CHW(BUS_CYCLES), "bus-cycles", "" }, + + { CSW(CPU_CLOCK), "cpu-clock", "" }, + { CSW(TASK_CLOCK), "task-clock", "" }, + { CSW(PAGE_FAULTS), "page-faults", "" }, + { CSW(PAGE_FAULTS), "faults", "" }, + { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, + { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, + { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, + { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, }; #define __PERF_COUNTER_FIELD(config, name) \ @@ -196,6 +193,19 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a return 0; } +static int check_events(const char *str, unsigned int i) +{ + if (!strncmp(str, event_symbols[i].symbol, + strlen(event_symbols[i].symbol))) + return 1; + + if (strlen(event_symbols[i].alias)) + if (!strncmp(str, event_symbols[i].alias, + strlen(event_symbols[i].alias))) + return 1; + return 0; +} + /* * Each event can have multiple symbolic names. * Symbolic names are (almost) exactly matched. 
@@ -235,9 +245,7 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) } for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) { - + if (check_events(str, i)) { attr->type = event_symbols[i].type; attr->config = event_symbols[i].config; @@ -289,6 +297,7 @@ void print_events(void) { struct event_symbol *syms = event_symbols; unsigned int i, type, prev_type = -1; + char name[40]; fprintf(stderr, "\n"); fprintf(stderr, "List of pre-defined events (to be used in -e):\n"); @@ -301,14 +310,18 @@ void print_events(void) if (type != prev_type) fprintf(stderr, "\n"); - fprintf(stderr, " %-30s [%s]\n", syms->symbol, + if (strlen(syms->alias)) + sprintf(name, "%s OR %s", syms->symbol, syms->alias); + else + strcpy(name, syms->symbol); + fprintf(stderr, " %-40s [%s]\n", name, event_type_descriptors[type]); prev_type = type; } fprintf(stderr, "\n"); - fprintf(stderr, " %-30s [raw hardware event descriptor]\n", + fprintf(stderr, " %-40s [raw hardware event descriptor]\n", "rNNN"); fprintf(stderr, "\n"); ^ permalink raw reply related [flat|nested] 14+ messages in thread
* [tip:perfcounters/urgent] perf_counter tools: Define separate declarations for H/W and S/W events 2009-06-22 11:13 [PATCH 1/2 -tip] perf_counter: parse-events.c define separate declarations for H/W and S/W events Jaswinder Singh Rajput 2009-06-22 11:14 ` [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol Jaswinder Singh Rajput @ 2009-06-22 11:38 ` tip-bot for Jaswinder Singh Rajput 1 sibling, 0 replies; 14+ messages in thread From: tip-bot for Jaswinder Singh Rajput @ 2009-06-22 11:38 UTC (permalink / raw) To: linux-tip-commits Cc: linux-kernel, hpa, mingo, peterz, jaswinder, jaswinderrajput, tglx, mingo Commit-ID: 51e268423151fc7bb41945bde7843160b6a14c32 Gitweb: http://git.kernel.org/tip/51e268423151fc7bb41945bde7843160b6a14c32 Author: Jaswinder Singh Rajput <jaswinder@kernel.org> AuthorDate: Mon, 22 Jun 2009 16:43:14 +0530 Committer: Ingo Molnar <mingo@elte.hu> CommitDate: Mon, 22 Jun 2009 13:29:57 +0200 perf_counter tools: Define separate declarations for H/W and S/W events Define separate declarations for H/W and S/W events to: 1. Shorten name to save some space so that we can add more members 2. Fix alignment 3. Avoid declaring HARDWARE/SOFTWARE again and again. 
Removed unused CR(x, y) Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> LKML-Reference: <1245669194.17153.6.camel@localhost.localdomain> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- tools/perf/util/parse-events.c | 42 ++++++++++++++++++++-------------------- 1 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 35d04da..12abab3 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -18,30 +18,30 @@ struct event_symbol { char *symbol; }; -#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y -#define CR(x, y) .type = PERF_TYPE_##x, .config = y +#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x +#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x static struct event_symbol event_symbols[] = { - { C(HARDWARE, HW_CPU_CYCLES), "cpu-cycles", }, - { C(HARDWARE, HW_CPU_CYCLES), "cycles", }, - { C(HARDWARE, HW_INSTRUCTIONS), "instructions", }, - { C(HARDWARE, HW_CACHE_REFERENCES), "cache-references", }, - { C(HARDWARE, HW_CACHE_MISSES), "cache-misses", }, - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions", }, - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches", }, - { C(HARDWARE, HW_BRANCH_MISSES), "branch-misses", }, - { C(HARDWARE, HW_BUS_CYCLES), "bus-cycles", }, - - { C(SOFTWARE, SW_CPU_CLOCK), "cpu-clock", }, - { C(SOFTWARE, SW_TASK_CLOCK), "task-clock", }, - { C(SOFTWARE, SW_PAGE_FAULTS), "page-faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS), "faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS_MIN), "minor-faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS_MAJ), "major-faults", }, - { C(SOFTWARE, SW_CONTEXT_SWITCHES), "context-switches", }, - { C(SOFTWARE, SW_CONTEXT_SWITCHES), "cs", }, - { C(SOFTWARE, SW_CPU_MIGRATIONS), "cpu-migrations", }, - { C(SOFTWARE, SW_CPU_MIGRATIONS), "migrations", }, + { CHW(CPU_CYCLES), "cpu-cycles", }, + { CHW(CPU_CYCLES), 
"cycles", }, + { CHW(INSTRUCTIONS), "instructions", }, + { CHW(CACHE_REFERENCES), "cache-references", }, + { CHW(CACHE_MISSES), "cache-misses", }, + { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", }, + { CHW(BRANCH_INSTRUCTIONS), "branches", }, + { CHW(BRANCH_MISSES), "branch-misses", }, + { CHW(BUS_CYCLES), "bus-cycles", }, + + { CSW(CPU_CLOCK), "cpu-clock", }, + { CSW(TASK_CLOCK), "task-clock", }, + { CSW(PAGE_FAULTS), "page-faults", }, + { CSW(PAGE_FAULTS), "faults", }, + { CSW(PAGE_FAULTS_MIN), "minor-faults", }, + { CSW(PAGE_FAULTS_MAJ), "major-faults", }, + { CSW(CONTEXT_SWITCHES), "context-switches", }, + { CSW(CONTEXT_SWITCHES), "cs", }, + { CSW(CPU_MIGRATIONS), "cpu-migrations", }, + { CSW(CPU_MIGRATIONS), "migrations", }, }; #define __PERF_COUNTER_FIELD(config, name) \ ^ permalink raw reply related [flat|nested] 14+ messages in thread
end of thread, other threads:[~2009-06-24 8:48 UTC | newest] Thread overview: 14+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2009-06-22 11:13 [PATCH 1/2 -tip] perf_counter: parse-events.c define separate declarations for H/W and S/W events Jaswinder Singh Rajput 2009-06-22 11:14 ` [PATCH 2/2 -tip] perf_counter: parse-events.c introduce alias member in event_symbol Jaswinder Singh Rajput 2009-06-22 11:32 ` Ingo Molnar 2009-06-22 13:00 ` Jaswinder Singh Rajput 2009-06-22 13:23 ` Jaswinder Singh Rajput 2009-06-22 14:10 ` Ingo Molnar 2009-06-22 19:55 ` Jaswinder Singh Rajput 2009-06-22 20:07 ` Jaswinder Singh Rajput 2009-06-23 8:27 ` Ingo Molnar 2009-06-23 8:24 ` Ingo Molnar 2009-06-23 14:02 ` Jaswinder Singh Rajput 2009-06-24 8:48 ` Ingo Molnar 2009-06-22 11:38 ` [tip:perfcounters/urgent] perf_counter tools: Introduce " tip-bot for Jaswinder Singh Rajput 2009-06-22 11:38 ` [tip:perfcounters/urgent] perf_counter tools: Define separate declarations for H/W and S/W events tip-bot for Jaswinder Singh Rajput
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox