Re: [PATCH 01/12] trace-cmd: Add trace-cmd analyze

linux-trace-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Kalesh Singh <kaleshsingh@google.com>
To: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-trace-devel@vger.kernel.org, williskung@google.com,
	linux-rt-users@vger.kernel.org
Subject: Re: [PATCH 01/12] trace-cmd: Add trace-cmd analyze
Date: Mon, 28 Mar 2022 09:08:02 -0700	[thread overview]
Message-ID: <CAC_TJvfrQMyhSRHh7_z_Vx_6KDELQhyYnapyTBRDMBtKvCuitw@mail.gmail.com> (raw)
In-Reply-To: <20220324025726.1727204-2-rostedt@goodmis.org>

On Wed, Mar 23, 2022 at 7:57 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
>
> Add a new trace-cmd command "analyze" that will look at a trace.dat file
> and report various statistics about the code.
>
> Start off by showing the amount each CPU is idle.
>
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> ---
>  tracecmd/Makefile              |   1 +
>  tracecmd/include/trace-local.h |   2 +
>  tracecmd/trace-analyze.c       | 508 +++++++++++++++++++++++++++++++++
>  tracecmd/trace-cmd.c           |   1 +
>  4 files changed, 512 insertions(+)
>  create mode 100644 tracecmd/trace-analyze.c
>
> diff --git a/tracecmd/Makefile b/tracecmd/Makefile
> index 355f04723ad7..895294d09092 100644
> --- a/tracecmd/Makefile
> +++ b/tracecmd/Makefile
> @@ -37,6 +37,7 @@ TRACE_CMD_OBJS += trace-dump.o
>  TRACE_CMD_OBJS += trace-clear.o
>  TRACE_CMD_OBJS += trace-vm.o
>  TRACE_CMD_OBJS += trace-convert.o
> +TRACE_CMD_OBJS += trace-analyze.o
>
>  ifeq ($(VSOCK_DEFINED), 1)
>  TRACE_CMD_OBJS += trace-agent.o
> diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
> index bb33de06bb58..f9d0745914fe 100644
> --- a/tracecmd/include/trace-local.h
> +++ b/tracecmd/include/trace-local.h
> @@ -112,6 +112,8 @@ void trace_dump(int argc, char **argv);
>
>  void trace_convert(int argc, char **argv);
>
> +void trace_analyze(int argc, char **argv);
> +
>  int trace_record_agent(struct tracecmd_msg_handle *msg_handle,
>                        int cpus, int *fds,
>                        int argc, char **argv, bool use_fifos,
> diff --git a/tracecmd/trace-analyze.c b/tracecmd/trace-analyze.c
> new file mode 100644
> index 000000000000..4a17cf12eb07
> --- /dev/null
> +++ b/tracecmd/trace-analyze.c
> @@ -0,0 +1,508 @@
> +// SPDX-License-Identifier: LGPL-2.1
> +/*
> + * Copyright (C) 2022 Google Inc, Steven Rostedt <srostedt@redhat.com>
> + */
> +#define _LARGEFILE64_SOURCE
> +#include <getopt.h>
> +#include <stdlib.h>
> +
> +#include "trace-cmd-local.h"
> +#include "trace-local.h"
> +#include "trace-hash.h"
> +#include "list.h"
> +
> +static bool time_in_nsecs;
> +
> +struct analysis_data;
> +
> +struct cpu_data {
> +       unsigned long long      start_ts;
> +       unsigned long long      last_ts;
> +       struct analysis_data    *data;
> +       struct trace_hash       tasks;
> +       int                     nr_tasks;
> +       int                     cpu;
> +       int                     current_pid;
> +       int                     first_pid;
> +};
> +
> +struct analysis_data {
> +       unsigned long long      start_ts;
> +       unsigned long long      last_ts;
> +       struct cpu_data         *cpu_data;
> +       struct trace_hash       tasks;
> +       int                     nr_tasks;
> +       int                     allocated_cpus;
> +       int                     cpus;

Hi Steve,

I don't see analysis_data->cpus field ever used.

> +};
> +
> +struct task_item {
> +       unsigned long long      runtime;
> +       unsigned long long      start_ts;
> +       struct trace_hash_item  hash;
> +       int                     pid;
> +};
> +
> +struct task_cpu_item {
> +       struct trace_hash_item  hash;
> +       struct task_item        *task;
> +};
> +
> +#define CPU_BLOCKS     32
> +
> +#define task_from_hash(item) container_of(item, struct task_item, hash)
> +#define task_cpu_from_hash(item)  container_of(item, struct task_cpu_item, hash)
> +
> +static struct cpu_data *get_cpu_data(struct analysis_data *data,
> +                                    struct tep_record *record)
> +{
> +       struct cpu_data *cpu_data;
> +       int cpu = record->cpu;
> +       int cnt;
> +
> +       if (cpu < data->allocated_cpus) {
> +               cpu_data = &data->cpu_data[cpu];
> +               if (!cpu_data->data)
> +                       goto init_cpu_data;
> +               return cpu_data;
> +       }
> +
> +       /* Round up to CPU_BLOCKS multiplier */
> +       cnt = ((cpu + CPU_BLOCKS) / CPU_BLOCKS) * CPU_BLOCKS;
> +
> +       cpu_data = realloc(data->cpu_data, sizeof(*cpu_data) * cnt);
> +
> +       if (!cpu_data)
> +               die("Allocating cpu size %d for cpu %d", cnt, cpu);
> +
> +       memset(cpu_data + data->allocated_cpus, 0,
> +              sizeof(*cpu_data) * (cnt - data->allocated_cpus));
> +
> +       data->allocated_cpus = cnt;
> +
> +       data->cpu_data = cpu_data;
> +       cpu_data += cpu;
> +
> + init_cpu_data:
> +       cpu_data->current_pid = -1;
> +       cpu_data->cpu = cpu;
> +       cpu_data->data = data;
> +
> +       trace_hash_init(&cpu_data->tasks, 32);

nit: #define CPU_TASKS_HASH_SIZE 32?

> +
> +       return cpu_data;
> +}
> +
> +static int check_idle(struct cpu_data *cpu_data, int pid)
> +{
> +       if (pid)
> +               return pid;
> +
> +       /*
> +        * Since pid 0 is the per cpu swapper task that
> +        * means several of these tasks have the same pid
> +        * and only differentiate between CPUs. Set the pid
> +        * that is stored in the hash as -2 - CPU id.
> +        */
> +
> +       return -2 - cpu_data->cpu;
> +}
> +
> +static struct task_item *get_task(struct cpu_data *cpu_data, int pid)
> +{
> +       struct trace_hash_item *hash;
> +       struct task_item *task;
> +       int find_pid;
> +
> +       find_pid = check_idle(cpu_data, pid);
> +
> +       hash = trace_hash_find(&cpu_data->data->tasks, find_pid, NULL, NULL);
> +       if (!hash) {
> +               task = calloc(sizeof(*task), 1);
> +               if (!task)
> +                       die("allocating task");
> +               task->pid = find_pid;
> +               hash = &task->hash;
> +               hash->key = find_pid;
> +               cpu_data->data->nr_tasks++;
> +               trace_hash_add(&cpu_data->data->tasks, hash);
> +       }
> +
> +       return task_from_hash(hash);
> +}
> +
> +static struct task_cpu_item *get_cpu_task(struct cpu_data *cpu_data, int pid)
> +{
> +       struct trace_hash_item *hash;
> +       struct task_cpu_item *task;
> +
> +       hash = trace_hash_find(&cpu_data->tasks, pid, NULL, NULL);
> +       if (!hash) {
> +               task = calloc(sizeof(*task), 1);
> +               if (!task)
> +                       die("allocating cpu task");
> +               task->task = get_task(cpu_data, pid);
> +               hash = &task->hash;
> +               hash->key = pid;
> +               cpu_data->nr_tasks++;
> +               trace_hash_add(&cpu_data->tasks, hash);
> +       }
> +
> +       return task_cpu_from_hash(hash);
> +};
> +
> +/* Update times for a task scheduling off the CPU */
> +static void update_cpu_task_times(struct cpu_data *cpu_data,
> +                                 struct task_cpu_item *cpu_task,
> +                                 unsigned long long ts)
> +{
> +       unsigned long long delta;
> +       struct task_item *task = cpu_task->task;
> +
> +       /*
> +        * If the last event was a sched switch where the previous task
> +        * ran on another CPU, and migrated back to this CPU, and sched
> +        * switch was not recorded (saying that this task scheduled off)
> +        * It could be miscronstrued to still be on this CPU, and that
> +        * its start_ts is later than the last_ts of this CPU.
> +        */
> +       if (ts < task->start_ts)
> +               return;
> +
> +       delta = ts - task->start_ts;
> +       task->runtime += delta;
> +}
> +
> +static void update_pid(struct cpu_data *cpu_data,
> +                      struct tep_record *record, int pid)
> +{
> +       struct task_cpu_item *cpu_task;
> +       struct task_item *task;
> +       int curr_pid = cpu_data->current_pid;
> +       unsigned long long delta;
> +
> +       cpu_task = get_cpu_task(cpu_data, curr_pid);
> +       task = cpu_task->task;
> +
> +       update_cpu_task_times(cpu_data, cpu_task, cpu_data->last_ts);
> +
> +       if (!record)
> +               return;
> +
> +       cpu_task = get_cpu_task(cpu_data, pid);
> +       task = cpu_task->task;
> +
> +       if (record->ts < cpu_data->last_ts) {
> +               tracecmd_warning("task %d start time %llu greater than CPU time %llu",
> +                                pid, record->ts, cpu_data->last_ts);

Shouldn’t this say, ... start time %llu "less than" CPU time ...?

> +       } else {
> +               delta = record->ts - cpu_data->last_ts;
> +               task->runtime += delta;
> +       }
> +
> +       cpu_data->last_ts = record->ts;
> +
> +       task->start_ts = cpu_data->last_ts;
> +       cpu_data->current_pid = pid;
> +}
> +
> +static void update_cpu_times(struct cpu_data *cpu_data,
> +                            struct tep_handle *tep, int pid,
> +                            struct tep_record *record)
> +{
> +       struct task_cpu_item *cpu_task;
> +       struct task_item *task;
> +
> +       if (cpu_data->current_pid < 0) {
> +               /* First time called */
> +               cpu_data->start_ts = record->ts;
> +               cpu_data->last_ts = record->ts;
> +               cpu_data->current_pid = pid;
> +               cpu_data->first_pid = pid;
> +               cpu_task = get_cpu_task(cpu_data, pid);
> +               task = cpu_task->task;
> +               task->start_ts = record->ts;
> +               if (record->ts < cpu_data->data->start_ts)
> +                       cpu_data->data->start_ts = record->ts;
> +               return;
> +       }
> +
> +       if (pid != cpu_data->current_pid) {
> +               update_pid(cpu_data, record, pid);
> +               return;
> +       }
> +
> +       cpu_data->last_ts = record->ts;
> +}
> +
> +static void update_first_pid(struct cpu_data *cpu_data)
> +{
> +       struct task_cpu_item *cpu_task;
> +       struct task_item *task;
> +       unsigned long long start_ts = cpu_data->data->start_ts;
> +       unsigned long long delta;
> +
> +       cpu_task = get_cpu_task(cpu_data, cpu_data->first_pid);
> +       task = cpu_task->task;
> +       delta = cpu_data->start_ts - start_ts;
> +       task->runtime += delta;
> +       cpu_data->start_ts = start_ts;
> +}
> +
> +static void process_cpu(struct analysis_data *data,
> +                       struct tep_handle *tep,
> +                       struct tep_record *record)
> +{
> +       struct cpu_data *cpu_data;
> +       int pid;
> +
> +       pid = tep_data_pid(tep, record);
> +       if (pid < 0) /* Warn? */
> +               return;
> +
> +       cpu_data = get_cpu_data(data, record);
> +       update_cpu_times(cpu_data, tep, pid, record);
> +}
> +
> +static void print_time(unsigned long long ts, char delim)
> +{
> +       unsigned long long secs;
> +       unsigned long long msecs;
> +       unsigned long long usecs;
> +       unsigned long long nsecs;
> +
> +       secs = ts / 1000000000;
> +       ts -= secs * 1000000000;
> +
> +       msecs = ts / 1000000;
> +       ts -= msecs * 1000000;
> +
> +       usecs = ts / 1000;
> +       ts -= usecs * 1000;
> +
> +       nsecs = ts;
> +
> +       if (delim) {
> +               printf("%6llu.%03llu%c%03llu",
> +                      secs, msecs, delim, usecs);
> +               if (time_in_nsecs)
> +                       printf("%c%03llu", delim, nsecs);
> +       } else {
> +               printf("%6llu.%03llu%03llu",
> +                      secs, msecs, usecs);
> +               if (time_in_nsecs)
> +                       printf("%03llu", nsecs);
> +       }
> +}
> +
> +static void print_total(struct tep_handle *tep, struct analysis_data *data)
> +{
> +       unsigned long long total_time;
> +       struct trace_hash_item **bucket;
> +       struct trace_hash_item *item;
> +       struct task_item **idle_tasks;
> +       struct task_item *task;
> +       bool first = true;
> +       int cpu;
> +       int i = 0;
> +
> +       total_time = data->last_ts - data->start_ts;
> +       printf("\nTotal time: ");
> +       print_time(total_time, '_');
> +       printf("\n");
> +
> +       idle_tasks = calloc(sizeof(*idle_tasks), data->allocated_cpus);
> +       if (!idle_tasks)
> +               die("Could not allocate idle task array");
> +
> +       trace_hash_for_each_bucket(bucket, &data->tasks) {
> +               trace_hash_for_each_item(item, bucket) {
> +                       task = task_from_hash(item);
> +                       if (task->pid < 0) {
> +                               cpu = -2 - task->pid;
> +                               idle_tasks[cpu] = task;
> +                       }
> +               }
> +       }
> +
> +       for (i = 0; i < data->allocated_cpus; i++) {
> +               if (!idle_tasks[i])
> +                       continue;
> +
> +               if (first) {
> +                       printf("\n Idle CPU\t     Run time\n");
> +                       printf(" --------\t     --------\n");
> +                       first = false;
> +               }
> +               printf("CPU %d idle:\t", i);
> +               print_time(idle_tasks[i]->runtime, '_');
> +               printf(" (%%%lld)\n", (idle_tasks[i]->runtime * 100) / total_time);
> +       }
> +       free(idle_tasks);
> +}
> +
> +static void free_tasks(struct trace_hash *hash)
> +{
> +       struct trace_hash_item **bucket;
> +       struct trace_hash_item *item;
> +       struct task_item *task;
> +
> +       trace_hash_for_each_bucket(bucket, hash) {
> +               trace_hash_while_item(item, bucket) {
> +                       task = task_from_hash(item);
> +                       trace_hash_del(item);
> +                       free(task);
> +               }
> +       }
> +       trace_hash_free(hash);
> +}
> +
> +static void free_cpu_tasks(struct trace_hash *hash)
> +{
> +       struct trace_hash_item **bucket;
> +       struct trace_hash_item *item;
> +       struct task_cpu_item *cpu_task;
> +
> +       trace_hash_for_each_bucket(bucket, hash) {
> +               trace_hash_while_item(item, bucket) {
> +                       cpu_task = task_cpu_from_hash(item);
> +                       trace_hash_del(item);
> +                       free(cpu_task);
> +               }
> +       }
> +       trace_hash_free(hash);
> +}
> +
> +static void free_cpus(struct analysis_data *data)
> +{
> +       struct cpu_data *cpu_data;
> +       int i;
> +
> +       for (i = 0; i < data->allocated_cpus; i++) {
> +               cpu_data = &data->cpu_data[i];
> +               if (!cpu_data->data)
> +                       continue;
> +               free_cpu_tasks(&cpu_data->tasks);
> +       }
> +       free(data->cpu_data);
> +}
> +
> +static void do_trace_analyze(struct tracecmd_input *handle)
> +{
> +       struct tep_handle *tep = tracecmd_get_tep(handle);
> +       struct tep_record *record;
> +       struct analysis_data data;
> +       struct cpu_data *cpu_data;
> +       int i;
> +
> +       memset(&data, 0, sizeof(data));
> +
> +       trace_hash_init(&data.tasks, 128);
> +
> +       /* Set to a very large number */
> +       data.start_ts = -1ULL;
> +
> +       do {
> +               record = tracecmd_read_next_data(handle, NULL);
> +               if (record)
> +                       process_cpu(&data, tep, record);
> +               tracecmd_free_record(record);
> +       } while (record);
> +
> +       for (i = 0; i < data.allocated_cpus; i++) {
> +               cpu_data = &data.cpu_data[i];
> +               if (!cpu_data->data || !cpu_data->nr_tasks)
> +                       continue;
> +               if (cpu_data->last_ts > data.last_ts)
> +                       data.last_ts = cpu_data->last_ts;
> +       }
> +
> +       for (i = 0; i < data.allocated_cpus; i++) {
> +               cpu_data = &data.cpu_data[i];
> +               if (!cpu_data->data || !cpu_data->nr_tasks)
> +                       continue;
> +               cpu_data->last_ts = data.last_ts;
> +               update_pid(cpu_data, NULL, -1);

I think having update_last_pid() makes this easier to read.

Thanks,
Kalesh

> +               update_first_pid(cpu_data);
> +       }
> +
> +       print_total(tep, &data);
> +
> +       free_cpus(&data);
> +       free_tasks(&data.tasks);
> +}
> +
> +void trace_analyze(int argc, char **argv)
> +{
> +       struct tracecmd_input *handle;
> +       const char *input_file = NULL;
> +       int instances;
> +       int ret;
> +
> +       for (;;) {
> +               int c;
> +
> +               c = getopt(argc-1, argv+1, "+hti:");
> +               if (c == -1)
> +                       break;
> +               switch (c) {
> +               case 'h':
> +                       usage(argv);
> +                       break;
> +               case 'i':
> +                       if (input_file)
> +                               die("Only one input for historgram");
> +                       input_file = optarg;
> +                       break;
> +               case 't':
> +                       time_in_nsecs = true;
> +                       break;
> +               default:
> +                       usage(argv);
> +               }
> +       }
> +
> +       if ((argc - optind) >= 2) {
> +               if (input_file)
> +                       usage(argv);
> +               input_file = argv[optind + 1];
> +       }
> +
> +       if (!input_file)
> +               input_file = DEFAULT_INPUT_FILE;
> +
> +       handle = tracecmd_alloc(input_file, 0);
> +       if (!handle)
> +               die("can't open %s\n", input_file);
> +
> +       ret = tracecmd_read_headers(handle, 0);
> +       if (ret)
> +               return;
> +
> +       ret = tracecmd_init_data(handle);
> +       if (ret < 0)
> +               die("failed to init data");
> +
> +       if (ret > 0)
> +               die("trace-cmd hist does not work with latency traces\n");
> +
> +       do_trace_analyze(handle);
> +
> +       instances = tracecmd_buffer_instances(handle);
> +       if (instances) {
> +               struct tracecmd_input *new_handle;
> +               int i;
> +
> +               for (i = 0; i < instances; i++) {
> +                       new_handle = tracecmd_buffer_instance_handle(handle, i);
> +                       if (!new_handle) {
> +                               warning("could not retrieve handle %d", i);
> +                               continue;
> +                       }
> +                       do_trace_analyze(new_handle);
> +                       tracecmd_close(new_handle);
> +               }
> +       }
> +
> +       tracecmd_close(handle);
> +}
> diff --git a/tracecmd/trace-cmd.c b/tracecmd/trace-cmd.c
> index a83a8d0bb9e4..d6e1f9dacc3c 100644
> --- a/tracecmd/trace-cmd.c
> +++ b/tracecmd/trace-cmd.c
> @@ -134,6 +134,7 @@ struct command commands[] = {
>         {"help", trace_usage},
>         {"dump", trace_dump},
>         {"convert", trace_convert},
> +       {"analyze", trace_analyze},
>         {"-h", trace_usage},
>  };
>
> --
> 2.35.1
>

next prev parent reply	other threads:[~2022-03-28 16:08 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-24  2:57 [PATCH 00/12] trace-cmd: Add trace-cmd analyze command Steven Rostedt
2022-03-24  2:57 ` [PATCH 01/12] trace-cmd: Add trace-cmd analyze Steven Rostedt
2022-03-28 16:08   ` Kalesh Singh [this message]
2022-03-28 16:34     ` Steven Rostedt
2022-03-24  2:57 ` [PATCH 02/12] trace-cmd analyze: Show what tasks are running the most Steven Rostedt
2022-03-24  2:57 ` [PATCH 03/12] trace-cmd analyze: Show how much tasks run on each CPU Steven Rostedt
2022-03-25 19:36   ` John Keeping
2022-03-25 20:18     ` Steven Rostedt
2022-03-26 11:07       ` John Keeping
2022-03-26 15:16         ` Steven Rostedt
2022-03-28 16:23           ` Kalesh Singh
2022-03-24  2:57 ` [PATCH 04/12] trace-cmd analyze: Use sched_switch to find comm mappings Steven Rostedt
2022-03-24  2:57 ` [PATCH 05/12] trace-cmd analyze: Use sched_switch event to update times Steven Rostedt
2022-03-24  2:57 ` [PATCH 06/12] trace-cmd analyze: Add tracing of tasks and their states Steven Rostedt
2022-03-25 19:37   ` John Keeping
2022-03-25 20:19     ` Steven Rostedt
2022-03-24  2:57 ` [PATCH 07/12] trace-cmd analyze: Add "idleness" Steven Rostedt
2022-03-24  2:57 ` [PATCH 08/12] trace-cmd analyze: Track migration Steven Rostedt
2022-03-24  2:57 ` [PATCH 09/12] trace-cmd analyze: Add wake up latency timings Steven Rostedt
2022-03-25 19:34   ` John Keeping
2022-03-25 20:13     ` Steven Rostedt
2022-03-25 20:31       ` Steven Rostedt
2022-03-26 11:14         ` John Keeping
2022-03-26 15:32           ` Steven Rostedt
2022-03-24  2:57 ` [PATCH 10/12] trace-cmd analyze: Add counting of page faults Steven Rostedt
2022-03-24  2:57 ` [PATCH 11/12] trace-cmd analyze: Account for dropped events Steven Rostedt
2022-03-24  2:57 ` [PATCH 12/12] trace-cmd analyze: Add documentation Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAC_TJvfrQMyhSRHh7_z_Vx_6KDELQhyYnapyTBRDMBtKvCuitw@mail.gmail.com \
    --to=kaleshsingh@google.com \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=linux-trace-devel@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=williskung@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).