Re: [PATCH 01/12] trace-cmd: Add trace-cmd analyze

public inbox for linux-rt-users@vger.kernel.org
 help / color / mirror / Atom feed

From: Steven Rostedt <rostedt@goodmis.org>
To: Kalesh Singh <kaleshsingh@google.com>
Cc: linux-trace-devel@vger.kernel.org, williskung@google.com,
	linux-rt-users@vger.kernel.org
Subject: Re: [PATCH 01/12] trace-cmd: Add trace-cmd analyze
Date: Mon, 28 Mar 2022 12:34:34 -0400	[thread overview]
Message-ID: <20220328123434.301a5ce4@gandalf.local.home> (raw)
In-Reply-To: <CAC_TJvfrQMyhSRHh7_z_Vx_6KDELQhyYnapyTBRDMBtKvCuitw@mail.gmail.com>

On Mon, 28 Mar 2022 09:08:02 -0700
Kalesh Singh <kaleshsingh@google.com> wrote:

> > +++ b/tracecmd/trace-analyze.c
> > @@ -0,0 +1,508 @@
> > +// SPDX-License-Identifier: LGPL-2.1
> > +/*
> > + * Copyright (C) 2022 Google Inc, Steven Rostedt <srostedt@redhat.com>
> > + */
> > +#define _LARGEFILE64_SOURCE
> > +#include <getopt.h>
> > +#include <stdlib.h>
> > +
> > +#include "trace-cmd-local.h"
> > +#include "trace-local.h"
> > +#include "trace-hash.h"
> > +#include "list.h"
> > +
> > +static bool time_in_nsecs;
> > +
> > +struct analysis_data;
> > +
> > +struct cpu_data {
> > +       unsigned long long      start_ts;
> > +       unsigned long long      last_ts;
> > +       struct analysis_data    *data;
> > +       struct trace_hash       tasks;
> > +       int                     nr_tasks;
> > +       int                     cpu;
> > +       int                     current_pid;
> > +       int                     first_pid;
> > +};
> > +
> > +struct analysis_data {
> > +       unsigned long long      start_ts;
> > +       unsigned long long      last_ts;
> > +       struct cpu_data         *cpu_data;
> > +       struct trace_hash       tasks;
> > +       int                     nr_tasks;
> > +       int                     allocated_cpus;
> > +       int                     cpus;  
> 
> Hi Steve,
> 
> I don't see analysis_data->cpus field ever used.

Probably not. I was debating about adding RFC to this series, but decided
not to. Now I regret that, because it really is an RFC (as well as I rushed
a lot of it).

> 
> > +};
> > +
> > +struct task_item {
> > +       unsigned long long      runtime;
> > +       unsigned long long      start_ts;
> > +       struct trace_hash_item  hash;
> > +       int                     pid;
> > +};
> > +
> > +struct task_cpu_item {
> > +       struct trace_hash_item  hash;
> > +       struct task_item        *task;
> > +};
> > +
> > +#define CPU_BLOCKS     32
> > +
> > +#define task_from_hash(item) container_of(item, struct task_item, hash)
> > +#define task_cpu_from_hash(item)  container_of(item, struct task_cpu_item, hash)
> > +
> > +static struct cpu_data *get_cpu_data(struct analysis_data *data,
> > +                                    struct tep_record *record)
> > +{
> > +       struct cpu_data *cpu_data;
> > +       int cpu = record->cpu;
> > +       int cnt;
> > +
> > +       if (cpu < data->allocated_cpus) {
> > +               cpu_data = &data->cpu_data[cpu];
> > +               if (!cpu_data->data)
> > +                       goto init_cpu_data;
> > +               return cpu_data;
> > +       }
> > +
> > +       /* Round up to CPU_BLOCKS multiplier */
> > +       cnt = ((cpu + CPU_BLOCKS) / CPU_BLOCKS) * CPU_BLOCKS;
> > +
> > +       cpu_data = realloc(data->cpu_data, sizeof(*cpu_data) * cnt);
> > +
> > +       if (!cpu_data)
> > +               die("Allocating cpu size %d for cpu %d", cnt, cpu);
> > +
> > +       memset(cpu_data + data->allocated_cpus, 0,
> > +              sizeof(*cpu_data) * (cnt - data->allocated_cpus));
> > +
> > +       data->allocated_cpus = cnt;
> > +
> > +       data->cpu_data = cpu_data;
> > +       cpu_data += cpu;
> > +
> > + init_cpu_data:
> > +       cpu_data->current_pid = -1;
> > +       cpu_data->cpu = cpu;
> > +       cpu_data->data = data;
> > +
> > +       trace_hash_init(&cpu_data->tasks, 32);  
> 
> nit: #define CPU_TASKS_HASH_SIZE 32?

Agreed.

> 
> > +
> > +       return cpu_data;
> > +}
> > +
> > +static int check_idle(struct cpu_data *cpu_data, int pid)
> > +{
> > +       if (pid)
> > +               return pid;
> > +
> > +       /*
> > +        * Since pid 0 is the per cpu swapper task that
> > +        * means several of these tasks have the same pid
> > +        * and only differentiate between CPUs. Set the pid
> > +        * that is stored in the hash as -2 - CPU id.
> > +        */
> > +
> > +       return -2 - cpu_data->cpu;
> > +}
> > +
> > +static struct task_item *get_task(struct cpu_data *cpu_data, int pid)
> > +{
> > +       struct trace_hash_item *hash;
> > +       struct task_item *task;
> > +       int find_pid;
> > +
> > +       find_pid = check_idle(cpu_data, pid);
> > +
> > +       hash = trace_hash_find(&cpu_data->data->tasks, find_pid, NULL, NULL);
> > +       if (!hash) {
> > +               task = calloc(sizeof(*task), 1);
> > +               if (!task)
> > +                       die("allocating task");
> > +               task->pid = find_pid;
> > +               hash = &task->hash;
> > +               hash->key = find_pid;
> > +               cpu_data->data->nr_tasks++;
> > +               trace_hash_add(&cpu_data->data->tasks, hash);
> > +       }
> > +
> > +       return task_from_hash(hash);
> > +}
> > +
> > +static struct task_cpu_item *get_cpu_task(struct cpu_data *cpu_data, int pid)
> > +{
> > +       struct trace_hash_item *hash;
> > +       struct task_cpu_item *task;
> > +
> > +       hash = trace_hash_find(&cpu_data->tasks, pid, NULL, NULL);
> > +       if (!hash) {
> > +               task = calloc(sizeof(*task), 1);
> > +               if (!task)
> > +                       die("allocating cpu task");
> > +               task->task = get_task(cpu_data, pid);
> > +               hash = &task->hash;
> > +               hash->key = pid;
> > +               cpu_data->nr_tasks++;
> > +               trace_hash_add(&cpu_data->tasks, hash);
> > +       }
> > +
> > +       return task_cpu_from_hash(hash);
> > +};
> > +
> > +/* Update times for a task scheduling off the CPU */
> > +static void update_cpu_task_times(struct cpu_data *cpu_data,
> > +                                 struct task_cpu_item *cpu_task,
> > +                                 unsigned long long ts)
> > +{
> > +       unsigned long long delta;
> > +       struct task_item *task = cpu_task->task;
> > +
> > +       /*
> > +        * If the last event was a sched switch where the previous task
> > +        * ran on another CPU, and migrated back to this CPU, and sched
> > +        * switch was not recorded (saying that this task scheduled off)
> > +        * It could be miscronstrued to still be on this CPU, and that
> > +        * its start_ts is later than the last_ts of this CPU.
> > +        */
> > +       if (ts < task->start_ts)
> > +               return;
> > +
> > +       delta = ts - task->start_ts;
> > +       task->runtime += delta;
> > +}
> > +
> > +static void update_pid(struct cpu_data *cpu_data,
> > +                      struct tep_record *record, int pid)
> > +{
> > +       struct task_cpu_item *cpu_task;
> > +       struct task_item *task;
> > +       int curr_pid = cpu_data->current_pid;
> > +       unsigned long long delta;
> > +
> > +       cpu_task = get_cpu_task(cpu_data, curr_pid);
> > +       task = cpu_task->task;
> > +
> > +       update_cpu_task_times(cpu_data, cpu_task, cpu_data->last_ts);
> > +
> > +       if (!record)
> > +               return;
> > +
> > +       cpu_task = get_cpu_task(cpu_data, pid);
> > +       task = cpu_task->task;
> > +
> > +       if (record->ts < cpu_data->last_ts) {
> > +               tracecmd_warning("task %d start time %llu greater than CPU time %llu",
> > +                                pid, record->ts, cpu_data->last_ts);  
> 
> Shouldn’t this say, ... start time %llu "less than" CPU time ...?

Yeah, that wording is confusing.

	"task %d CPU start time %llu is before last event %llu on CPU for a different task"

?

> 
> > +       } else {
> > +               delta = record->ts - cpu_data->last_ts;
> > +               task->runtime += delta;
> > +       }
> > +
> > +       cpu_data->last_ts = record->ts;
> > +
> > +       task->start_ts = cpu_data->last_ts;
> > +       cpu_data->current_pid = pid;
> > +}
> > +
> > +static void update_cpu_times(struct cpu_data *cpu_data,
> > +                            struct tep_handle *tep, int pid,
> > +                            struct tep_record *record)
> > +{
> > +       struct task_cpu_item *cpu_task;
> > +       struct task_item *task;
> > +
> > +       if (cpu_data->current_pid < 0) {
> > +               /* First time called */
> > +               cpu_data->start_ts = record->ts;
> > +               cpu_data->last_ts = record->ts;
> > +               cpu_data->current_pid = pid;
> > +               cpu_data->first_pid = pid;
> > +               cpu_task = get_cpu_task(cpu_data, pid);
> > +               task = cpu_task->task;
> > +               task->start_ts = record->ts;
> > +               if (record->ts < cpu_data->data->start_ts)
> > +                       cpu_data->data->start_ts = record->ts;
> > +               return;
> > +       }
> > +
> > +       if (pid != cpu_data->current_pid) {
> > +               update_pid(cpu_data, record, pid);
> > +               return;
> > +       }
> > +
> > +       cpu_data->last_ts = record->ts;
> > +}
> > +
> > +static void update_first_pid(struct cpu_data *cpu_data)
> > +{
> > +       struct task_cpu_item *cpu_task;
> > +       struct task_item *task;
> > +       unsigned long long start_ts = cpu_data->data->start_ts;
> > +       unsigned long long delta;
> > +
> > +       cpu_task = get_cpu_task(cpu_data, cpu_data->first_pid);
> > +       task = cpu_task->task;
> > +       delta = cpu_data->start_ts - start_ts;
> > +       task->runtime += delta;
> > +       cpu_data->start_ts = start_ts;
> > +}
> > +
> > +static void process_cpu(struct analysis_data *data,
> > +                       struct tep_handle *tep,
> > +                       struct tep_record *record)
> > +{
> > +       struct cpu_data *cpu_data;
> > +       int pid;
> > +
> > +       pid = tep_data_pid(tep, record);
> > +       if (pid < 0) /* Warn? */
> > +               return;
> > +
> > +       cpu_data = get_cpu_data(data, record);
> > +       update_cpu_times(cpu_data, tep, pid, record);
> > +}
> > +
> > +static void print_time(unsigned long long ts, char delim)
> > +{
> > +       unsigned long long secs;
> > +       unsigned long long msecs;
> > +       unsigned long long usecs;
> > +       unsigned long long nsecs;
> > +
> > +       secs = ts / 1000000000;
> > +       ts -= secs * 1000000000;
> > +
> > +       msecs = ts / 1000000;
> > +       ts -= msecs * 1000000;
> > +
> > +       usecs = ts / 1000;
> > +       ts -= usecs * 1000;
> > +
> > +       nsecs = ts;
> > +
> > +       if (delim) {
> > +               printf("%6llu.%03llu%c%03llu",
> > +                      secs, msecs, delim, usecs);
> > +               if (time_in_nsecs)
> > +                       printf("%c%03llu", delim, nsecs);
> > +       } else {
> > +               printf("%6llu.%03llu%03llu",
> > +                      secs, msecs, usecs);
> > +               if (time_in_nsecs)
> > +                       printf("%03llu", nsecs);
> > +       }
> > +}
> > +
> > +static void print_total(struct tep_handle *tep, struct analysis_data *data)
> > +{
> > +       unsigned long long total_time;
> > +       struct trace_hash_item **bucket;
> > +       struct trace_hash_item *item;
> > +       struct task_item **idle_tasks;
> > +       struct task_item *task;
> > +       bool first = true;
> > +       int cpu;
> > +       int i = 0;
> > +
> > +       total_time = data->last_ts - data->start_ts;
> > +       printf("\nTotal time: ");
> > +       print_time(total_time, '_');
> > +       printf("\n");
> > +
> > +       idle_tasks = calloc(sizeof(*idle_tasks), data->allocated_cpus);
> > +       if (!idle_tasks)
> > +               die("Could not allocate idle task array");
> > +
> > +       trace_hash_for_each_bucket(bucket, &data->tasks) {
> > +               trace_hash_for_each_item(item, bucket) {
> > +                       task = task_from_hash(item);
> > +                       if (task->pid < 0) {
> > +                               cpu = -2 - task->pid;
> > +                               idle_tasks[cpu] = task;
> > +                       }
> > +               }
> > +       }
> > +
> > +       for (i = 0; i < data->allocated_cpus; i++) {
> > +               if (!idle_tasks[i])
> > +                       continue;
> > +
> > +               if (first) {
> > +                       printf("\n Idle CPU\t     Run time\n");
> > +                       printf(" --------\t     --------\n");
> > +                       first = false;
> > +               }
> > +               printf("CPU %d idle:\t", i);
> > +               print_time(idle_tasks[i]->runtime, '_');
> > +               printf(" (%%%lld)\n", (idle_tasks[i]->runtime * 100) / total_time);
> > +       }
> > +       free(idle_tasks);
> > +}
> > +
> > +static void free_tasks(struct trace_hash *hash)
> > +{
> > +       struct trace_hash_item **bucket;
> > +       struct trace_hash_item *item;
> > +       struct task_item *task;
> > +
> > +       trace_hash_for_each_bucket(bucket, hash) {
> > +               trace_hash_while_item(item, bucket) {
> > +                       task = task_from_hash(item);
> > +                       trace_hash_del(item);
> > +                       free(task);
> > +               }
> > +       }
> > +       trace_hash_free(hash);
> > +}
> > +
> > +static void free_cpu_tasks(struct trace_hash *hash)
> > +{
> > +       struct trace_hash_item **bucket;
> > +       struct trace_hash_item *item;
> > +       struct task_cpu_item *cpu_task;
> > +
> > +       trace_hash_for_each_bucket(bucket, hash) {
> > +               trace_hash_while_item(item, bucket) {
> > +                       cpu_task = task_cpu_from_hash(item);
> > +                       trace_hash_del(item);
> > +                       free(cpu_task);
> > +               }
> > +       }
> > +       trace_hash_free(hash);
> > +}
> > +
> > +static void free_cpus(struct analysis_data *data)
> > +{
> > +       struct cpu_data *cpu_data;
> > +       int i;
> > +
> > +       for (i = 0; i < data->allocated_cpus; i++) {
> > +               cpu_data = &data->cpu_data[i];
> > +               if (!cpu_data->data)
> > +                       continue;
> > +               free_cpu_tasks(&cpu_data->tasks);
> > +       }
> > +       free(data->cpu_data);
> > +}
> > +
> > +static void do_trace_analyze(struct tracecmd_input *handle)
> > +{
> > +       struct tep_handle *tep = tracecmd_get_tep(handle);
> > +       struct tep_record *record;
> > +       struct analysis_data data;
> > +       struct cpu_data *cpu_data;
> > +       int i;
> > +
> > +       memset(&data, 0, sizeof(data));
> > +
> > +       trace_hash_init(&data.tasks, 128);
> > +
> > +       /* Set to a very large number */
> > +       data.start_ts = -1ULL;
> > +
> > +       do {
> > +               record = tracecmd_read_next_data(handle, NULL);
> > +               if (record)
> > +                       process_cpu(&data, tep, record);
> > +               tracecmd_free_record(record);
> > +       } while (record);
> > +
> > +       for (i = 0; i < data.allocated_cpus; i++) {
> > +               cpu_data = &data.cpu_data[i];
> > +               if (!cpu_data->data || !cpu_data->nr_tasks)
> > +                       continue;
> > +               if (cpu_data->last_ts > data.last_ts)
> > +                       data.last_ts = cpu_data->last_ts;
> > +       }
> > +
> > +       for (i = 0; i < data.allocated_cpus; i++) {
> > +               cpu_data = &data.cpu_data[i];
> > +               if (!cpu_data->data || !cpu_data->nr_tasks)
> > +                       continue;
> > +               cpu_data->last_ts = data.last_ts;
> > +               update_pid(cpu_data, NULL, -1);  
> 
> I think having update_last_pid() makes this easier to read.

You mean to add a helper function that does the last pid and have
update_pid() start with that?

I could do that.

Thanks for looking at this.

Note, I'm going to rewrite this so that this is actually part of the
library, to allow other applications to have access to this functionality.

-- Steve

next prev parent reply	other threads:[~2022-03-28 16:34 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-24  2:57 [PATCH 00/12] trace-cmd: Add trace-cmd analyze command Steven Rostedt
2022-03-24  2:57 ` [PATCH 01/12] trace-cmd: Add trace-cmd analyze Steven Rostedt
2022-03-28 16:08   ` Kalesh Singh
2022-03-28 16:34     ` Steven Rostedt [this message]
2022-03-24  2:57 ` [PATCH 02/12] trace-cmd analyze: Show what tasks are running the most Steven Rostedt
2022-03-24  2:57 ` [PATCH 03/12] trace-cmd analyze: Show how much tasks run on each CPU Steven Rostedt
2022-03-25 19:36   ` John Keeping
2022-03-25 20:18     ` Steven Rostedt
2022-03-26 11:07       ` John Keeping
2022-03-26 15:16         ` Steven Rostedt
2022-03-28 16:23           ` Kalesh Singh
2022-03-24  2:57 ` [PATCH 04/12] trace-cmd analyze: Use sched_switch to find comm mappings Steven Rostedt
2022-03-24  2:57 ` [PATCH 05/12] trace-cmd analyze: Use sched_switch event to update times Steven Rostedt
2022-03-24  2:57 ` [PATCH 06/12] trace-cmd analyze: Add tracing of tasks and their states Steven Rostedt
2022-03-25 19:37   ` John Keeping
2022-03-25 20:19     ` Steven Rostedt
2022-03-24  2:57 ` [PATCH 07/12] trace-cmd analyze: Add "idleness" Steven Rostedt
2022-03-24  2:57 ` [PATCH 08/12] trace-cmd analyze: Track migration Steven Rostedt
2022-03-24  2:57 ` [PATCH 09/12] trace-cmd analyze: Add wake up latency timings Steven Rostedt
2022-03-25 19:34   ` John Keeping
2022-03-25 20:13     ` Steven Rostedt
2022-03-25 20:31       ` Steven Rostedt
2022-03-26 11:14         ` John Keeping
2022-03-26 15:32           ` Steven Rostedt
2022-03-24  2:57 ` [PATCH 10/12] trace-cmd analyze: Add counting of page faults Steven Rostedt
2022-03-24  2:57 ` [PATCH 11/12] trace-cmd analyze: Account for dropped events Steven Rostedt
2022-03-24  2:57 ` [PATCH 12/12] trace-cmd analyze: Add documentation Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220328123434.301a5ce4@gandalf.local.home \
    --to=rostedt@goodmis.org \
    --cc=kaleshsingh@google.com \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=linux-trace-devel@vger.kernel.org \
    --cc=williskung@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox