public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: kan.liang@intel.com
Cc: peterz@infradead.org, mingo@redhat.com,
	linux-kernel@vger.kernel.org, jolsa@kernel.org,
	wangnan0@huawei.com, hekuang@huawei.com, namhyung@kernel.org,
	alexander.shishkin@linux.intel.com, adrian.hunter@intel.com,
	ak@linux.intel.com
Subject: Re: [PATCH 3/4] perf record: event synthesization multithreading support
Date: Fri, 13 Oct 2017 11:38:56 -0300	[thread overview]
Message-ID: <20171013143856.GP3503@kernel.org> (raw)
In-Reply-To: <1507903767-293944-4-git-send-email-kan.liang@intel.com>

Em Fri, Oct 13, 2017 at 07:09:26AM -0700, kan.liang@intel.com escreveu:
> From: Kan Liang <Kan.liang@intel.com>
> 
> The process function process_synthesized_event writes the process
> result to perf.data, which is not multithreading friendly.
> 
> Realloc a buffer for each thread to temporarily keep the processing
> result. Write them to perf.data at the end of event synthesization.
> The new method doesn't impact the final result, because
>  - The order of the synthesized events is not important.
>  - The number of synthesized events is limited. Usually, it only needs
>    hundreds of kilobytes to store all the synthesized events.
>    It is unlikely to fail because of a lack of memory.

Why not write to a per cpu file and then at the end merge them? Leave
the memory management to the kernel, i.e. in most cases, when memory is
plentiful, you may not even end up touching the disk: just rewind the
per event files and go on dumping to the main perf.data file.

At some point we may just not do this merging, and keep per cpu files
all the way to perf report, etc. This would be a first foray into
that...

- Arnaldo
 
> The number of threads is hard-coded to the number of online CPUs. The
> following patch will introduce an option to set it.
> 
> Multithreaded synthesis is only available for per-CPU monitoring.
> 
> Signed-off-by: Kan Liang <Kan.liang@intel.com>
> ---
>  tools/perf/builtin-record.c | 86 ++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 81 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 4ede9bf..1978021 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -62,6 +62,11 @@ struct switch_output {
>  	bool		 set;
>  };
>  
> +struct synthesize_buf {
> +	void		*entry;
> +	size_t		size;
> +};
> +
>  struct record {
>  	struct perf_tool	tool;
>  	struct record_opts	opts;
> @@ -80,6 +85,7 @@ struct record {
>  	bool			timestamp_filename;
>  	struct switch_output	switch_output;
>  	unsigned long long	samples;
> +	struct synthesize_buf	*buf;
>  };
>  
>  static volatile int auxtrace_record__snapshot_started;
> @@ -120,14 +126,35 @@ static int record__write(struct record *rec, void *bf, size_t size)
>  	return 0;
>  }
>  
> +static int buf__write(struct record *rec, int idx, void *bf, size_t size)
> +{
> +	size_t target_size = rec->buf[idx].size;
> +	void *target;
> +
> +	target = realloc(rec->buf[idx].entry, target_size + size);
> +	if (target == NULL)
> +		return -ENOMEM;
> +
> +	memcpy(target + target_size, bf, size);
> +
> +	rec->buf[idx].entry = target;
> +	rec->buf[idx].size += size;
> +
> +	return 0;
> +}
> +
>  static int process_synthesized_event(struct perf_tool *tool,
>  				     union perf_event *event,
>  				     struct perf_sample *sample __maybe_unused,
>  				     struct machine *machine __maybe_unused,
> -				     struct thread_info *thread __maybe_unused)
> +				     struct thread_info *thread)
>  {
>  	struct record *rec = container_of(tool, struct record, tool);
> -	return record__write(rec, event, event->header.size);
> +
> +	if (!perf_singlethreaded && thread)
> +		return buf__write(rec, thread->idx, event, event->header.size);
> +	else
> +		return record__write(rec, event, event->header.size);
>  }
>  
>  static int record__pushfn(void *to, void *bf, size_t size)
> @@ -690,6 +717,48 @@ static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
>  	return NULL;
>  }
>  
> +static int record__multithread_synthesize(struct record *rec,
> +					  struct machine *machine,
> +					  struct perf_tool *tool,
> +					  struct record_opts *opts)
> +{
> +	int i, err, nr_thread = sysconf(_SC_NPROCESSORS_ONLN);
> +
> +	if (nr_thread > 1) {
> +		perf_set_multithreaded();
> +
> +		rec->buf = calloc(nr_thread, sizeof(struct synthesize_buf));
> +		if (rec->buf == NULL) {
> +			pr_err("Could not do multithread synthesize\n");
> +			nr_thread = 1;
> +			perf_set_singlethreaded();
> +		}
> +	}
> +
> +	err = __machine__synthesize_threads(machine, tool, &opts->target,
> +					    rec->evlist->threads,
> +					    process_synthesized_event,
> +					    opts->sample_address,
> +					    opts->proc_map_timeout, nr_thread);
> +	if (err < 0)
> +		goto free;
> +
> +	if (nr_thread > 1) {
> +		for (i = 0; i < nr_thread; i++)
> +			record__write(rec, rec->buf[i].entry, rec->buf[i].size);
> +	}
> +
> +free:
> +	if (nr_thread > 1) {
> +		for (i = 0; i < nr_thread; i++)
> +			free(rec->buf[i].entry);
> +		free(rec->buf);
> +		perf_set_singlethreaded();
> +	}
> +
> +	return err;
> +}
> +
>  static int record__synthesize(struct record *rec, bool tail)
>  {
>  	struct perf_session *session = rec->session;
> @@ -766,9 +835,16 @@ static int record__synthesize(struct record *rec, bool tail)
>  					 perf_event__synthesize_guest_os, tool);
>  	}
>  
> -	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
> -					    process_synthesized_event, opts->sample_address,
> -					    opts->proc_map_timeout, 1);
> +	/* multithreading synthesize is only available for cpu monitoring */
> +	if (target__has_cpu(&opts->target))
> +		err = record__multithread_synthesize(rec, machine, tool, opts);
> +	else
> +		err = __machine__synthesize_threads(machine, tool,
> +						    &opts->target,
> +						    rec->evlist->threads,
> +						    process_synthesized_event,
> +						    opts->sample_address,
> +						    opts->proc_map_timeout, 1);
>  out:
>  	return err;
>  }
> -- 
> 2.7.4

  reply	other threads:[~2017-10-13 14:39 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-13 14:09 [PATCH 0/4] event synthesization multithreading for perf record kan.liang
2017-10-13 14:09 ` [PATCH 1/4] perf tools: pass thread info to process function kan.liang
2017-10-13 14:09 ` [PATCH 2/4] perf tools: pass thread info in event synthesization kan.liang
2017-10-13 14:09 ` [PATCH 3/4] perf record: event synthesization multithreading support kan.liang
2017-10-13 14:38   ` Arnaldo Carvalho de Melo [this message]
2017-10-13 14:58     ` Liang, Kan
2017-10-13 15:09       ` Arnaldo Carvalho de Melo
2017-10-13 20:06     ` Ingo Molnar
2017-10-13 14:09 ` [PATCH 4/4] perf record: add option to set the number of thread for event synthesize kan.liang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171013143856.GP3503@kernel.org \
    --to=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=hekuang@huawei.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=wangnan0@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox