From: Adrian Hunter <adrian.hunter@intel.com>
To: Tavian Barnes <tavianator@tavianator.com>,
linux-perf-users@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@redhat.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Namhyung Kim <namhyung@kernel.org>,
Mark Rutland <mark.rutland@arm.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Jiri Olsa <jolsa@kernel.org>, Ian Rogers <irogers@google.com>,
"Liang, Kan" <kan.liang@linux.intel.com>,
Andrew Kreimer <algonell@gmail.com>,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH] perf intel-pt: don't zero the whole perf_sample
Date: Mon, 13 Jan 2025 10:15:16 +0200 [thread overview]
Message-ID: <c29a4cdc-9419-4f1e-92bc-a75c3b10e37d@intel.com> (raw)
In-Reply-To: <d841b97b3ad2ca8bcab07e4293375fb7c32dfce7.1736618095.git.tavianator@tavianator.com>
On 11/01/25 19:56, Tavian Barnes wrote:
> C designated initializers like
>
> struct perf_sample sample = { .ip = 0, };
>
> set every unmentioned field of the struct to zero. But since
> sizeof(struct perf_sample) == 1384, this takes a long time.
>
> struct perf_sample does not need to be fully initialized, and even
Yes, it does need to be fully initialized. Leaving members
uninitialized in the hope that they never get used adds to
code complexity, e.g. how do you know they are never used,
or that future members never will be?
> .ip = 0 is unnecessary because intel_pt_prep_*_sample() will initialize
> it. Skipping the initialization saves about 2.5% of the execution time
> when running
>
> $ perf script --itrace=i0
>
> Signed-off-by: Tavian Barnes <tavianator@tavianator.com>
> ---
> tools/perf/util/intel-pt.c | 28 ++++++++++++++--------------
> 1 file changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
> index 30be6dfe09eb..c829398c5bb9 100644
> --- a/tools/perf/util/intel-pt.c
> +++ b/tools/perf/util/intel-pt.c
> @@ -1764,7 +1764,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
> {
> struct intel_pt *pt = ptq->pt;
> union perf_event *event = ptq->event_buf;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
> struct dummy_branch_stack {
> u64 nr;
> u64 hw_idx;
> @@ -1835,7 +1835,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
> {
> struct intel_pt *pt = ptq->pt;
> union perf_event *event = ptq->event_buf;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
>
> if (intel_pt_skip_event(pt))
> return 0;
> @@ -1867,7 +1867,7 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq)
> {
> struct intel_pt *pt = ptq->pt;
> union perf_event *event = ptq->event_buf;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
> u64 period = 0;
>
> if (ptq->sample_ipc)
> @@ -1894,7 +1894,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
> {
> struct intel_pt *pt = ptq->pt;
> union perf_event *event = ptq->event_buf;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
>
> if (intel_pt_skip_event(pt))
> return 0;
> @@ -1927,7 +1927,7 @@ static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
> {
> struct intel_pt *pt = ptq->pt;
> union perf_event *event = ptq->event_buf;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
> struct perf_synth_intel_ptwrite raw;
>
> if (intel_pt_skip_event(pt))
> @@ -1953,7 +1953,7 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
> {
> struct intel_pt *pt = ptq->pt;
> union perf_event *event = ptq->event_buf;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
> struct perf_synth_intel_cbr raw;
> u32 flags;
>
> @@ -1983,7 +1983,7 @@ static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
> {
> struct intel_pt *pt = ptq->pt;
> union perf_event *event = ptq->event_buf;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
> struct perf_synth_intel_psb raw;
>
> if (intel_pt_skip_event(pt))
> @@ -2009,7 +2009,7 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
> {
> struct intel_pt *pt = ptq->pt;
> union perf_event *event = ptq->event_buf;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
> struct perf_synth_intel_mwait raw;
>
> if (intel_pt_skip_event(pt))
> @@ -2034,7 +2034,7 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
> {
> struct intel_pt *pt = ptq->pt;
> union perf_event *event = ptq->event_buf;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
> struct perf_synth_intel_pwre raw;
>
> if (intel_pt_skip_event(pt))
> @@ -2059,7 +2059,7 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
> {
> struct intel_pt *pt = ptq->pt;
> union perf_event *event = ptq->event_buf;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
> struct perf_synth_intel_exstop raw;
>
> if (intel_pt_skip_event(pt))
> @@ -2084,7 +2084,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
> {
> struct intel_pt *pt = ptq->pt;
> union perf_event *event = ptq->event_buf;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
> struct perf_synth_intel_pwrx raw;
>
> if (intel_pt_skip_event(pt))
> @@ -2235,7 +2235,7 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack,
> static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id)
> {
> const struct intel_pt_blk_items *items = &ptq->state->items;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
> union perf_event *event = ptq->event_buf;
> struct intel_pt *pt = ptq->pt;
> u64 sample_type = evsel->core.attr.sample_type;
> @@ -2407,7 +2407,7 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
> {
> struct intel_pt *pt = ptq->pt;
> union perf_event *event = ptq->event_buf;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
> struct {
> struct perf_synth_intel_evt cfe;
> struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS];
> @@ -2446,7 +2446,7 @@ static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
> {
> struct intel_pt *pt = ptq->pt;
> union perf_event *event = ptq->event_buf;
> - struct perf_sample sample = { .ip = 0, };
> + struct perf_sample sample;
> struct perf_synth_intel_iflag_chg raw;
>
> if (intel_pt_skip_event(pt))
next prev parent reply other threads:[~2025-01-13 8:15 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-11 17:56 [PATCH] perf intel-pt: don't zero the whole perf_sample Tavian Barnes
2025-01-13 8:15 ` Adrian Hunter [this message]
2025-01-13 16:26 ` Tavian Barnes
2025-01-13 17:30 ` Ian Rogers
2025-01-13 19:45 ` Ian Rogers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=c29a4cdc-9419-4f1e-92bc-a75c3b10e37d@intel.com \
--to=adrian.hunter@intel.com \
--cc=acme@kernel.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=algonell@gmail.com \
--cc=irogers@google.com \
--cc=jolsa@kernel.org \
--cc=kan.liang@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=tavianator@tavianator.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.