* [PATCH] perf utilities: cln_size header
@ 2026-02-14 4:07 Ricky Ringler
2026-03-04 1:25 ` Namhyung Kim
0 siblings, 1 reply; 15+ messages in thread
From: Ricky Ringler @ 2026-02-14 4:07 UTC (permalink / raw)
To: acme; +Cc: namhyung, peterz, mingo, linux-kernel, linux-perf-users,
Ricky Ringler
Store cacheline size during perf record in header, so
that cacheline size can be used for other features, like
sort.
Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
Testing:
- Built perf
- Ran record + report with feat enabled
- Ran record + report with feat disabled
Tested-by: Ricky Ringler <ricky.ringler@proton.me>
Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
---
tools/perf/builtin-inject.c | 1 +
tools/perf/util/env.h | 1 +
tools/perf/util/header.c | 29 +++++++++++++++++++++++++++++
tools/perf/util/header.h | 1 +
tools/perf/util/sort.c | 37 ++++++++++++++++++++++++++-----------
5 files changed, 58 insertions(+), 11 deletions(-)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index aa7be4fb5838..9639154459d9 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2047,6 +2047,7 @@ static bool keep_feat(int feat)
case HEADER_CLOCK_DATA:
case HEADER_HYBRID_TOPOLOGY:
case HEADER_PMU_CAPS:
+ case HEADER_CLN_SIZE:
return true;
/* Information that can be updated */
case HEADER_BUILD_ID:
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 9977b85523a8..04580c64847b 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -93,6 +93,7 @@ struct perf_env {
struct cpu_topology_map *cpu;
struct cpu_cache_level *caches;
int caches_cnt;
+ unsigned int cln_size;
u32 comp_ratio;
u32 comp_ver;
u32 comp_type;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f5cad377c99e..ad15829acc69 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -54,6 +54,7 @@
#include "bpf-event.h"
#include "bpf-utils.h"
#include "clockid.h"
+#include "cacheline.h"
#include <linux/ctype.h>
#include <internal/lib.h>
@@ -1288,6 +1289,18 @@ static int write_cache(struct feat_fd *ff,
return ret;
}
+static int write_cln_size(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ int cln_size = cacheline_size();
+
+ if (!cln_size)
+ cln_size = 0;
+ ff->ph->env.cln_size = cln_size;
+
+ return do_write(ff, &cln_size, sizeof(cln_size));
+}
+
static int write_stat(struct feat_fd *ff __maybe_unused,
struct evlist *evlist __maybe_unused)
{
@@ -2084,6 +2097,11 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
}
}
+static void print_cln_size(struct feat_fd *ff, FILE *fp __maybe_unused)
+{
+ fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
+}
+
static void print_compressed(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
@@ -2933,6 +2951,16 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
return -1;
}
+static int process_cln_size(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_env *env = &ff->ph->env;
+
+ if (do_read_u32(ff, &env->cln_size))
+ return -1;
+
+ return 0;
+}
+
static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
{
struct perf_session *session;
@@ -3453,6 +3481,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(CLOCK_DATA, clock_data, false),
FEAT_OPN(HYBRID_TOPOLOGY, hybrid_topology, true),
FEAT_OPR(PMU_CAPS, pmu_caps, false),
+ FEAT_OPR(CLN_SIZE, cln_size, false),
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index c058021c3150..04394331630f 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -53,6 +53,7 @@ enum {
HEADER_CLOCK_DATA,
HEADER_HYBRID_TOPOLOGY,
HEADER_PMU_CAPS,
+ HEADER_CLN_SIZE,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index aa79eb6476dd..e636b9f88e5c 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -30,6 +30,7 @@
#include "time-utils.h"
#include "cgroup.h"
#include "machine.h"
+#include "session.h"
#include "trace-event.h"
#include <linux/kernel.h>
#include <linux/string.h>
@@ -2474,7 +2475,26 @@ struct sort_entry sort_type_offset = {
/* --sort typecln */
-#define DEFAULT_CACHELINE_SIZE 64
+static int
+hist_entry__cln_size(struct hist_entry *he)
+{
+ int ret = 0;
+
+ if (he && he->hists) {
+ struct evsel *evsel = hists_to_evsel(he->hists);
+
+ if (evsel && evsel->evlist->session && evsel->evlist->session)
+ ret = evsel->evlist->session->header.env.cln_size;
+ }
+
+ if (!ret || ret < 1) {
+ int default_cacheline_size = 64; // avoid div/0 later
+
+ ret = default_cacheline_size;
+ }
+
+ return ret;
+}
static int64_t
sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
@@ -2482,11 +2502,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
struct annotated_data_type *left_type = left->mem_type;
struct annotated_data_type *right_type = right->mem_type;
int64_t left_cln, right_cln;
+ int64_t cln_size_left = hist_entry__cln_size(left);
+ int64_t cln_size_right = hist_entry__cln_size(right);
int64_t ret;
- int cln_size = cacheline_size();
-
- if (cln_size == 0)
- cln_size = DEFAULT_CACHELINE_SIZE;
if (!left_type) {
sort__type_init(left);
@@ -2502,8 +2520,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
if (ret)
return ret;
- left_cln = left->mem_type_off / cln_size;
- right_cln = right->mem_type_off / cln_size;
+ left_cln = left->mem_type_off / cln_size_left;
+ right_cln = right->mem_type_off / cln_size_right;
return left_cln - right_cln;
}
@@ -2511,10 +2529,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width __maybe_unused)
{
struct annotated_data_type *he_type = he->mem_type;
- int cln_size = cacheline_size();
-
- if (cln_size == 0)
- cln_size = DEFAULT_CACHELINE_SIZE;
+ int cln_size = hist_entry__cln_size(he);
return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
he->mem_type_off / cln_size);
--
2.53.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH] perf utilities: cln_size header
2026-02-14 4:07 [PATCH] perf utilities: cln_size header Ricky Ringler
@ 2026-03-04 1:25 ` Namhyung Kim
2026-03-05 23:57 ` [PATCH v2] " Ricky Ringler
0 siblings, 1 reply; 15+ messages in thread
From: Namhyung Kim @ 2026-03-04 1:25 UTC (permalink / raw)
To: Ricky Ringler; +Cc: acme, peterz, mingo, linux-kernel, linux-perf-users
Hello,
On Sat, Feb 14, 2026 at 04:07:25AM +0000, Ricky Ringler wrote:
> Store cacheline size during perf record in header, so
> that cacheline size can be used for other features, like
> sort.
>
> Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
>
> Testing:
> - Built perf
> - Ran record + report with feat enabled
> - Ran record + report with feat disabled
Sorry for the delay. Can you please rebase this on top of the current
perf-tools-next? We added a couple more header features recently.
Thanks,
Namhyung
>
> Tested-by: Ricky Ringler <ricky.ringler@proton.me>
> Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
> ---
> tools/perf/builtin-inject.c | 1 +
> tools/perf/util/env.h | 1 +
> tools/perf/util/header.c | 29 +++++++++++++++++++++++++++++
> tools/perf/util/header.h | 1 +
> tools/perf/util/sort.c | 37 ++++++++++++++++++++++++++-----------
> 5 files changed, 58 insertions(+), 11 deletions(-)
>
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index aa7be4fb5838..9639154459d9 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -2047,6 +2047,7 @@ static bool keep_feat(int feat)
> case HEADER_CLOCK_DATA:
> case HEADER_HYBRID_TOPOLOGY:
> case HEADER_PMU_CAPS:
> + case HEADER_CLN_SIZE:
> return true;
> /* Information that can be updated */
> case HEADER_BUILD_ID:
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index 9977b85523a8..04580c64847b 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -93,6 +93,7 @@ struct perf_env {
> struct cpu_topology_map *cpu;
> struct cpu_cache_level *caches;
> int caches_cnt;
> + unsigned int cln_size;
> u32 comp_ratio;
> u32 comp_ver;
> u32 comp_type;
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index f5cad377c99e..ad15829acc69 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -54,6 +54,7 @@
> #include "bpf-event.h"
> #include "bpf-utils.h"
> #include "clockid.h"
> +#include "cacheline.h"
>
> #include <linux/ctype.h>
> #include <internal/lib.h>
> @@ -1288,6 +1289,18 @@ static int write_cache(struct feat_fd *ff,
> return ret;
> }
>
> +static int write_cln_size(struct feat_fd *ff,
> + struct evlist *evlist __maybe_unused)
> +{
> + int cln_size = cacheline_size();
> +
> + if (!cln_size)
> + cln_size = 0;
> + ff->ph->env.cln_size = cln_size;
> +
> + return do_write(ff, &cln_size, sizeof(cln_size));
> +}
> +
> static int write_stat(struct feat_fd *ff __maybe_unused,
> struct evlist *evlist __maybe_unused)
> {
> @@ -2084,6 +2097,11 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
> }
> }
>
> +static void print_cln_size(struct feat_fd *ff, FILE *fp __maybe_unused)
> +{
> + fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
> +}
> +
> static void print_compressed(struct feat_fd *ff, FILE *fp)
> {
> fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
> @@ -2933,6 +2951,16 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
> return -1;
> }
>
> +static int process_cln_size(struct feat_fd *ff, void *data __maybe_unused)
> +{
> + struct perf_env *env = &ff->ph->env;
> +
> + if (do_read_u32(ff, &env->cln_size))
> + return -1;
> +
> + return 0;
> +}
> +
> static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
> {
> struct perf_session *session;
> @@ -3453,6 +3481,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
> FEAT_OPR(CLOCK_DATA, clock_data, false),
> FEAT_OPN(HYBRID_TOPOLOGY, hybrid_topology, true),
> FEAT_OPR(PMU_CAPS, pmu_caps, false),
> + FEAT_OPR(CLN_SIZE, cln_size, false),
> };
>
> struct header_print_data {
> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
> index c058021c3150..04394331630f 100644
> --- a/tools/perf/util/header.h
> +++ b/tools/perf/util/header.h
> @@ -53,6 +53,7 @@ enum {
> HEADER_CLOCK_DATA,
> HEADER_HYBRID_TOPOLOGY,
> HEADER_PMU_CAPS,
> + HEADER_CLN_SIZE,
> HEADER_LAST_FEATURE,
> HEADER_FEAT_BITS = 256,
> };
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index aa79eb6476dd..e636b9f88e5c 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -30,6 +30,7 @@
> #include "time-utils.h"
> #include "cgroup.h"
> #include "machine.h"
> +#include "session.h"
> #include "trace-event.h"
> #include <linux/kernel.h>
> #include <linux/string.h>
> @@ -2474,7 +2475,26 @@ struct sort_entry sort_type_offset = {
>
> /* --sort typecln */
>
> -#define DEFAULT_CACHELINE_SIZE 64
> +static int
> +hist_entry__cln_size(struct hist_entry *he)
> +{
> + int ret = 0;
> +
> + if (he && he->hists) {
> + struct evsel *evsel = hists_to_evsel(he->hists);
> +
> + if (evsel && evsel->evlist->session && evsel->evlist->session)
> + ret = evsel->evlist->session->header.env.cln_size;
> + }
> +
> + if (!ret || ret < 1) {
> + int default_cacheline_size = 64; // avoid div/0 later
> +
> + ret = default_cacheline_size;
> + }
> +
> + return ret;
> +}
>
> static int64_t
> sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> @@ -2482,11 +2502,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> struct annotated_data_type *left_type = left->mem_type;
> struct annotated_data_type *right_type = right->mem_type;
> int64_t left_cln, right_cln;
> + int64_t cln_size_left = hist_entry__cln_size(left);
> + int64_t cln_size_right = hist_entry__cln_size(right);
> int64_t ret;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
>
> if (!left_type) {
> sort__type_init(left);
> @@ -2502,8 +2520,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> if (ret)
> return ret;
>
> - left_cln = left->mem_type_off / cln_size;
> - right_cln = right->mem_type_off / cln_size;
> + left_cln = left->mem_type_off / cln_size_left;
> + right_cln = right->mem_type_off / cln_size_right;
> return left_cln - right_cln;
> }
>
> @@ -2511,10 +2529,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width __maybe_unused)
> {
> struct annotated_data_type *he_type = he->mem_type;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
> + int cln_size = hist_entry__cln_size(he);
>
> return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
> he->mem_type_off / cln_size);
> --
> 2.53.0
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v2] perf utilities: cln_size header
2026-03-04 1:25 ` Namhyung Kim
@ 2026-03-05 23:57 ` Ricky Ringler
2026-03-06 0:12 ` Ricky Ringler
0 siblings, 1 reply; 15+ messages in thread
From: Ricky Ringler @ 2026-03-05 23:57 UTC (permalink / raw)
To: namhyung; +Cc: peterz, mingo, acme, linux-kernel, linux-perf-users,
Ricky Ringler
Store cacheline size during perf record in header, so
that cacheline size can be used for other features, like
sort.
Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
Testing:
- Built perf
- Ran record + report with feat enabled
- Ran record + report with feat disabled
Tested-by: Ricky Ringler <ricky.ringler@proton.me>
Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
---
tools/perf/builtin-inject.c | 1 +
tools/perf/util/env.h | 1 +
tools/perf/util/header.c | 29 +++++++++++++++++++++++++++++
tools/perf/util/header.h | 1 +
tools/perf/util/sort.c | 37 ++++++++++++++++++++++++++-----------
5 files changed, 58 insertions(+), 11 deletions(-)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index aa7be4fb5838..9639154459d9 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2047,6 +2047,7 @@ static bool keep_feat(int feat)
case HEADER_CLOCK_DATA:
case HEADER_HYBRID_TOPOLOGY:
case HEADER_PMU_CAPS:
+ case HEADER_CLN_SIZE:
return true;
/* Information that can be updated */
case HEADER_BUILD_ID:
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 9977b85523a8..04580c64847b 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -93,6 +93,7 @@ struct perf_env {
struct cpu_topology_map *cpu;
struct cpu_cache_level *caches;
int caches_cnt;
+ unsigned int cln_size;
u32 comp_ratio;
u32 comp_ver;
u32 comp_type;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f5cad377c99e..ad15829acc69 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -54,6 +54,7 @@
#include "bpf-event.h"
#include "bpf-utils.h"
#include "clockid.h"
+#include "cacheline.h"
#include <linux/ctype.h>
#include <internal/lib.h>
@@ -1288,6 +1289,18 @@ static int write_cache(struct feat_fd *ff,
return ret;
}
+static int write_cln_size(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ int cln_size = cacheline_size();
+
+ if (!cln_size)
+ cln_size = 0;
+ ff->ph->env.cln_size = cln_size;
+
+ return do_write(ff, &cln_size, sizeof(cln_size));
+}
+
static int write_stat(struct feat_fd *ff __maybe_unused,
struct evlist *evlist __maybe_unused)
{
@@ -2084,6 +2097,11 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
}
}
+static void print_cln_size(struct feat_fd *ff, FILE *fp __maybe_unused)
+{
+ fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
+}
+
static void print_compressed(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
@@ -2933,6 +2951,16 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
return -1;
}
+static int process_cln_size(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_env *env = &ff->ph->env;
+
+ if (do_read_u32(ff, &env->cln_size))
+ return -1;
+
+ return 0;
+}
+
static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
{
struct perf_session *session;
@@ -3453,6 +3481,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(CLOCK_DATA, clock_data, false),
FEAT_OPN(HYBRID_TOPOLOGY, hybrid_topology, true),
FEAT_OPR(PMU_CAPS, pmu_caps, false),
+ FEAT_OPR(CLN_SIZE, cln_size, false),
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index c058021c3150..04394331630f 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -53,6 +53,7 @@ enum {
HEADER_CLOCK_DATA,
HEADER_HYBRID_TOPOLOGY,
HEADER_PMU_CAPS,
+ HEADER_CLN_SIZE,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index aa79eb6476dd..e636b9f88e5c 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -30,6 +30,7 @@
#include "time-utils.h"
#include "cgroup.h"
#include "machine.h"
+#include "session.h"
#include "trace-event.h"
#include <linux/kernel.h>
#include <linux/string.h>
@@ -2474,7 +2475,26 @@ struct sort_entry sort_type_offset = {
/* --sort typecln */
-#define DEFAULT_CACHELINE_SIZE 64
+static int
+hist_entry__cln_size(struct hist_entry *he)
+{
+ int ret = 0;
+
+ if (he && he->hists) {
+ struct evsel *evsel = hists_to_evsel(he->hists);
+
+ if (evsel && evsel->evlist->session && evsel->evlist->session)
+ ret = evsel->evlist->session->header.env.cln_size;
+ }
+
+ if (!ret || ret < 1) {
+ int default_cacheline_size = 64; // avoid div/0 later
+
+ ret = default_cacheline_size;
+ }
+
+ return ret;
+}
static int64_t
sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
@@ -2482,11 +2502,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
struct annotated_data_type *left_type = left->mem_type;
struct annotated_data_type *right_type = right->mem_type;
int64_t left_cln, right_cln;
+ int64_t cln_size_left = hist_entry__cln_size(left);
+ int64_t cln_size_right = hist_entry__cln_size(right);
int64_t ret;
- int cln_size = cacheline_size();
-
- if (cln_size == 0)
- cln_size = DEFAULT_CACHELINE_SIZE;
if (!left_type) {
sort__type_init(left);
@@ -2502,8 +2520,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
if (ret)
return ret;
- left_cln = left->mem_type_off / cln_size;
- right_cln = right->mem_type_off / cln_size;
+ left_cln = left->mem_type_off / cln_size_left;
+ right_cln = right->mem_type_off / cln_size_right;
return left_cln - right_cln;
}
@@ -2511,10 +2529,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width __maybe_unused)
{
struct annotated_data_type *he_type = he->mem_type;
- int cln_size = cacheline_size();
-
- if (cln_size == 0)
- cln_size = DEFAULT_CACHELINE_SIZE;
+ int cln_size = hist_entry__cln_size(he);
return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
he->mem_type_off / cln_size);
--
2.53.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH v2] perf utilities: cln_size header
2026-03-05 23:57 ` [PATCH v2] " Ricky Ringler
@ 2026-03-06 0:12 ` Ricky Ringler
2026-03-08 17:20 ` [PATCH v3] " Ricky Ringler
0 siblings, 1 reply; 15+ messages in thread
From: Ricky Ringler @ 2026-03-06 0:12 UTC (permalink / raw)
To: namhyung; +Cc: peterz, mingo, acme, linux-kernel, linux-perf-users,
Ricky Ringler
Apologies all. I just realized I rebased on the kernel instead of perf-tools-next. Please hold off reviewing. I will submit a v3 patch rebased on perf-tools-next.
On Thursday, March 5th, 2026 at 5:57 PM, Ricky Ringler <ricky.ringler@proton.me> wrote:
> Store cacheline size during perf record in header, so
> that cacheline size can be used for other features, like
> sort.
>
> Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
>
> Testing:
> - Built perf
> - Ran record + report with feat enabled
> - Ran record + report with feat disabled
>
> Tested-by: Ricky Ringler <ricky.ringler@proton.me>
> Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
> ---
> tools/perf/builtin-inject.c | 1 +
> tools/perf/util/env.h | 1 +
> tools/perf/util/header.c | 29 +++++++++++++++++++++++++++++
> tools/perf/util/header.h | 1 +
> tools/perf/util/sort.c | 37 ++++++++++++++++++++++++++-----------
> 5 files changed, 58 insertions(+), 11 deletions(-)
>
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index aa7be4fb5838..9639154459d9 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -2047,6 +2047,7 @@ static bool keep_feat(int feat)
> case HEADER_CLOCK_DATA:
> case HEADER_HYBRID_TOPOLOGY:
> case HEADER_PMU_CAPS:
> + case HEADER_CLN_SIZE:
> return true;
> /* Information that can be updated */
> case HEADER_BUILD_ID:
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index 9977b85523a8..04580c64847b 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -93,6 +93,7 @@ struct perf_env {
> struct cpu_topology_map *cpu;
> struct cpu_cache_level *caches;
> int caches_cnt;
> + unsigned int cln_size;
> u32 comp_ratio;
> u32 comp_ver;
> u32 comp_type;
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index f5cad377c99e..ad15829acc69 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -54,6 +54,7 @@
> #include "bpf-event.h"
> #include "bpf-utils.h"
> #include "clockid.h"
> +#include "cacheline.h"
>
> #include <linux/ctype.h>
> #include <internal/lib.h>
> @@ -1288,6 +1289,18 @@ static int write_cache(struct feat_fd *ff,
> return ret;
> }
>
> +static int write_cln_size(struct feat_fd *ff,
> + struct evlist *evlist __maybe_unused)
> +{
> + int cln_size = cacheline_size();
> +
> + if (!cln_size)
> + cln_size = 0;
> + ff->ph->env.cln_size = cln_size;
> +
> + return do_write(ff, &cln_size, sizeof(cln_size));
> +}
> +
> static int write_stat(struct feat_fd *ff __maybe_unused,
> struct evlist *evlist __maybe_unused)
> {
> @@ -2084,6 +2097,11 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
> }
> }
>
> +static void print_cln_size(struct feat_fd *ff, FILE *fp __maybe_unused)
> +{
> + fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
> +}
> +
> static void print_compressed(struct feat_fd *ff, FILE *fp)
> {
> fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
> @@ -2933,6 +2951,16 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
> return -1;
> }
>
> +static int process_cln_size(struct feat_fd *ff, void *data __maybe_unused)
> +{
> + struct perf_env *env = &ff->ph->env;
> +
> + if (do_read_u32(ff, &env->cln_size))
> + return -1;
> +
> + return 0;
> +}
> +
> static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
> {
> struct perf_session *session;
> @@ -3453,6 +3481,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
> FEAT_OPR(CLOCK_DATA, clock_data, false),
> FEAT_OPN(HYBRID_TOPOLOGY, hybrid_topology, true),
> FEAT_OPR(PMU_CAPS, pmu_caps, false),
> + FEAT_OPR(CLN_SIZE, cln_size, false),
> };
>
> struct header_print_data {
> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
> index c058021c3150..04394331630f 100644
> --- a/tools/perf/util/header.h
> +++ b/tools/perf/util/header.h
> @@ -53,6 +53,7 @@ enum {
> HEADER_CLOCK_DATA,
> HEADER_HYBRID_TOPOLOGY,
> HEADER_PMU_CAPS,
> + HEADER_CLN_SIZE,
> HEADER_LAST_FEATURE,
> HEADER_FEAT_BITS = 256,
> };
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index aa79eb6476dd..e636b9f88e5c 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -30,6 +30,7 @@
> #include "time-utils.h"
> #include "cgroup.h"
> #include "machine.h"
> +#include "session.h"
> #include "trace-event.h"
> #include <linux/kernel.h>
> #include <linux/string.h>
> @@ -2474,7 +2475,26 @@ struct sort_entry sort_type_offset = {
>
> /* --sort typecln */
>
> -#define DEFAULT_CACHELINE_SIZE 64
> +static int
> +hist_entry__cln_size(struct hist_entry *he)
> +{
> + int ret = 0;
> +
> + if (he && he->hists) {
> + struct evsel *evsel = hists_to_evsel(he->hists);
> +
> + if (evsel && evsel->evlist->session && evsel->evlist->session)
> + ret = evsel->evlist->session->header.env.cln_size;
> + }
> +
> + if (!ret || ret < 1) {
> + int default_cacheline_size = 64; // avoid div/0 later
> +
> + ret = default_cacheline_size;
> + }
> +
> + return ret;
> +}
>
> static int64_t
> sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> @@ -2482,11 +2502,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> struct annotated_data_type *left_type = left->mem_type;
> struct annotated_data_type *right_type = right->mem_type;
> int64_t left_cln, right_cln;
> + int64_t cln_size_left = hist_entry__cln_size(left);
> + int64_t cln_size_right = hist_entry__cln_size(right);
> int64_t ret;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
>
> if (!left_type) {
> sort__type_init(left);
> @@ -2502,8 +2520,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> if (ret)
> return ret;
>
> - left_cln = left->mem_type_off / cln_size;
> - right_cln = right->mem_type_off / cln_size;
> + left_cln = left->mem_type_off / cln_size_left;
> + right_cln = right->mem_type_off / cln_size_right;
> return left_cln - right_cln;
> }
>
> @@ -2511,10 +2529,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width __maybe_unused)
> {
> struct annotated_data_type *he_type = he->mem_type;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
> + int cln_size = hist_entry__cln_size(he);
>
> return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
> he->mem_type_off / cln_size);
> --
> 2.53.0
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v3] perf utilities: cln_size header
2026-03-06 0:12 ` Ricky Ringler
@ 2026-03-08 17:20 ` Ricky Ringler
2026-03-09 15:28 ` Ian Rogers
0 siblings, 1 reply; 15+ messages in thread
From: Ricky Ringler @ 2026-03-08 17:20 UTC (permalink / raw)
To: namhyung; +Cc: mingo, acme, linux-kernel, linux-perf-users, Ricky Ringler
Store cacheline size during perf record in header, so
that cacheline size can be used for other features, like
sort.
Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
Testing:
- Built perf
- Ran record + report with feat enabled
- Ran record + report with feat disabled
Tested-by: Ricky Ringler <ricky.ringler@proton.me>
Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
---
tools/perf/builtin-inject.c | 1 +
tools/perf/util/env.h | 1 +
tools/perf/util/header.c | 29 +++++++++++++++++++++++++++++
tools/perf/util/header.h | 1 +
tools/perf/util/sort.c | 37 ++++++++++++++++++++++++++-----------
5 files changed, 58 insertions(+), 11 deletions(-)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 5b29f4296861..2675d32f88cf 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2134,6 +2134,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
case HEADER_HYBRID_TOPOLOGY:
case HEADER_PMU_CAPS:
case HEADER_CPU_DOMAIN_INFO:
+ case HEADER_CLNq_SIZE:
return true;
/* Information that can be updated */
case HEADER_BUILD_ID:
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index a4501cbca375..c7052ac1f856 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -112,6 +112,7 @@ struct perf_env {
struct cpu_cache_level *caches;
struct cpu_domain_map **cpu_domain;
int caches_cnt;
+ unsigned int cln_size;
u32 comp_ratio;
u32 comp_ver;
u32 comp_type;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 9142a8ba4019..4f65faafe75d 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -54,6 +54,7 @@
#include "bpf-event.h"
#include "bpf-utils.h"
#include "clockid.h"
+#include "cacheline.h"
#include <linux/ctype.h>
#include <internal/lib.h>
@@ -1304,6 +1305,18 @@ static int write_cache(struct feat_fd *ff,
return ret;
}
+static int write_cln_size(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ int cln_size = cacheline_size();
+
+ if (!cln_size)
+ cln_size = 0;
+ ff->ph->env.cln_size = cln_size;
+
+ return do_write(ff, &cln_size, sizeof(cln_size));
+}
+
static int write_stat(struct feat_fd *ff __maybe_unused,
struct evlist *evlist __maybe_unused)
{
@@ -2261,6 +2274,11 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
}
}
+static void print_cln_size(struct feat_fd *ff, FILE *fp __maybe_unused)
+{
+ fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
+}
+
static void print_compressed(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
@@ -3154,6 +3172,16 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
return -1;
}
+static int process_cln_size(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_env *env = &ff->ph->env;
+
+ if (do_read_u32(ff, &env->cln_size))
+ return -1;
+
+ return 0;
+}
+
static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
{
struct perf_session *session;
@@ -3763,6 +3791,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(PMU_CAPS, pmu_caps, false),
FEAT_OPR(CPU_DOMAIN_INFO, cpu_domain_info, true),
FEAT_OPR(E_MACHINE, e_machine, false),
+ FEAT_OPR(CLN_SIZE, cln_size, false),
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index cc40ac796f52..be315040727f 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -55,6 +55,7 @@ enum {
HEADER_PMU_CAPS,
HEADER_CPU_DOMAIN_INFO,
HEADER_E_MACHINE,
+ HEADER_CLN_SIZE,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 42d5cd7ef4e2..13287ade784e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -30,6 +30,7 @@
#include "time-utils.h"
#include "cgroup.h"
#include "machine.h"
+#include "session.h"
#include "trace-event.h"
#include <linux/kernel.h>
#include <linux/string.h>
@@ -2474,7 +2475,26 @@ struct sort_entry sort_type_offset = {
/* --sort typecln */
-#define DEFAULT_CACHELINE_SIZE 64
+static int
+hist_entry__cln_size(struct hist_entry *he)
+{
+ int ret = 0;
+
+ if (he && he->hists) {
+ struct evsel *evsel = hists_to_evsel(he->hists);
+
+ if (evsel && evsel->evlist->session && evsel->evlist->session)
+ ret = evsel->evlist->session->header.env.cln_size;
+ }
+
+ if (!ret || ret < 1) {
+ int default_cacheline_size = 64; // avoid div/0 later
+
+ ret = default_cacheline_size;
+ }
+
+ return ret;
+}
static int64_t
sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
@@ -2482,11 +2502,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
struct annotated_data_type *left_type = left->mem_type;
struct annotated_data_type *right_type = right->mem_type;
int64_t left_cln, right_cln;
+ int64_t cln_size_left = hist_entry__cln_size(left);
+ int64_t cln_size_right = hist_entry__cln_size(right);
int64_t ret;
- int cln_size = cacheline_size();
-
- if (cln_size == 0)
- cln_size = DEFAULT_CACHELINE_SIZE;
if (!left_type) {
sort__type_init(left);
@@ -2502,8 +2520,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
if (ret)
return ret;
- left_cln = left->mem_type_off / cln_size;
- right_cln = right->mem_type_off / cln_size;
+ left_cln = left->mem_type_off / cln_size_left;
+ right_cln = right->mem_type_off / cln_size_right;
return left_cln - right_cln;
}
@@ -2511,10 +2529,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width __maybe_unused)
{
struct annotated_data_type *he_type = he->mem_type;
- int cln_size = cacheline_size();
-
- if (cln_size == 0)
- cln_size = DEFAULT_CACHELINE_SIZE;
+ int cln_size = hist_entry__cln_size(he);
return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
he->mem_type_off / cln_size);
--
2.53.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH v3] perf utilities: cln_size header
2026-03-08 17:20 ` [PATCH v3] " Ricky Ringler
@ 2026-03-09 15:28 ` Ian Rogers
2026-03-21 20:41 ` [PATCH v4] " Ricky Ringler
0 siblings, 1 reply; 15+ messages in thread
From: Ian Rogers @ 2026-03-09 15:28 UTC (permalink / raw)
To: Ricky Ringler; +Cc: namhyung, mingo, acme, linux-kernel, linux-perf-users
On Sun, Mar 8, 2026 at 10:21 AM Ricky Ringler <ricky.ringler@proton.me> wrote:
>
> Store cacheline size during perf record in header, so
> that cacheline size can be used for other features, like
> sort.
>
> Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
>
> Testing:
> - Built perf
> - Ran record + report with feat enabled
> - Ran record + report with feat disabled
>
> Tested-by: Ricky Ringler <ricky.ringler@proton.me>
> Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
> ---
> tools/perf/builtin-inject.c | 1 +
> tools/perf/util/env.h | 1 +
> tools/perf/util/header.c | 29 +++++++++++++++++++++++++++++
> tools/perf/util/header.h | 1 +
> tools/perf/util/sort.c | 37 ++++++++++++++++++++++++++-----------
> 5 files changed, 58 insertions(+), 11 deletions(-)
>
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index 5b29f4296861..2675d32f88cf 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -2134,6 +2134,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
> case HEADER_HYBRID_TOPOLOGY:
> case HEADER_PMU_CAPS:
> case HEADER_CPU_DOMAIN_INFO:
> + case HEADER_CLNq_SIZE:
Looks like a typo, s/HEADER_CLNq_SIZE/HEADER_CLN_SIZE/
> return true;
> /* Information that can be updated */
> case HEADER_BUILD_ID:
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index a4501cbca375..c7052ac1f856 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -112,6 +112,7 @@ struct perf_env {
> struct cpu_cache_level *caches;
> struct cpu_domain_map **cpu_domain;
> int caches_cnt;
> + unsigned int cln_size;
> u32 comp_ratio;
> u32 comp_ver;
> u32 comp_type;
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index 9142a8ba4019..4f65faafe75d 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -54,6 +54,7 @@
> #include "bpf-event.h"
> #include "bpf-utils.h"
> #include "clockid.h"
> +#include "cacheline.h"
>
> #include <linux/ctype.h>
> #include <internal/lib.h>
> @@ -1304,6 +1305,18 @@ static int write_cache(struct feat_fd *ff,
> return ret;
> }
>
> +static int write_cln_size(struct feat_fd *ff,
> + struct evlist *evlist __maybe_unused)
> +{
> + int cln_size = cacheline_size();
> +
> + if (!cln_size)
> + cln_size = 0;
Did you mean to assign "cln_size = DEFAULT_CACHELINE_SIZE" here? It
seems strange to assign the value 0 to something that already has the value 0.
> + ff->ph->env.cln_size = cln_size;
> +
> + return do_write(ff, &cln_size, sizeof(cln_size));
> +}
> +
> static int write_stat(struct feat_fd *ff __maybe_unused,
> struct evlist *evlist __maybe_unused)
> {
> @@ -2261,6 +2274,11 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
> }
> }
>
> +static void print_cln_size(struct feat_fd *ff, FILE *fp __maybe_unused)
No need for the __maybe_unused on fp here.
> +{
> + fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
> +}
> +
> static void print_compressed(struct feat_fd *ff, FILE *fp)
> {
> fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
> @@ -3154,6 +3172,16 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
> return -1;
> }
>
> +static int process_cln_size(struct feat_fd *ff, void *data __maybe_unused)
> +{
> + struct perf_env *env = &ff->ph->env;
> +
> + if (do_read_u32(ff, &env->cln_size))
> + return -1;
> +
> + return 0;
> +}
> +
> static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
> {
> struct perf_session *session;
> @@ -3763,6 +3791,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
> FEAT_OPR(PMU_CAPS, pmu_caps, false),
> FEAT_OPR(CPU_DOMAIN_INFO, cpu_domain_info, true),
> FEAT_OPR(E_MACHINE, e_machine, false),
> + FEAT_OPR(CLN_SIZE, cln_size, false),
> };
>
> struct header_print_data {
> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
> index cc40ac796f52..be315040727f 100644
> --- a/tools/perf/util/header.h
> +++ b/tools/perf/util/header.h
> @@ -55,6 +55,7 @@ enum {
> HEADER_PMU_CAPS,
> HEADER_CPU_DOMAIN_INFO,
> HEADER_E_MACHINE,
> + HEADER_CLN_SIZE,
> HEADER_LAST_FEATURE,
> HEADER_FEAT_BITS = 256,
> };
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index 42d5cd7ef4e2..13287ade784e 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -30,6 +30,7 @@
> #include "time-utils.h"
> #include "cgroup.h"
> #include "machine.h"
> +#include "session.h"
> #include "trace-event.h"
> #include <linux/kernel.h>
> #include <linux/string.h>
> @@ -2474,7 +2475,26 @@ struct sort_entry sort_type_offset = {
>
> /* --sort typecln */
>
> -#define DEFAULT_CACHELINE_SIZE 64
> +static int
> +hist_entry__cln_size(struct hist_entry *he)
> +{
> + int ret = 0;
> +
> + if (he && he->hists) {
> + struct evsel *evsel = hists_to_evsel(he->hists);
> +
> + if (evsel && evsel->evlist->session && evsel->evlist->session)
> + ret = evsel->evlist->session->header.env.cln_size;
I think on these 2 lines, prefer evsel__session(evsel) rather than the
direct access.
Thanks,
Ian
> + }
> +
> + if (!ret || ret < 1) {
> + int default_cacheline_size = 64; // avoid div/0 later
> +
> + ret = default_cacheline_size;
> + }
> +
> + return ret;
> +}
>
> static int64_t
> sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> @@ -2482,11 +2502,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> struct annotated_data_type *left_type = left->mem_type;
> struct annotated_data_type *right_type = right->mem_type;
> int64_t left_cln, right_cln;
> + int64_t cln_size_left = hist_entry__cln_size(left);
> + int64_t cln_size_right = hist_entry__cln_size(right);
> int64_t ret;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
>
> if (!left_type) {
> sort__type_init(left);
> @@ -2502,8 +2520,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> if (ret)
> return ret;
>
> - left_cln = left->mem_type_off / cln_size;
> - right_cln = right->mem_type_off / cln_size;
> + left_cln = left->mem_type_off / cln_size_left;
> + right_cln = right->mem_type_off / cln_size_right;
> return left_cln - right_cln;
> }
>
> @@ -2511,10 +2529,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width __maybe_unused)
> {
> struct annotated_data_type *he_type = he->mem_type;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
> + int cln_size = hist_entry__cln_size(he);
>
> return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
> he->mem_type_off / cln_size);
> --
> 2.53.0
>
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v4] perf utilities: cln_size header
2026-03-09 15:28 ` Ian Rogers
@ 2026-03-21 20:41 ` Ricky Ringler
2026-03-26 22:43 ` Namhyung Kim
0 siblings, 1 reply; 15+ messages in thread
From: Ricky Ringler @ 2026-03-21 20:41 UTC (permalink / raw)
To: irogers
Cc: namhyung, mingo, acme, linux-kernel, linux-perf-users,
Ricky Ringler
Store cacheline size during perf record in header, so
that cacheline size can be used for other features, like
sort.
V4: Ian feedback
V3: Rebase off perf-tools-next round two
V2: Rebase off perf-tools-next
Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
Testing:
- Built perf
- Ran record + report with feat enabled
- Ran record + report with feat disabled
Tested-by: Ricky Ringler <ricky.ringler@proton.me>
Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
---
tools/perf/builtin-inject.c | 2 +-
tools/perf/util/header.c | 11 ++++++++---
tools/perf/util/sort.c | 8 ++++++--
3 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 2675d32f88cf..11ac7c8c4be3 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2134,7 +2134,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
case HEADER_HYBRID_TOPOLOGY:
case HEADER_PMU_CAPS:
case HEADER_CPU_DOMAIN_INFO:
- case HEADER_CLNq_SIZE:
+ case HEADER_CLN_SIZE:
return true;
/* Information that can be updated */
case HEADER_BUILD_ID:
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4f65faafe75d..2d39da470267 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1310,8 +1310,13 @@ static int write_cln_size(struct feat_fd *ff,
{
int cln_size = cacheline_size();
- if (!cln_size)
- cln_size = 0;
+
+ if (!cln_size) {
+ int default_cacheline_size = 64;
+
+ cln_size = default_cacheline_size;
+ }
+
ff->ph->env.cln_size = cln_size;
return do_write(ff, &cln_size, sizeof(cln_size));
@@ -2274,7 +2279,7 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
}
}
-static void print_cln_size(struct feat_fd *ff, FILE *fp __maybe_unused)
+static void print_cln_size(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
}
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 13287ade784e..5f617cf03d5d 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2483,8 +2483,12 @@ hist_entry__cln_size(struct hist_entry *he)
if (he && he->hists) {
struct evsel *evsel = hists_to_evsel(he->hists);
- if (evsel && evsel->evlist->session && evsel->evlist->session)
- ret = evsel->evlist->session->header.env.cln_size;
+
+ if (evsel) {
+ struct perf_session *session = evsel__session(evsel);
+
+ ret = session->header.env.cln_size;
+ }
}
if (!ret || ret < 1) {
--
2.53.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH v4] perf utilities: cln_size header
2026-03-21 20:41 ` [PATCH v4] " Ricky Ringler
@ 2026-03-26 22:43 ` Namhyung Kim
2026-03-28 20:04 ` [PATCH v5] " Ricky Ringler
0 siblings, 1 reply; 15+ messages in thread
From: Namhyung Kim @ 2026-03-26 22:43 UTC (permalink / raw)
To: Ricky Ringler; +Cc: irogers, mingo, acme, linux-kernel, linux-perf-users
Hello,
On Sat, Mar 21, 2026 at 08:41:55PM +0000, Ricky Ringler wrote:
> Store cacheline size during perf record in header, so
> that cacheline size can be used for other features, like
> sort.
>
> V4: Ian feedback
> V3: Rebase off perf-tools-next round two
> V2: Rebase off perf-tools-next
>
> Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
Please squash the patches into a single change and send it again.
>
> Testing:
> - Built perf
> - Ran record + report with feat enabled
> - Ran record + report with feat disabled
>
> Tested-by: Ricky Ringler <ricky.ringler@proton.me>
Tested-by tag from the author may not be meaningful. We always expect
authors to test their patches. :)
> Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
> ---
> tools/perf/builtin-inject.c | 2 +-
> tools/perf/util/header.c | 11 ++++++++---
> tools/perf/util/sort.c | 8 ++++++--
> 3 files changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index 2675d32f88cf..11ac7c8c4be3 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -2134,7 +2134,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
> case HEADER_HYBRID_TOPOLOGY:
> case HEADER_PMU_CAPS:
> case HEADER_CPU_DOMAIN_INFO:
> - case HEADER_CLNq_SIZE:
> + case HEADER_CLN_SIZE:
> return true;
> /* Information that can be updated */
> case HEADER_BUILD_ID:
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index 4f65faafe75d..2d39da470267 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -1310,8 +1310,13 @@ static int write_cln_size(struct feat_fd *ff,
> {
> int cln_size = cacheline_size();
>
> - if (!cln_size)
> - cln_size = 0;
> +
> + if (!cln_size) {
> + int default_cacheline_size = 64;
Let's get rid of the local variable.
Thanks,
Namhyung
> +
> + cln_size = default_cacheline_size;
> + }
> +
> ff->ph->env.cln_size = cln_size;
>
> return do_write(ff, &cln_size, sizeof(cln_size));
> @@ -2274,7 +2279,7 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
> }
> }
>
> -static void print_cln_size(struct feat_fd *ff, FILE *fp __maybe_unused)
> +static void print_cln_size(struct feat_fd *ff, FILE *fp)
> {
> fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
> }
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index 13287ade784e..5f617cf03d5d 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -2483,8 +2483,12 @@ hist_entry__cln_size(struct hist_entry *he)
> if (he && he->hists) {
> struct evsel *evsel = hists_to_evsel(he->hists);
>
> - if (evsel && evsel->evlist->session && evsel->evlist->session)
> - ret = evsel->evlist->session->header.env.cln_size;
> +
> + if (evsel) {
> + struct perf_session *session = evsel__session(evsel);
> +
> + ret = session->header.env.cln_size;
> + }
> }
>
> if (!ret || ret < 1) {
> --
> 2.53.0
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v5] perf utilities: cln_size header
2026-03-26 22:43 ` Namhyung Kim
@ 2026-03-28 20:04 ` Ricky Ringler
2026-03-30 7:16 ` kernel test robot
2026-03-31 6:37 ` Namhyung Kim
0 siblings, 2 replies; 15+ messages in thread
From: Ricky Ringler @ 2026-03-28 20:04 UTC (permalink / raw)
To: namhyung
Cc: irogers, mingo, acme, linux-kernel, linux-perf-users,
Ricky Ringler
Store cacheline size during perf record in header, so
that cacheline size can be used for other features, like
sort.
V5: Namhyung feedback
V4: Ian feedback
V3: Rebase off perf-tools-next round two
V2: Rebase off perf-tools-next
Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
Testing:
- Built perf
- Ran record + report with feat enabled
- Ran record + report with feat disabled
Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
---
tools/perf/builtin-inject.c | 1 +
tools/perf/util/env.h | 1 +
tools/perf/util/header.c | 33 +++++++++++++++++++++++++++++
tools/perf/util/header.h | 1 +
tools/perf/util/sort.c | 41 +++++++++++++++++++++++++++----------
5 files changed, 66 insertions(+), 11 deletions(-)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 5b29f4296861..11ac7c8c4be3 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2134,6 +2134,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
case HEADER_HYBRID_TOPOLOGY:
case HEADER_PMU_CAPS:
case HEADER_CPU_DOMAIN_INFO:
+ case HEADER_CLN_SIZE:
return true;
/* Information that can be updated */
case HEADER_BUILD_ID:
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index a4501cbca375..c7052ac1f856 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -112,6 +112,7 @@ struct perf_env {
struct cpu_cache_level *caches;
struct cpu_domain_map **cpu_domain;
int caches_cnt;
+ unsigned int cln_size;
u32 comp_ratio;
u32 comp_ver;
u32 comp_type;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 9142a8ba4019..4d852bd4ca9a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -54,6 +54,7 @@
#include "bpf-event.h"
#include "bpf-utils.h"
#include "clockid.h"
+#include "cacheline.h"
#include <linux/ctype.h>
#include <internal/lib.h>
@@ -1304,6 +1305,22 @@ static int write_cache(struct feat_fd *ff,
return ret;
}
+#define DEFAULT_CACHELINE_SIZE = 64
+
+static int write_cln_size(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ int cln_size = cacheline_size();
+
+
+ if(!cln_size)
+ cln_size = DEFAULT_CACHELINE_SIZE;
+
+ ff->ph->env.cln_size = cln_size;
+
+ return do_write(ff, &cln_size, sizeof(cln_size));
+}
+
static int write_stat(struct feat_fd *ff __maybe_unused,
struct evlist *evlist __maybe_unused)
{
@@ -2261,6 +2278,11 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
}
}
+static void print_cln_size(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
+}
+
static void print_compressed(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
@@ -3154,6 +3176,16 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
return -1;
}
+static int process_cln_size(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_env *env = &ff->ph->env;
+
+ if (do_read_u32(ff, &env->cln_size))
+ return -1;
+
+ return 0;
+}
+
static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
{
struct perf_session *session;
@@ -3763,6 +3795,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(PMU_CAPS, pmu_caps, false),
FEAT_OPR(CPU_DOMAIN_INFO, cpu_domain_info, true),
FEAT_OPR(E_MACHINE, e_machine, false),
+ FEAT_OPR(CLN_SIZE, cln_size, false),
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index cc40ac796f52..be315040727f 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -55,6 +55,7 @@ enum {
HEADER_PMU_CAPS,
HEADER_CPU_DOMAIN_INFO,
HEADER_E_MACHINE,
+ HEADER_CLN_SIZE,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 42d5cd7ef4e2..5f617cf03d5d 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -30,6 +30,7 @@
#include "time-utils.h"
#include "cgroup.h"
#include "machine.h"
+#include "session.h"
#include "trace-event.h"
#include <linux/kernel.h>
#include <linux/string.h>
@@ -2474,7 +2475,30 @@ struct sort_entry sort_type_offset = {
/* --sort typecln */
-#define DEFAULT_CACHELINE_SIZE 64
+static int
+hist_entry__cln_size(struct hist_entry *he)
+{
+ int ret = 0;
+
+ if (he && he->hists) {
+ struct evsel *evsel = hists_to_evsel(he->hists);
+
+
+ if (evsel) {
+ struct perf_session *session = evsel__session(evsel);
+
+ ret = session->header.env.cln_size;
+ }
+ }
+
+ if (!ret || ret < 1) {
+ int default_cacheline_size = 64; // avoid div/0 later
+
+ ret = default_cacheline_size;
+ }
+
+ return ret;
+}
static int64_t
sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
@@ -2482,11 +2506,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
struct annotated_data_type *left_type = left->mem_type;
struct annotated_data_type *right_type = right->mem_type;
int64_t left_cln, right_cln;
+ int64_t cln_size_left = hist_entry__cln_size(left);
+ int64_t cln_size_right = hist_entry__cln_size(right);
int64_t ret;
- int cln_size = cacheline_size();
-
- if (cln_size == 0)
- cln_size = DEFAULT_CACHELINE_SIZE;
if (!left_type) {
sort__type_init(left);
@@ -2502,8 +2524,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
if (ret)
return ret;
- left_cln = left->mem_type_off / cln_size;
- right_cln = right->mem_type_off / cln_size;
+ left_cln = left->mem_type_off / cln_size_left;
+ right_cln = right->mem_type_off / cln_size_right;
return left_cln - right_cln;
}
@@ -2511,10 +2533,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width __maybe_unused)
{
struct annotated_data_type *he_type = he->mem_type;
- int cln_size = cacheline_size();
-
- if (cln_size == 0)
- cln_size = DEFAULT_CACHELINE_SIZE;
+ int cln_size = hist_entry__cln_size(he);
return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
he->mem_type_off / cln_size);
--
2.53.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH v5] perf utilities: cln_size header
2026-03-28 20:04 ` [PATCH v5] " Ricky Ringler
@ 2026-03-30 7:16 ` kernel test robot
2026-03-31 6:37 ` Namhyung Kim
1 sibling, 0 replies; 15+ messages in thread
From: kernel test robot @ 2026-03-30 7:16 UTC (permalink / raw)
To: Ricky Ringler, namhyung
Cc: oe-kbuild-all, irogers, mingo, acme, linux-kernel,
linux-perf-users, Ricky Ringler
Hi Ricky,
kernel test robot noticed the following build errors:
[auto build test ERROR on perf-tools-next/perf-tools-next]
[also build test ERROR on tip/perf/core perf-tools/perf-tools next-20260327]
[cannot apply to acme/perf/core linus/master v6.16-rc1]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Ricky-Ringler/perf-utilities-cln_size-header/20260329-205729
base: https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git perf-tools-next
patch link: https://lore.kernel.org/r/20260328200442.134489-1-ricky.ringler%40proton.me
patch subject: [PATCH v5] perf utilities: cln_size header
config: arm64-allnoconfig-bpf (https://download.01.org/0day-ci/archive/20260329/202603291618.3giyFism-lkp@intel.com/config)
compiler: aarch64-linux-gnu-gcc (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260329/202603291618.3giyFism-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/r/202603291618.3giyFism-lkp@intel.com/
All errors (new ones prefixed by >>):
Makefile.config:576: No elfutils/debuginfod.h found, no debuginfo server support, please install libdebuginfod-dev/elfutils-debuginfod-client-devel or equivalent
Makefile.config:963: No libllvm 13+ found, slower source file resolution, please install llvm-devel/llvm-dev
Makefile.config:1159: Rust is not found. Test workloads with rust are disabled.
PERF_VERSION = 7.0.rc4.ga971d40d4a8d
util/header.c: In function 'write_cln_size':
>> util/header.c:1308:32: error: expected expression before '=' token
1308 | #define DEFAULT_CACHELINE_SIZE = 64
| ^
util/header.c:1317:28: note: in expansion of macro 'DEFAULT_CACHELINE_SIZE'
1317 | cln_size = DEFAULT_CACHELINE_SIZE;
| ^~~~~~~~~~~~~~~~~~~~~~
make[4]: *** [tools/build/Makefile.build:95: util/header.o] Error 1
make[4]: *** Waiting for unfinished jobs....
make[3]: *** [tools/build/Makefile.build:158: util] Error 2
make[2]: *** [Makefile.perf:797: perf-util-in.o] Error 2
make[2]: *** Waiting for unfinished jobs....
make[1]: *** [Makefile.perf:289: sub-make] Error 2
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v5] perf utilities: cln_size header
2026-03-28 20:04 ` [PATCH v5] " Ricky Ringler
2026-03-30 7:16 ` kernel test robot
@ 2026-03-31 6:37 ` Namhyung Kim
2026-04-03 22:08 ` [PATCH v6] " Ricky Ringler
1 sibling, 1 reply; 15+ messages in thread
From: Namhyung Kim @ 2026-03-31 6:37 UTC (permalink / raw)
To: Ricky Ringler; +Cc: irogers, mingo, acme, linux-kernel, linux-perf-users
On Sat, Mar 28, 2026 at 08:04:52PM +0000, Ricky Ringler wrote:
> Store cacheline size during perf record in header, so
> that cacheline size can be used for other features, like
> sort.
>
> V5: Namhyung feedback
> V4: Ian feedback
> V3: Rebase off perf-tools-next round two
> V2: Rebase off perf-tools-next
>
> Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
>
> Testing:
> - Built perf
> - Ran record + report with feat enabled
> - Ran record + report with feat disabled
It'd be nice if you can add an example output like:
$ perf report --header-only | grep cacheline
>
> Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
> ---
> tools/perf/builtin-inject.c | 1 +
> tools/perf/util/env.h | 1 +
> tools/perf/util/header.c | 33 +++++++++++++++++++++++++++++
> tools/perf/util/header.h | 1 +
> tools/perf/util/sort.c | 41 +++++++++++++++++++++++++++----------
> 5 files changed, 66 insertions(+), 11 deletions(-)
>
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index 5b29f4296861..11ac7c8c4be3 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -2134,6 +2134,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
> case HEADER_HYBRID_TOPOLOGY:
> case HEADER_PMU_CAPS:
> case HEADER_CPU_DOMAIN_INFO:
> + case HEADER_CLN_SIZE:
> return true;
> /* Information that can be updated */
> case HEADER_BUILD_ID:
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index a4501cbca375..c7052ac1f856 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -112,6 +112,7 @@ struct perf_env {
> struct cpu_cache_level *caches;
> struct cpu_domain_map **cpu_domain;
> int caches_cnt;
> + unsigned int cln_size;
> u32 comp_ratio;
> u32 comp_ver;
> u32 comp_type;
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index 9142a8ba4019..4d852bd4ca9a 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -54,6 +54,7 @@
> #include "bpf-event.h"
> #include "bpf-utils.h"
> #include "clockid.h"
> +#include "cacheline.h"
>
> #include <linux/ctype.h>
> #include <internal/lib.h>
> @@ -1304,6 +1305,22 @@ static int write_cache(struct feat_fd *ff,
> return ret;
> }
>
> +#define DEFAULT_CACHELINE_SIZE = 64
Please move this into a header (after removing '=') ...
> +
> +static int write_cln_size(struct feat_fd *ff,
> + struct evlist *evlist __maybe_unused)
> +{
> + int cln_size = cacheline_size();
> +
> +
> + if(!cln_size)
> + cln_size = DEFAULT_CACHELINE_SIZE;
> +
> + ff->ph->env.cln_size = cln_size;
> +
> + return do_write(ff, &cln_size, sizeof(cln_size));
> +}
> +
> static int write_stat(struct feat_fd *ff __maybe_unused,
> struct evlist *evlist __maybe_unused)
> {
> @@ -2261,6 +2278,11 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
> }
> }
>
> +static void print_cln_size(struct feat_fd *ff, FILE *fp)
> +{
> + fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
> +}
> +
> static void print_compressed(struct feat_fd *ff, FILE *fp)
> {
> fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
> @@ -3154,6 +3176,16 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
> return -1;
> }
>
> +static int process_cln_size(struct feat_fd *ff, void *data __maybe_unused)
> +{
> + struct perf_env *env = &ff->ph->env;
> +
> + if (do_read_u32(ff, &env->cln_size))
> + return -1;
> +
> + return 0;
> +}
> +
> static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
> {
> struct perf_session *session;
> @@ -3763,6 +3795,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
> FEAT_OPR(PMU_CAPS, pmu_caps, false),
> FEAT_OPR(CPU_DOMAIN_INFO, cpu_domain_info, true),
> FEAT_OPR(E_MACHINE, e_machine, false),
> + FEAT_OPR(CLN_SIZE, cln_size, false),
> };
>
> struct header_print_data {
> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
> index cc40ac796f52..be315040727f 100644
> --- a/tools/perf/util/header.h
> +++ b/tools/perf/util/header.h
> @@ -55,6 +55,7 @@ enum {
> HEADER_PMU_CAPS,
> HEADER_CPU_DOMAIN_INFO,
> HEADER_E_MACHINE,
> + HEADER_CLN_SIZE,
> HEADER_LAST_FEATURE,
> HEADER_FEAT_BITS = 256,
> };
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index 42d5cd7ef4e2..5f617cf03d5d 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -30,6 +30,7 @@
> #include "time-utils.h"
> #include "cgroup.h"
> #include "machine.h"
> +#include "session.h"
> #include "trace-event.h"
> #include <linux/kernel.h>
> #include <linux/string.h>
> @@ -2474,7 +2475,30 @@ struct sort_entry sort_type_offset = {
>
> /* --sort typecln */
>
> -#define DEFAULT_CACHELINE_SIZE 64
> +static int
> +hist_entry__cln_size(struct hist_entry *he)
> +{
> + int ret = 0;
> +
> + if (he && he->hists) {
> + struct evsel *evsel = hists_to_evsel(he->hists);
> +
> +
> + if (evsel) {
> + struct perf_session *session = evsel__session(evsel);
> +
> + ret = session->header.env.cln_size;
> + }
> + }
> +
> + if (!ret || ret < 1) {
> + int default_cacheline_size = 64; // avoid div/0 later
> +
> + ret = default_cacheline_size;
... and use it here as well.
Thanks,
Namhyung
> + }
> +
> + return ret;
> +}
>
> static int64_t
> sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> @@ -2482,11 +2506,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> struct annotated_data_type *left_type = left->mem_type;
> struct annotated_data_type *right_type = right->mem_type;
> int64_t left_cln, right_cln;
> + int64_t cln_size_left = hist_entry__cln_size(left);
> + int64_t cln_size_right = hist_entry__cln_size(right);
> int64_t ret;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
>
> if (!left_type) {
> sort__type_init(left);
> @@ -2502,8 +2524,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> if (ret)
> return ret;
>
> - left_cln = left->mem_type_off / cln_size;
> - right_cln = right->mem_type_off / cln_size;
> + left_cln = left->mem_type_off / cln_size_left;
> + right_cln = right->mem_type_off / cln_size_right;
> return left_cln - right_cln;
> }
>
> @@ -2511,10 +2533,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width __maybe_unused)
> {
> struct annotated_data_type *he_type = he->mem_type;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
> + int cln_size = hist_entry__cln_size(he);
>
> return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
> he->mem_type_off / cln_size);
> --
> 2.53.0
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v6] perf utilities: cln_size header
2026-03-31 6:37 ` Namhyung Kim
@ 2026-04-03 22:08 ` Ricky Ringler
2026-04-04 0:29 ` Namhyung Kim
0 siblings, 1 reply; 15+ messages in thread
From: Ricky Ringler @ 2026-04-03 22:08 UTC (permalink / raw)
To: namhyung
Cc: irogers, mingo, acme, linux-kernel, linux-perf-users,
Ricky Ringler
Store cacheline size during perf record in header, so
that cacheline size can be used for other features, like
sort.
V6: Namhyung feedback and tests
V5: Namhyung feedback
V4: Ian feedback
V3: Rebase off perf-tools-next round two
V2: Rebase off perf-tools-next
Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
Testing:
- Built perf
- Ran record + report with feat enabled
- Ran record + report with feat disabled
Testing example with feat enabled:
$ perf record ./Example
$ perf report --header-only | grep -C 3 cacheline
CPU_DOMAIN_INFO info available, use -I to display
e_machine : 62
e_flags : 0
cacheline size: 64
missing features: TRACING_DATA BUILD_ID BRANCH_STACK GROUP_DESC AUXTRACE \
STAT CLOCKID DIR_FORMAT COMPRESSED CLOCK_DATA
========
Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
---
tools/perf/builtin-inject.c | 1 +
tools/perf/util/env.h | 1 +
tools/perf/util/header.c | 33 +++++++++++++++++++++++++++++
tools/perf/util/header.h | 1 +
tools/perf/util/sort.c | 41 +++++++++++++++++++++++++++----------
5 files changed, 66 insertions(+), 11 deletions(-)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 5b29f4296861..11ac7c8c4be3 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2134,6 +2134,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
case HEADER_HYBRID_TOPOLOGY:
case HEADER_PMU_CAPS:
case HEADER_CPU_DOMAIN_INFO:
+ case HEADER_CLN_SIZE:
return true;
/* Information that can be updated */
case HEADER_BUILD_ID:
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index a4501cbca375..c7052ac1f856 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -112,6 +112,7 @@ struct perf_env {
struct cpu_cache_level *caches;
struct cpu_domain_map **cpu_domain;
int caches_cnt;
+ unsigned int cln_size;
u32 comp_ratio;
u32 comp_ver;
u32 comp_type;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 9142a8ba4019..4d852bd4ca9a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -54,6 +54,7 @@
#include "bpf-event.h"
#include "bpf-utils.h"
#include "clockid.h"
+#include "cacheline.h"
#include <linux/ctype.h>
#include <internal/lib.h>
@@ -1304,6 +1305,22 @@ static int write_cache(struct feat_fd *ff,
return ret;
}
+#define DEFAULT_CACHELINE_SIZE = 64
+
+static int write_cln_size(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ int cln_size = cacheline_size();
+
+
+ if(!cln_size)
+ cln_size = DEFAULT_CACHELINE_SIZE;
+
+ ff->ph->env.cln_size = cln_size;
+
+ return do_write(ff, &cln_size, sizeof(cln_size));
+}
+
static int write_stat(struct feat_fd *ff __maybe_unused,
struct evlist *evlist __maybe_unused)
{
@@ -2261,6 +2278,11 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
}
}
+static void print_cln_size(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
+}
+
static void print_compressed(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
@@ -3154,6 +3176,16 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
return -1;
}
+static int process_cln_size(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_env *env = &ff->ph->env;
+
+ if (do_read_u32(ff, &env->cln_size))
+ return -1;
+
+ return 0;
+}
+
static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
{
struct perf_session *session;
@@ -3763,6 +3795,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(PMU_CAPS, pmu_caps, false),
FEAT_OPR(CPU_DOMAIN_INFO, cpu_domain_info, true),
FEAT_OPR(E_MACHINE, e_machine, false),
+ FEAT_OPR(CLN_SIZE, cln_size, false),
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index cc40ac796f52..be315040727f 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -55,6 +55,7 @@ enum {
HEADER_PMU_CAPS,
HEADER_CPU_DOMAIN_INFO,
HEADER_E_MACHINE,
+ HEADER_CLN_SIZE,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 42d5cd7ef4e2..5f617cf03d5d 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -30,6 +30,7 @@
#include "time-utils.h"
#include "cgroup.h"
#include "machine.h"
+#include "session.h"
#include "trace-event.h"
#include <linux/kernel.h>
#include <linux/string.h>
@@ -2474,7 +2475,30 @@ struct sort_entry sort_type_offset = {
/* --sort typecln */
-#define DEFAULT_CACHELINE_SIZE 64
+static int
+hist_entry__cln_size(struct hist_entry *he)
+{
+ int ret = 0;
+
+ if (he && he->hists) {
+ struct evsel *evsel = hists_to_evsel(he->hists);
+
+
+ if (evsel) {
+ struct perf_session *session = evsel__session(evsel);
+
+ ret = session->header.env.cln_size;
+ }
+ }
+
+ if (!ret || ret < 1) {
+ int default_cacheline_size = 64; // avoid div/0 later
+
+ ret = default_cacheline_size;
+ }
+
+ return ret;
+}
static int64_t
sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
@@ -2482,11 +2506,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
struct annotated_data_type *left_type = left->mem_type;
struct annotated_data_type *right_type = right->mem_type;
int64_t left_cln, right_cln;
+ int64_t cln_size_left = hist_entry__cln_size(left);
+ int64_t cln_size_right = hist_entry__cln_size(right);
int64_t ret;
- int cln_size = cacheline_size();
-
- if (cln_size == 0)
- cln_size = DEFAULT_CACHELINE_SIZE;
if (!left_type) {
sort__type_init(left);
@@ -2502,8 +2524,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
if (ret)
return ret;
- left_cln = left->mem_type_off / cln_size;
- right_cln = right->mem_type_off / cln_size;
+ left_cln = left->mem_type_off / cln_size_left;
+ right_cln = right->mem_type_off / cln_size_right;
return left_cln - right_cln;
}
@@ -2511,10 +2533,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width __maybe_unused)
{
struct annotated_data_type *he_type = he->mem_type;
- int cln_size = cacheline_size();
-
- if (cln_size == 0)
- cln_size = DEFAULT_CACHELINE_SIZE;
+ int cln_size = hist_entry__cln_size(he);
return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
he->mem_type_off / cln_size);
--
2.53.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH v6] perf utilities: cln_size header
2026-04-03 22:08 ` [PATCH v6] " Ricky Ringler
@ 2026-04-04 0:29 ` Namhyung Kim
2026-04-04 1:16 ` Ricky Ringler
0 siblings, 1 reply; 15+ messages in thread
From: Namhyung Kim @ 2026-04-04 0:29 UTC (permalink / raw)
To: Ricky Ringler; +Cc: irogers, mingo, acme, linux-kernel, linux-perf-users
On Fri, Apr 03, 2026 at 10:08:06PM +0000, Ricky Ringler wrote:
> Store cacheline size during perf record in header, so
> that cacheline size can be used for other features, like
> sort.
>
> V6: Namhyung feedback and tests
> V5: Namhyung feedback
> V4: Ian feedback
> V3: Rebase off perf-tools-next round two
> V2: Rebase off perf-tools-next
>
> Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
>
> Testing:
> - Built perf
> - Ran record + report with feat enabled
> - Ran record + report with feat disabled
>
> Testing example with feat enabled:
> $ perf record ./Example
> $ perf report --header-only | grep -C 3 cacheline
> CPU_DOMAIN_INFO info available, use -I to display
> e_machine : 62
> e_flags : 0
> cacheline size: 64
> missing features: TRACING_DATA BUILD_ID BRANCH_STACK GROUP_DESC AUXTRACE \
> STAT CLOCKID DIR_FORMAT COMPRESSED CLOCK_DATA
> ========
>
> Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
> ---
> tools/perf/builtin-inject.c | 1 +
> tools/perf/util/env.h | 1 +
> tools/perf/util/header.c | 33 +++++++++++++++++++++++++++++
> tools/perf/util/header.h | 1 +
> tools/perf/util/sort.c | 41 +++++++++++++++++++++++++++----------
> 5 files changed, 66 insertions(+), 11 deletions(-)
>
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index 5b29f4296861..11ac7c8c4be3 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -2134,6 +2134,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
> case HEADER_HYBRID_TOPOLOGY:
> case HEADER_PMU_CAPS:
> case HEADER_CPU_DOMAIN_INFO:
> + case HEADER_CLN_SIZE:
> return true;
> /* Information that can be updated */
> case HEADER_BUILD_ID:
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index a4501cbca375..c7052ac1f856 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -112,6 +112,7 @@ struct perf_env {
> struct cpu_cache_level *caches;
> struct cpu_domain_map **cpu_domain;
> int caches_cnt;
> + unsigned int cln_size;
> u32 comp_ratio;
> u32 comp_ver;
> u32 comp_type;
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index 9142a8ba4019..4d852bd4ca9a 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -54,6 +54,7 @@
> #include "bpf-event.h"
> #include "bpf-utils.h"
> #include "clockid.h"
> +#include "cacheline.h"
>
> #include <linux/ctype.h>
> #include <internal/lib.h>
> @@ -1304,6 +1305,22 @@ static int write_cache(struct feat_fd *ff,
> return ret;
> }
>
> +#define DEFAULT_CACHELINE_SIZE = 64
I'm curious how your compiler could build this.
> +
> +static int write_cln_size(struct feat_fd *ff,
> + struct evlist *evlist __maybe_unused)
> +{
> + int cln_size = cacheline_size();
> +
> +
> + if(!cln_size)
> + cln_size = DEFAULT_CACHELINE_SIZE;
It should have a syntax error, right?
> +
> + ff->ph->env.cln_size = cln_size;
> +
> + return do_write(ff, &cln_size, sizeof(cln_size));
> +}
> +
> static int write_stat(struct feat_fd *ff __maybe_unused,
> struct evlist *evlist __maybe_unused)
> {
[SNIP]
> @@ -2474,7 +2475,30 @@ struct sort_entry sort_type_offset = {
>
> /* --sort typecln */
>
> -#define DEFAULT_CACHELINE_SIZE 64
> +static int
> +hist_entry__cln_size(struct hist_entry *he)
> +{
> + int ret = 0;
> +
> + if (he && he->hists) {
> + struct evsel *evsel = hists_to_evsel(he->hists);
> +
> +
> + if (evsel) {
> + struct perf_session *session = evsel__session(evsel);
> +
> + ret = session->header.env.cln_size;
> + }
> + }
> +
> + if (!ret || ret < 1) {
> + int default_cacheline_size = 64; // avoid div/0 later
As I said in the previous comment, please use DEFAULT_CACHELINE_SIZE
here.
Thanks,
Namhyung
> +
> + ret = default_cacheline_size;
> + }
> +
> + return ret;
> +}
>
> static int64_t
> sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> @@ -2482,11 +2506,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> struct annotated_data_type *left_type = left->mem_type;
> struct annotated_data_type *right_type = right->mem_type;
> int64_t left_cln, right_cln;
> + int64_t cln_size_left = hist_entry__cln_size(left);
> + int64_t cln_size_right = hist_entry__cln_size(right);
> int64_t ret;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
>
> if (!left_type) {
> sort__type_init(left);
> @@ -2502,8 +2524,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> if (ret)
> return ret;
>
> - left_cln = left->mem_type_off / cln_size;
> - right_cln = right->mem_type_off / cln_size;
> + left_cln = left->mem_type_off / cln_size_left;
> + right_cln = right->mem_type_off / cln_size_right;
> return left_cln - right_cln;
> }
>
> @@ -2511,10 +2533,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width __maybe_unused)
> {
> struct annotated_data_type *he_type = he->mem_type;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
> + int cln_size = hist_entry__cln_size(he);
>
> return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
> he->mem_type_off / cln_size);
> --
> 2.53.0
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v6] perf utilities: cln_size header
2026-04-04 0:29 ` Namhyung Kim
@ 2026-04-04 1:16 ` Ricky Ringler
2026-04-06 5:26 ` Namhyung Kim
0 siblings, 1 reply; 15+ messages in thread
From: Ricky Ringler @ 2026-04-04 1:16 UTC (permalink / raw)
To: namhyung
Cc: irogers, mingo, acme, linux-kernel, linux-perf-users,
Ricky Ringler
Forgive me for wasting your time with my last message...
I switched work machines and attached the wrong patch file.
---
Store cacheline size during perf record in header, so
that cacheline size can be used for other features, like
sort.
V6: Namhyung feedback and tests
V5: Namhyung feedback
V4: Ian feedback
V3: Rebase off perf-tools-next round two
V2: Rebase off perf-tools-next
Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
Testing:
- Built perf
- Ran record + report with feat enabled
- Ran record + report with feat disabled
Testing example with feat enabled:
$ perf record ./Example
$ perf report --header-only | grep -C 3 cacheline
CPU_DOMAIN_INFO info available, use -I to display
e_machine : 62
e_flags : 0
cacheline size: 64
missing features: TRACING_DATA BUILD_ID BRANCH_STACK GROUP_DESC AUXTRACE \
STAT CLOCKID DIR_FORMAT COMPRESSED CLOCK_DATA
========
Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
---
tools/perf/builtin-inject.c | 1 +
tools/perf/util/env.h | 1 +
tools/perf/util/header.c | 31 ++++++++++++++++++++++++++++++
tools/perf/util/header.h | 3 +++
tools/perf/util/sort.c | 38 ++++++++++++++++++++++++++-----------
5 files changed, 63 insertions(+), 11 deletions(-)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 5b29f4296861..11ac7c8c4be3 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2134,6 +2134,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
case HEADER_HYBRID_TOPOLOGY:
case HEADER_PMU_CAPS:
case HEADER_CPU_DOMAIN_INFO:
+ case HEADER_CLN_SIZE:
return true;
/* Information that can be updated */
case HEADER_BUILD_ID:
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index a4501cbca375..c7052ac1f856 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -112,6 +112,7 @@ struct perf_env {
struct cpu_cache_level *caches;
struct cpu_domain_map **cpu_domain;
int caches_cnt;
+ unsigned int cln_size;
u32 comp_ratio;
u32 comp_ver;
u32 comp_type;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 9142a8ba4019..2a2c64b1a384 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -54,6 +54,7 @@
#include "bpf-event.h"
#include "bpf-utils.h"
#include "clockid.h"
+#include "cacheline.h"
#include <linux/ctype.h>
#include <internal/lib.h>
@@ -1304,6 +1305,20 @@ static int write_cache(struct feat_fd *ff,
return ret;
}
+static int write_cln_size(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ int cln_size = cacheline_size();
+
+
+ if (!cln_size)
+ cln_size = DEFAULT_CACHELINE_SIZE;
+
+ ff->ph->env.cln_size = cln_size;
+
+ return do_write(ff, &cln_size, sizeof(cln_size));
+}
+
static int write_stat(struct feat_fd *ff __maybe_unused,
struct evlist *evlist __maybe_unused)
{
@@ -2261,6 +2276,11 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
}
}
+static void print_cln_size(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
+}
+
static void print_compressed(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
@@ -3154,6 +3174,16 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
return -1;
}
+static int process_cln_size(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_env *env = &ff->ph->env;
+
+ if (do_read_u32(ff, &env->cln_size))
+ return -1;
+
+ return 0;
+}
+
static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
{
struct perf_session *session;
@@ -3763,6 +3793,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(PMU_CAPS, pmu_caps, false),
FEAT_OPR(CPU_DOMAIN_INFO, cpu_domain_info, true),
FEAT_OPR(E_MACHINE, e_machine, false),
+ FEAT_OPR(CLN_SIZE, cln_size, false),
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index cc40ac796f52..8429e856fd7c 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -55,6 +55,7 @@ enum {
HEADER_PMU_CAPS,
HEADER_CPU_DOMAIN_INFO,
HEADER_E_MACHINE,
+ HEADER_CLN_SIZE,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
@@ -202,6 +203,8 @@ int write_padded(struct feat_fd *fd, const void *bf,
int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp);
+#define DEFAULT_CACHELINE_SIZE 64
+
/*
* arch specific callback
*/
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 42d5cd7ef4e2..50eb58837b10 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -30,6 +30,7 @@
#include "time-utils.h"
#include "cgroup.h"
#include "machine.h"
+#include "session.h"
#include "trace-event.h"
#include <linux/kernel.h>
#include <linux/string.h>
@@ -2474,7 +2475,27 @@ struct sort_entry sort_type_offset = {
/* --sort typecln */
-#define DEFAULT_CACHELINE_SIZE 64
+static int
+hist_entry__cln_size(struct hist_entry *he)
+{
+ int ret = 0;
+
+ if (he && he->hists) {
+ struct evsel *evsel = hists_to_evsel(he->hists);
+
+
+ if (evsel) {
+ struct perf_session *session = evsel__session(evsel);
+
+ ret = session->header.env.cln_size;
+ }
+ }
+
+ if (!ret || ret < 1)
+ ret = DEFAULT_CACHELINE_SIZE; // avoid div/0 later
+
+ return ret;
+}
static int64_t
sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
@@ -2482,11 +2503,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
struct annotated_data_type *left_type = left->mem_type;
struct annotated_data_type *right_type = right->mem_type;
int64_t left_cln, right_cln;
+ int64_t cln_size_left = hist_entry__cln_size(left);
+ int64_t cln_size_right = hist_entry__cln_size(right);
int64_t ret;
- int cln_size = cacheline_size();
-
- if (cln_size == 0)
- cln_size = DEFAULT_CACHELINE_SIZE;
if (!left_type) {
sort__type_init(left);
@@ -2502,8 +2521,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
if (ret)
return ret;
- left_cln = left->mem_type_off / cln_size;
- right_cln = right->mem_type_off / cln_size;
+ left_cln = left->mem_type_off / cln_size_left;
+ right_cln = right->mem_type_off / cln_size_right;
return left_cln - right_cln;
}
@@ -2511,10 +2530,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width __maybe_unused)
{
struct annotated_data_type *he_type = he->mem_type;
- int cln_size = cacheline_size();
-
- if (cln_size == 0)
- cln_size = DEFAULT_CACHELINE_SIZE;
+ int cln_size = hist_entry__cln_size(he);
return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
he->mem_type_off / cln_size);
--
2.53.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH v6] perf utilities: cln_size header
2026-04-04 1:16 ` Ricky Ringler
@ 2026-04-06 5:26 ` Namhyung Kim
0 siblings, 0 replies; 15+ messages in thread
From: Namhyung Kim @ 2026-04-06 5:26 UTC (permalink / raw)
To: Ricky Ringler; +Cc: irogers, mingo, acme, linux-kernel, linux-perf-users
On Sat, Apr 04, 2026 at 01:16:56AM +0000, Ricky Ringler wrote:
> Forgive me for wasting your time with my last message...
> I switched work machines and attached the wrong patch file.
That's fine but you could send it as v7 so that the tools can pick the
latest version without confusion.
>
> ---
Also, adding this mark invalidates the following paragraphs and leaves
them out of the commit message. I'll update it this time, but keep
that in mind for next time.
Thanks,
Namhyung
>
> Store cacheline size during perf record in header, so
> that cacheline size can be used for other features, like
> sort.
>
> V6: Namhyung feedback and tests
> V5: Namhyung feedback
> V4: Ian feedback
> V3: Rebase off perf-tools-next round two
> V2: Rebase off perf-tools-next
>
> Follow-up patch from message ID "aYZiQk6Uftzlb_JV@x1"
>
> Testing:
> - Built perf
> - Ran record + report with feat enabled
> - Ran record + report with feat disabled
>
> Testing example with feat enabled:
> $ perf record ./Example
> $ perf report --header-only | grep -C 3 cacheline
> CPU_DOMAIN_INFO info available, use -I to display
> e_machine : 62
> e_flags : 0
> cacheline size: 64
> missing features: TRACING_DATA BUILD_ID BRANCH_STACK GROUP_DESC AUXTRACE \
> STAT CLOCKID DIR_FORMAT COMPRESSED CLOCK_DATA
> ========
>
> Signed-off-by: Ricky Ringler <ricky.ringler@proton.me>
> ---
> tools/perf/builtin-inject.c | 1 +
> tools/perf/util/env.h | 1 +
> tools/perf/util/header.c | 31 ++++++++++++++++++++++++++++++
> tools/perf/util/header.h | 3 +++
> tools/perf/util/sort.c | 38 ++++++++++++++++++++++++++-----------
> 5 files changed, 63 insertions(+), 11 deletions(-)
>
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index 5b29f4296861..11ac7c8c4be3 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -2134,6 +2134,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
> case HEADER_HYBRID_TOPOLOGY:
> case HEADER_PMU_CAPS:
> case HEADER_CPU_DOMAIN_INFO:
> + case HEADER_CLN_SIZE:
> return true;
> /* Information that can be updated */
> case HEADER_BUILD_ID:
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index a4501cbca375..c7052ac1f856 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -112,6 +112,7 @@ struct perf_env {
> struct cpu_cache_level *caches;
> struct cpu_domain_map **cpu_domain;
> int caches_cnt;
> + unsigned int cln_size;
> u32 comp_ratio;
> u32 comp_ver;
> u32 comp_type;
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index 9142a8ba4019..2a2c64b1a384 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -54,6 +54,7 @@
> #include "bpf-event.h"
> #include "bpf-utils.h"
> #include "clockid.h"
> +#include "cacheline.h"
>
> #include <linux/ctype.h>
> #include <internal/lib.h>
> @@ -1304,6 +1305,20 @@ static int write_cache(struct feat_fd *ff,
> return ret;
> }
>
> +static int write_cln_size(struct feat_fd *ff,
> + struct evlist *evlist __maybe_unused)
> +{
> + int cln_size = cacheline_size();
> +
> +
> + if (!cln_size)
> + cln_size = DEFAULT_CACHELINE_SIZE;
> +
> + ff->ph->env.cln_size = cln_size;
> +
> + return do_write(ff, &cln_size, sizeof(cln_size));
> +}
> +
> static int write_stat(struct feat_fd *ff __maybe_unused,
> struct evlist *evlist __maybe_unused)
> {
> @@ -2261,6 +2276,11 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
> }
> }
>
> +static void print_cln_size(struct feat_fd *ff, FILE *fp)
> +{
> + fprintf(fp, "# cacheline size: %u\n", ff->ph->env.cln_size);
> +}
> +
> static void print_compressed(struct feat_fd *ff, FILE *fp)
> {
> fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
> @@ -3154,6 +3174,16 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
> return -1;
> }
>
> +static int process_cln_size(struct feat_fd *ff, void *data __maybe_unused)
> +{
> + struct perf_env *env = &ff->ph->env;
> +
> + if (do_read_u32(ff, &env->cln_size))
> + return -1;
> +
> + return 0;
> +}
> +
> static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
> {
> struct perf_session *session;
> @@ -3763,6 +3793,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
> FEAT_OPR(PMU_CAPS, pmu_caps, false),
> FEAT_OPR(CPU_DOMAIN_INFO, cpu_domain_info, true),
> FEAT_OPR(E_MACHINE, e_machine, false),
> + FEAT_OPR(CLN_SIZE, cln_size, false),
> };
>
> struct header_print_data {
> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
> index cc40ac796f52..8429e856fd7c 100644
> --- a/tools/perf/util/header.h
> +++ b/tools/perf/util/header.h
> @@ -55,6 +55,7 @@ enum {
> HEADER_PMU_CAPS,
> HEADER_CPU_DOMAIN_INFO,
> HEADER_E_MACHINE,
> + HEADER_CLN_SIZE,
> HEADER_LAST_FEATURE,
> HEADER_FEAT_BITS = 256,
> };
> @@ -202,6 +203,8 @@ int write_padded(struct feat_fd *fd, const void *bf,
>
> int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp);
>
> +#define DEFAULT_CACHELINE_SIZE 64
> +
> /*
> * arch specific callback
> */
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index 42d5cd7ef4e2..50eb58837b10 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -30,6 +30,7 @@
> #include "time-utils.h"
> #include "cgroup.h"
> #include "machine.h"
> +#include "session.h"
> #include "trace-event.h"
> #include <linux/kernel.h>
> #include <linux/string.h>
> @@ -2474,7 +2475,27 @@ struct sort_entry sort_type_offset = {
>
> /* --sort typecln */
>
> -#define DEFAULT_CACHELINE_SIZE 64
> +static int
> +hist_entry__cln_size(struct hist_entry *he)
> +{
> + int ret = 0;
> +
> + if (he && he->hists) {
> + struct evsel *evsel = hists_to_evsel(he->hists);
> +
> +
> + if (evsel) {
> + struct perf_session *session = evsel__session(evsel);
> +
> + ret = session->header.env.cln_size;
> + }
> + }
> +
> + if (!ret || ret < 1)
> + ret = DEFAULT_CACHELINE_SIZE; // avoid div/0 later
> +
> + return ret;
> +}
>
> static int64_t
> sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> @@ -2482,11 +2503,9 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> struct annotated_data_type *left_type = left->mem_type;
> struct annotated_data_type *right_type = right->mem_type;
> int64_t left_cln, right_cln;
> + int64_t cln_size_left = hist_entry__cln_size(left);
> + int64_t cln_size_right = hist_entry__cln_size(right);
> int64_t ret;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
>
> if (!left_type) {
> sort__type_init(left);
> @@ -2502,8 +2521,8 @@ sort__typecln_sort(struct hist_entry *left, struct hist_entry *right)
> if (ret)
> return ret;
>
> - left_cln = left->mem_type_off / cln_size;
> - right_cln = right->mem_type_off / cln_size;
> + left_cln = left->mem_type_off / cln_size_left;
> + right_cln = right->mem_type_off / cln_size_right;
> return left_cln - right_cln;
> }
>
> @@ -2511,10 +2530,7 @@ static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width __maybe_unused)
> {
> struct annotated_data_type *he_type = he->mem_type;
> - int cln_size = cacheline_size();
> -
> - if (cln_size == 0)
> - cln_size = DEFAULT_CACHELINE_SIZE;
> + int cln_size = hist_entry__cln_size(he);
>
> return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name,
> he->mem_type_off / cln_size);
> --
> 2.53.0
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2026-04-06 5:26 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-14 4:07 [PATCH] perf utilities: cln_size header Ricky Ringler
2026-03-04 1:25 ` Namhyung Kim
2026-03-05 23:57 ` [PATCH v2] " Ricky Ringler
2026-03-06 0:12 ` Ricky Ringler
2026-03-08 17:20 ` [PATCH v3] " Ricky Ringler
2026-03-09 15:28 ` Ian Rogers
2026-03-21 20:41 ` [PATCH v4] " Ricky Ringler
2026-03-26 22:43 ` Namhyung Kim
2026-03-28 20:04 ` [PATCH v5] " Ricky Ringler
2026-03-30 7:16 ` kernel test robot
2026-03-31 6:37 ` Namhyung Kim
2026-04-03 22:08 ` [PATCH v6] " Ricky Ringler
2026-04-04 0:29 ` Namhyung Kim
2026-04-04 1:16 ` Ricky Ringler
2026-04-06 5:26 ` Namhyung Kim
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox