* [PATCH v3 1/5] perf: Record sample last_period before updating
2025-03-07 20:22 [PATCH v3 0/5] A mechanism for efficient support for per-function metrics mark.barnett
@ 2025-03-07 20:22 ` mark.barnett
2025-03-07 20:22 ` [PATCH v3 2/5] perf: Allow periodic events to alternate between two sample periods mark.barnett
` (3 subsequent siblings)
4 siblings, 0 replies; 15+ messages in thread
From: mark.barnett @ 2025-03-07 20:22 UTC (permalink / raw)
To: peterz, mingo, acme, namhyung, irogers
Cc: ben.gainey, deepak.surti, ak, will, james.clark, mark.rutland,
alexander.shishkin, jolsa, adrian.hunter, linux-perf-users,
linux-kernel, linux-arm-kernel, Mark Barnett
From: Mark Barnett <mark.barnett@arm.com>
This change alters the PowerPC and x86 driver implementations to record
the last sample period before the event is updated for the next period.
A common pattern in PMU driver implementations is to have a
"*_event_set_period" function which takes care of updating the various
period-related fields in a perf_event structure. In most cases, the
drivers choose to call this function after initializing a sample data
structure with perf_sample_data_init. The x86 and PowerPC drivers
deviate from this, choosing to update the period before initializing the
sample data. When using an event with an alternate sample period, this
causes an incorrect period to be written to the sample data that gets
reported to userspace.
Link: https://lore.kernel.org/r/20240515193610.2350456-4-yabinc@google.com
Signed-off-by: Mark Barnett <mark.barnett@arm.com>
---
arch/powerpc/perf/core-book3s.c | 3 ++-
arch/powerpc/perf/core-fsl-emb.c | 3 ++-
arch/x86/events/core.c | 4 +++-
arch/x86/events/intel/core.c | 5 ++++-
arch/x86/events/intel/knc.c | 4 +++-
5 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 2b79171ee185..234803441caa 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2231,6 +2231,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct pt_regs *regs)
{
u64 period = event->hw.sample_period;
+ const u64 last_period = event->hw.last_period;
s64 prev, delta, left;
int record = 0;
@@ -2296,7 +2297,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (record) {
struct perf_sample_data data;
- perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
+ perf_sample_data_init(&data, ~0ULL, last_period);
if (event->attr.sample_type & PERF_SAMPLE_ADDR_TYPE)
perf_get_data_addr(event, regs, &data.addr);
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index 1a53ab08447c..d2ffcc7021c5 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -590,6 +590,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct pt_regs *regs)
{
u64 period = event->hw.sample_period;
+ const u64 last_period = event->hw.last_period;
s64 prev, delta, left;
int record = 0;
@@ -632,7 +633,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (record) {
struct perf_sample_data data;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
if (perf_event_overflow(event, &data, regs))
fsl_emb_pmu_stop(event, 0);
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8f218ac0d445..a78a8f3244ef 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1673,6 +1673,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
struct cpu_hw_events *cpuc;
struct perf_event *event;
int idx, handled = 0;
+ u64 last_period;
u64 val;
cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1692,6 +1693,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
continue;
event = cpuc->events[idx];
+ last_period = event->hw.last_period;
val = static_call(x86_pmu_update)(event);
if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1705,7 +1707,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
if (!static_call(x86_pmu_set_period)(event))
continue;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7601196d1d18..add791c6e3d9 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3116,16 +3116,19 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
+ u64 last_period;
handled++;
if (!test_bit(bit, cpuc->active_mask))
continue;
+ last_period = event->hw.last_period;
+
if (!intel_pmu_save_and_restart(event))
continue;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
if (has_branch_stack(event))
intel_pmu_lbr_save_brstack(&data, cpuc, event);
diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c
index 034a1f6a457c..3e8ec049b46d 100644
--- a/arch/x86/events/intel/knc.c
+++ b/arch/x86/events/intel/knc.c
@@ -241,16 +241,18 @@ static int knc_pmu_handle_irq(struct pt_regs *regs)
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
+ u64 last_period;
handled++;
if (!test_bit(bit, cpuc->active_mask))
continue;
+ last_period = event->hw.last_period;
if (!intel_pmu_save_and_restart(event))
continue;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
--
2.43.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH v3 2/5] perf: Allow periodic events to alternate between two sample periods
2025-03-07 20:22 [PATCH v3 0/5] A mechanism for efficient support for per-function metrics mark.barnett
2025-03-07 20:22 ` [PATCH v3 1/5] perf: Record sample last_period before updating mark.barnett
@ 2025-03-07 20:22 ` mark.barnett
2025-03-10 12:44 ` Peter Zijlstra
2025-03-07 20:22 ` [PATCH v3 3/5] perf: Allow adding fixed random jitter to the alternate sampling period mark.barnett
` (2 subsequent siblings)
4 siblings, 1 reply; 15+ messages in thread
From: mark.barnett @ 2025-03-07 20:22 UTC (permalink / raw)
To: peterz, mingo, acme, namhyung, irogers
Cc: ben.gainey, deepak.surti, ak, will, james.clark, mark.rutland,
alexander.shishkin, jolsa, adrian.hunter, linux-perf-users,
linux-kernel, linux-arm-kernel, Mark Barnett
From: Ben Gainey <ben.gainey@arm.com>
This change modifies perf_event_attr to add a second, alternative
sample period field, and modifies the core perf overflow handling
such that when specified an event will alternate between two sample
periods.
Currently, perf does not provide a mechanism for decoupling the period
over which counters are counted from the period between samples. This is
problematic for building a tool to measure per-function metrics derived
from a sampled counter group. Ideally such a tool wants a very small
sample window in order to correctly attribute the metrics to a given
function, but prefers a larger sample period that provides representative
coverage without excessive probe effect, triggering throttling, or
generating excessive amounts of data.
By alternating between a long and short sample_period and subsequently
discarding the long samples, tools may decouple the period between
samples that the tool cares about from the window of time over which
interesting counts are collected.
It is expected that typically tools would use this feature with the
cycles or instructions events as an approximation for time, but no
restrictions are applied to which events this can be applied to.
Signed-off-by: Ben Gainey <ben.gainey@arm.com>
Signed-off-by: Mark Barnett <mark.barnett@arm.com>
---
include/linux/perf_event.h | 5 +++++
include/uapi/linux/perf_event.h | 3 +++
kernel/events/core.c | 39 ++++++++++++++++++++++++++++++++-
3 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8333f132f4a9..99ba72c8fb6d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -276,6 +276,11 @@ struct hw_perf_event {
*/
u64 freq_time_stamp;
u64 freq_count_stamp;
+
+ /*
+ * Indicates that the alternative sample period is used
+ */
+ bool using_alt_sample_period;
#endif
};
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 0524d541d4e3..499a8673df8e 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -379,6 +379,7 @@ enum perf_event_read_format {
#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */
+#define PERF_ATTR_SIZE_VER9 144 /* add: alt_sample_period */
/*
* Hardware event_id to monitor via a performance monitoring event:
@@ -531,6 +532,8 @@ struct perf_event_attr {
__u64 sig_data;
__u64 config3; /* extension of config2 */
+
+ __u64 alt_sample_period;
};
/*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bcb09e011e9e..7ec8ec6ba7ef 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4178,6 +4178,8 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bo
s64 period, sample_period;
s64 delta;
+ WARN_ON_ONCE(hwc->using_alt_sample_period);
+
period = perf_calculate_period(event, nsec, count);
delta = (s64)(period - hwc->sample_period);
@@ -9894,6 +9896,7 @@ static int __perf_event_overflow(struct perf_event *event,
int throttle, struct perf_sample_data *data,
struct pt_regs *regs)
{
+ struct hw_perf_event *hwc = &event->hw;
int events = atomic_read(&event->event_limit);
int ret = 0;
@@ -9913,6 +9916,18 @@ static int __perf_event_overflow(struct perf_event *event,
!bpf_overflow_handler(event, data, regs))
goto out;
+ /*
+ * Swap the sample period to the alternative period
+ */
+ if (event->attr.alt_sample_period) {
+ bool using_alt = hwc->using_alt_sample_period;
+ u64 sample_period = (using_alt ? event->attr.sample_period
+ : event->attr.alt_sample_period);
+
+ hwc->sample_period = sample_period;
+ hwc->using_alt_sample_period = !using_alt;
+ }
+
/*
* XXX event_limit might not quite work as expected on inherited
* events
@@ -12335,9 +12350,19 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (attr->freq && attr->sample_freq)
hwc->sample_period = 1;
hwc->last_period = hwc->sample_period;
-
local64_set(&hwc->period_left, hwc->sample_period);
+ /*
+ * alt_sample_period cannot be used with freq
+ */
+ if (attr->freq && attr->alt_sample_period)
+ goto err_ns;
+
+ if (attr->alt_sample_period) {
+ hwc->sample_period = attr->alt_sample_period;
+ hwc->using_alt_sample_period = true;
+ }
+
/*
* We do not support PERF_SAMPLE_READ on inherited events unless
* PERF_SAMPLE_TID is also selected, which allows inherited events to
@@ -12807,9 +12832,21 @@ SYSCALL_DEFINE5(perf_event_open,
if (attr.freq) {
if (attr.sample_freq > sysctl_perf_event_sample_rate)
return -EINVAL;
+ if (attr.alt_sample_period)
+ return -EINVAL;
} else {
if (attr.sample_period & (1ULL << 63))
return -EINVAL;
+ if (attr.alt_sample_period) {
+ if (!attr.sample_period)
+ return -EINVAL;
+ if (attr.alt_sample_period & (1ULL << 63))
+ return -EINVAL;
+ if (attr.alt_sample_period > attr.sample_period)
+ return -EINVAL;
+ if (attr.alt_sample_period == attr.sample_period)
+ attr.alt_sample_period = 0;
+ }
}
/* Only privileged users can get physical addresses */
--
2.43.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH v3 2/5] perf: Allow periodic events to alternate between two sample periods
2025-03-07 20:22 ` [PATCH v3 2/5] perf: Allow periodic events to alternate between two sample periods mark.barnett
@ 2025-03-10 12:44 ` Peter Zijlstra
0 siblings, 0 replies; 15+ messages in thread
From: Peter Zijlstra @ 2025-03-10 12:44 UTC (permalink / raw)
To: mark.barnett
Cc: mingo, acme, namhyung, irogers, ben.gainey, deepak.surti, ak,
will, james.clark, mark.rutland, alexander.shishkin, jolsa,
adrian.hunter, linux-perf-users, linux-kernel, linux-arm-kernel
On Fri, Mar 07, 2025 at 08:22:44PM +0000, mark.barnett@arm.com wrote:
> From: Ben Gainey <ben.gainey@arm.com>
>
> This change modifies perf_event_attr to add a second, alternative
> sample period field, and modifies the core perf overflow handling
> such that when specified an event will alternate between two sample
> periods.
>
> Currently, perf does not provide a mechanism for decoupling the period
> over which counters are counted from the period between samples. This is
> problematic for building a tool to measure per-function metrics derived
> from a sampled counter group. Ideally such a tool wants a very small
> sample window in order to correctly attribute the metrics to a given
> function, but prefers a larger sample period that provides representative
> coverage without excessive probe effect, triggering throttling, or
> generating excessive amounts of data.
>
> By alternating between a long and short sample_period and subsequently
> discarding the long samples, tools may decouple the period between
> samples that the tool cares about from the window of time over which
> interesting counts are collected.
>
> It is expected that typically tools would use this feature with the
> cycles or instructions events as an approximation for time, but no
> restrictions are applied to which events this can be applied to.
So you do add the constraint that 'alt_sample_period < sample_period'
but there is no natural reason for this to be so.
Additionally, this way the total period ends up being 'sample_period +
alt_sample_period'.
Would not a more natural way to express things be:
p1 = sample_period - alt_sample_period;
p2 = alt_sample_period;
This way you retain the total period to be 'sample_period' and naturally
get the constraint: 'alt_sample_period < sample_period'.
That is; I'm somewhat confused by the state of things; it doesn't seem
consistent.
(Also note that this alternative form might actually work in combination
with attr.freq set -- although that has a number of 'fun' details I'm
sure).
> Signed-off-by: Ben Gainey <ben.gainey@arm.com>
> Signed-off-by: Mark Barnett <mark.barnett@arm.com>
> ---
> include/linux/perf_event.h | 5 +++++
> include/uapi/linux/perf_event.h | 3 +++
> kernel/events/core.c | 39 ++++++++++++++++++++++++++++++++-
> 3 files changed, 46 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 8333f132f4a9..99ba72c8fb6d 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -276,6 +276,11 @@ struct hw_perf_event {
> */
> u64 freq_time_stamp;
> u64 freq_count_stamp;
> +
> + /*
> + * Indicates that the alternative sample period is used
> + */
> + bool using_alt_sample_period;
There's a 4 byte hole in this structure if you look; also please use a
flag, sizeof(_Bool) is ABI dependent.
> #endif
> };
>
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 0524d541d4e3..499a8673df8e 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -379,6 +379,7 @@ enum perf_event_read_format {
> #define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
> #define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
> #define PERF_ATTR_SIZE_VER8 136 /* add: config3 */
> +#define PERF_ATTR_SIZE_VER9 144 /* add: alt_sample_period */
>
> /*
> * Hardware event_id to monitor via a performance monitoring event:
> @@ -531,6 +532,8 @@ struct perf_event_attr {
> __u64 sig_data;
>
> __u64 config3; /* extension of config2 */
> +
> + __u64 alt_sample_period;
> };
>
> /*
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index bcb09e011e9e..7ec8ec6ba7ef 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -4178,6 +4178,8 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bo
> s64 period, sample_period;
> s64 delta;
>
> + WARN_ON_ONCE(hwc->using_alt_sample_period);
Groan; so that bit keeps flipping on and off, and statistically we'll
warn, but urgh.
> period = perf_calculate_period(event, nsec, count);
>
> delta = (s64)(period - hwc->sample_period);
> @@ -9894,6 +9896,7 @@ static int __perf_event_overflow(struct perf_event *event,
> int throttle, struct perf_sample_data *data,
> struct pt_regs *regs)
> {
> + struct hw_perf_event *hwc = &event->hw;
> int events = atomic_read(&event->event_limit);
> int ret = 0;
>
> @@ -9913,6 +9916,18 @@ static int __perf_event_overflow(struct perf_event *event,
> !bpf_overflow_handler(event, data, regs))
> goto out;
>
> + /*
> + * Swap the sample period to the alternative period
> + */
> + if (event->attr.alt_sample_period) {
> + bool using_alt = hwc->using_alt_sample_period;
> + u64 sample_period = (using_alt ? event->attr.sample_period
> + : event->attr.alt_sample_period);
> +
> + hwc->sample_period = sample_period;
> + hwc->using_alt_sample_period = !using_alt;
> + }
> +
> /*
> * XXX event_limit might not quite work as expected on inherited
> * events
> @@ -12335,9 +12350,19 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
> if (attr->freq && attr->sample_freq)
> hwc->sample_period = 1;
> hwc->last_period = hwc->sample_period;
> -
> local64_set(&hwc->period_left, hwc->sample_period);
>
> + /*
> + * alt_sample_period cannot be used with freq
> + */
> + if (attr->freq && attr->alt_sample_period)
> + goto err_ns;
How can this happen? This case has already been filtered in
perf_event_open() below, no?
Also, this doesn't apply to tip/perf/core, someone went and changed
things...
> +
> + if (attr->alt_sample_period) {
> + hwc->sample_period = attr->alt_sample_period;
> + hwc->using_alt_sample_period = true;
> + }
> +
> /*
> * We do not support PERF_SAMPLE_READ on inherited events unless
> * PERF_SAMPLE_TID is also selected, which allows inherited events to
> @@ -12807,9 +12832,21 @@ SYSCALL_DEFINE5(perf_event_open,
> if (attr.freq) {
> if (attr.sample_freq > sysctl_perf_event_sample_rate)
> return -EINVAL;
> + if (attr.alt_sample_period)
> + return -EINVAL;
> } else {
> if (attr.sample_period & (1ULL << 63))
> return -EINVAL;
> + if (attr.alt_sample_period) {
> + if (!attr.sample_period)
> + return -EINVAL;
> + if (attr.alt_sample_period & (1ULL << 63))
> + return -EINVAL;
> + if (attr.alt_sample_period > attr.sample_period)
> + return -EINVAL;
> + if (attr.alt_sample_period == attr.sample_period)
> + attr.alt_sample_period = 0;
> + }
> }
>
> /* Only privileged users can get physical addresses */
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v3 3/5] perf: Allow adding fixed random jitter to the alternate sampling period
2025-03-07 20:22 [PATCH v3 0/5] A mechanism for efficient support for per-function metrics mark.barnett
2025-03-07 20:22 ` [PATCH v3 1/5] perf: Record sample last_period before updating mark.barnett
2025-03-07 20:22 ` [PATCH v3 2/5] perf: Allow periodic events to alternate between two sample periods mark.barnett
@ 2025-03-07 20:22 ` mark.barnett
2025-03-10 12:47 ` Peter Zijlstra
2025-03-11 11:31 ` Peter Zijlstra
2025-03-07 20:22 ` [PATCH v3 4/5] tools/perf: Modify event parser to support alt-period term mark.barnett
2025-03-07 20:22 ` [PATCH v3 5/5] tools/perf: Modify event parser to support alt-period-jitter term mark.barnett
4 siblings, 2 replies; 15+ messages in thread
From: mark.barnett @ 2025-03-07 20:22 UTC (permalink / raw)
To: peterz, mingo, acme, namhyung, irogers
Cc: ben.gainey, deepak.surti, ak, will, james.clark, mark.rutland,
alexander.shishkin, jolsa, adrian.hunter, linux-perf-users,
linux-kernel, linux-arm-kernel, Mark Barnett
From: Ben Gainey <ben.gainey@arm.com>
This change modifies the core perf overflow handler, adding some small
random jitter to each sample period whenever an event switches between the
two alternate sample periods. A new 3-bit field, jitter_alt_period, is
added to perf_event_attr to opt into this behaviour and to bound the
amount of jitter.
This change follows the discussion in [1], where it is recognized that it
may be possible for certain patterns of execution to end up with biased
results.
[1] https://lore.kernel.org/linux-perf-users/Zc24eLqZycmIg3d2@tassilo/
Signed-off-by: Ben Gainey <ben.gainey@arm.com>
Signed-off-by: Mark Barnett <mark.barnett@arm.com>
---
include/uapi/linux/perf_event.h | 7 ++++++-
kernel/events/core.c | 9 ++++++++-
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 499a8673df8e..c0076ce8f80a 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -461,7 +461,12 @@ struct perf_event_attr {
inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
remove_on_exec : 1, /* event is removed from task on exec */
sigtrap : 1, /* send synchronous SIGTRAP on event */
- __reserved_1 : 26;
+ /*
+ * Add a limited amount of jitter on each alternate period, where
+ * the jitter is between [0, (2<<jitter_alt_period) - 1]
+ */
+ jitter_alt_period : 3,
+ __reserved_1 : 23;
union {
__u32 wakeup_events; /* wakeup every n events */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7ec8ec6ba7ef..be271e21cd06 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -15,6 +15,7 @@
#include <linux/idr.h>
#include <linux/file.h>
#include <linux/poll.h>
+#include <linux/random.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/tick.h>
@@ -9922,7 +9923,10 @@ static int __perf_event_overflow(struct perf_event *event,
if (event->attr.alt_sample_period) {
bool using_alt = hwc->using_alt_sample_period;
u64 sample_period = (using_alt ? event->attr.sample_period
- : event->attr.alt_sample_period);
+ : event->attr.alt_sample_period)
+ + (event->attr.jitter_alt_period
+ ? get_random_u32_below(2 << event->attr.jitter_alt_period)
+ : 0);
hwc->sample_period = sample_period;
hwc->using_alt_sample_period = !using_alt;
@@ -12849,6 +12853,9 @@ SYSCALL_DEFINE5(perf_event_open,
}
}
+ if (attr.jitter_alt_period && !attr.alt_sample_period)
+ return -EINVAL;
+
/* Only privileged users can get physical addresses */
if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) {
err = perf_allow_kernel(&attr);
--
2.43.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH v3 3/5] perf: Allow adding fixed random jitter to the alternate sampling period
2025-03-07 20:22 ` [PATCH v3 3/5] perf: Allow adding fixed random jitter to the alternate sampling period mark.barnett
@ 2025-03-10 12:47 ` Peter Zijlstra
2025-03-10 16:27 ` Mark Barnett
2025-03-11 11:31 ` Peter Zijlstra
1 sibling, 1 reply; 15+ messages in thread
From: Peter Zijlstra @ 2025-03-10 12:47 UTC (permalink / raw)
To: mark.barnett
Cc: mingo, acme, namhyung, irogers, ben.gainey, deepak.surti, ak,
will, james.clark, mark.rutland, alexander.shishkin, jolsa,
adrian.hunter, linux-perf-users, linux-kernel, linux-arm-kernel
On Fri, Mar 07, 2025 at 08:22:45PM +0000, mark.barnett@arm.com wrote:
> @@ -9922,7 +9923,10 @@ static int __perf_event_overflow(struct perf_event *event,
> if (event->attr.alt_sample_period) {
> bool using_alt = hwc->using_alt_sample_period;
> u64 sample_period = (using_alt ? event->attr.sample_period
> - : event->attr.alt_sample_period);
> + : event->attr.alt_sample_period)
> + + (event->attr.jitter_alt_period
> + ? get_random_u32_below(2 << event->attr.jitter_alt_period)
> + : 0);
So, ... this here is NMI context, right? Have you looked at the guts of
get_random_u32_below() ?
I would strongly suggest you go do so.
>
> hwc->sample_period = sample_period;
> hwc->using_alt_sample_period = !using_alt;
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v3 3/5] perf: Allow adding fixed random jitter to the alternate sampling period
2025-03-10 12:47 ` Peter Zijlstra
@ 2025-03-10 16:27 ` Mark Barnett
0 siblings, 0 replies; 15+ messages in thread
From: Mark Barnett @ 2025-03-10 16:27 UTC (permalink / raw)
To: Peter Zijlstra
Cc: mingo, acme, namhyung, irogers, ben.gainey, deepak.surti, ak,
will, james.clark, mark.rutland, alexander.shishkin, jolsa,
adrian.hunter, linux-perf-users, linux-kernel, linux-arm-kernel
On 3/10/25 12:47, Peter Zijlstra wrote:
> On Fri, Mar 07, 2025 at 08:22:45PM +0000, mark.barnett@arm.com wrote:
>> @@ -9922,7 +9923,10 @@ static int __perf_event_overflow(struct perf_event *event,
>> if (event->attr.alt_sample_period) {
>> bool using_alt = hwc->using_alt_sample_period;
>> u64 sample_period = (using_alt ? event->attr.sample_period
>> - : event->attr.alt_sample_period);
>> + : event->attr.alt_sample_period)
>> + + (event->attr.jitter_alt_period
>> + ? get_random_u32_below(2 << event->attr.jitter_alt_period)
>> + : 0);
>
> So, ... this here is NMI context, right? Have you looked at the guts of
> get_random_u32_below() ?
>
> I would strongly suggest you go do so.
>
Good catch. I think a pseudo-random generator would be fine here and it
looks like the implementation of prandom is safe to use in an interrupt
context. I can change to use that.
>>
>> hwc->sample_period = sample_period;
>> hwc->using_alt_sample_period = !using_alt;
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v3 3/5] perf: Allow adding fixed random jitter to the alternate sampling period
2025-03-07 20:22 ` [PATCH v3 3/5] perf: Allow adding fixed random jitter to the alternate sampling period mark.barnett
2025-03-10 12:47 ` Peter Zijlstra
@ 2025-03-11 11:31 ` Peter Zijlstra
2025-03-11 11:35 ` Peter Zijlstra
` (2 more replies)
1 sibling, 3 replies; 15+ messages in thread
From: Peter Zijlstra @ 2025-03-11 11:31 UTC (permalink / raw)
To: mark.barnett
Cc: mingo, acme, namhyung, irogers, ben.gainey, deepak.surti, ak,
will, james.clark, mark.rutland, alexander.shishkin, jolsa,
adrian.hunter, linux-perf-users, linux-kernel, linux-arm-kernel
On Fri, Mar 07, 2025 at 08:22:45PM +0000, mark.barnett@arm.com wrote:
> From: Ben Gainey <ben.gainey@arm.com>
>
> This change modifies the core perf overflow handler, adding some small
> random jitter to each sample period whenever an event switches between the
> two alternate sample periods. A new flag is added to perf_event_attr to
> opt into this behaviour.
>
> This change follows the discussion in [1], where it is recognized that it
> may be possible for certain patterns of execution to end up with biased
> results.
>
> [1] https://lore.kernel.org/linux-perf-users/Zc24eLqZycmIg3d2@tassilo/
>
> Signed-off-by: Ben Gainey <ben.gainey@arm.com>
> Signed-off-by: Mark Barnett <mark.barnett@arm.com>
> ---
> include/uapi/linux/perf_event.h | 7 ++++++-
> kernel/events/core.c | 9 ++++++++-
> 2 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 499a8673df8e..c0076ce8f80a 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -461,7 +461,12 @@ struct perf_event_attr {
> inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
> remove_on_exec : 1, /* event is removed from task on exec */
> sigtrap : 1, /* send synchronous SIGTRAP on event */
> - __reserved_1 : 26;
> + /*
> + * Add a limited amount of jitter on each alternate period, where
> + * the jitter is between [0, (2<<jitter_alt_period) - 1]
> + */
> + jitter_alt_period : 3,
> + __reserved_1 : 23;
So; I've been thinking about this interface.
I think I prefer you keep the existing sample_period/sample_freq working
as is and simply modulate with random and high-freq components.
A very little like so..
I've made the hf_sample_period 32bit since I figured that ought to be
enough -- you're aiming at very short periods after all. But there's
enough unused bits left.
So this has sample_period or sample_freq compute hwc->sample_period_base
which is first modified with random such that the average is exactly
sample_period_base (assuming a flat distribution).
This means that sample_period_base is still the right number to use for
computing freq based things. Additionally, have the 'extra' interrupt
ignored for adaptive period crud.
Also, someone needs to consider the eBPF hook and what to do with it.
I've kept the ordering as per this series, but I suspect it's wrong and
we want this before the BPF hook. Please think about this and explicitly
mention this in the next series.
Anyway, very much a sketch of things, incomplete and not been near a
compiler.
---
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 76f4265efee9..c5dd6442e96f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -229,6 +229,10 @@ struct hw_perf_event {
#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */
#define PERF_HES_ARCH 0x04
+#define PERF_HES_HF_ON 0x10 /* using high-freq sampling */
+#define PERF_HES_HF_SAMPLE 0x20
+#define PERF_HES_HF_RAND 0x40
+
int state;
/*
@@ -241,6 +245,7 @@ struct hw_perf_event {
* The period to start the next sample with.
*/
u64 sample_period;
+ u64 sample_period_base;
union {
struct { /* Sampling */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 0524d541d4e3..8dbe027f93f1 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -379,6 +379,7 @@ enum perf_event_read_format {
#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */
+#define PERF_ATTR_SIZE_VER9 144 /* add: hf_sample */
/*
* Hardware event_id to monitor via a performance monitoring event:
@@ -531,6 +532,14 @@ struct perf_event_attr {
__u64 sig_data;
__u64 config3; /* extension of config2 */
+ union {
+ __u64 hf_sample;
+ struct {
+ __u64 hf_sample_period : 32,
+ hf_sample_rand : 4,
+ __reserved_4 : 28;
+ };
+ };
};
/*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b87a5ac42ce2..e5a93edf3b5f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8,6 +8,7 @@
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*/
+#include "linux/random.h"
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/cpu.h>
@@ -55,6 +56,7 @@
#include <linux/pgtable.h>
#include <linux/buildid.h>
#include <linux/task_work.h>
+#include <linux/prandom.h>
#include "internal.h"
@@ -443,6 +445,8 @@ static cpumask_var_t perf_online_pkg_mask;
static cpumask_var_t perf_online_sys_mask;
static struct kmem_cache *perf_event_cache;
+static struct rnd_state perf_rand;
+
/*
* perf event paranoia level:
* -1 - not paranoid at all
@@ -4233,19 +4237,19 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bo
period = perf_calculate_period(event, nsec, count);
- delta = (s64)(period - hwc->sample_period);
+ delta = (s64)(period - hwc->sample_period_base);
if (delta >= 0)
delta += 7;
else
delta -= 7;
delta /= 8; /* low pass filter */
- sample_period = hwc->sample_period + delta;
+ sample_period = hwc->sample_period_base + delta;
if (!sample_period)
sample_period = 1;
- hwc->sample_period = sample_period;
+ hwc->sample_period_base = sample_period;
if (local64_read(&hwc->period_left) > 8*sample_period) {
if (disable)
@@ -4490,6 +4494,8 @@ void perf_event_task_tick(void)
if (ctx)
perf_adjust_freq_unthr_context(ctx, !!throttled);
rcu_read_unlock();
+
+ prandom_seed_state(&perf_rand, get_random_u64());
}
static int event_enable_on_exec(struct perf_event *event,
@@ -9979,6 +9985,8 @@ static int __perf_event_overflow(struct perf_event *event,
int throttle, struct perf_sample_data *data,
struct pt_regs *regs)
{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 sample_period;
int events = atomic_read(&event->event_limit);
int ret = 0;
@@ -9989,15 +9997,50 @@ static int __perf_event_overflow(struct perf_event *event,
if (unlikely(!is_sampling_event(event)))
return 0;
- ret = __perf_event_account_interrupt(event, throttle);
+ /*
+ * High Freq samples are injected inside the larger period:
+ *
+ * |------------|-|------------|-|
+ * P0 HF P1 HF
+ *
+ * By ignoring the HF samples, we measure the actual period.
+ */
+ if (!(hwc->state & PERF_HES_HF_SAMPLE))
+ ret = __perf_event_account_interrupt(event, throttle);
if (event->attr.aux_pause)
perf_event_aux_pause(event->aux_event, true);
+ /* XXX interaction between HF samples and BPF */
if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
!bpf_overflow_handler(event, data, regs))
goto out;
+ sample_period = hwc->sample_period_base;
+ if (hwc->state & PERF_HES_HF_RAND) {
+ u64 rand = 1 << event->attr.hf_sample_rand;
+ sample_period -= rand / 2;
+ sample_period += prandom_u32_state(&perf_rand) & (rand - 1);
+ }
+ if (hwc->state & PERF_HES_HF_ON) {
+ u64 hf_sample_period = event->attr.hf_sample_period;
+
+ if (sample_period < hf_sample_period) {
+ hwc->state &= ~PERF_HES_HF_ON;
+ goto set_period;
+ }
+
+ if (!(hwc->state & PERF_HES_HF_SAMPLE)) {
+ hwc->sample_period -= hf_sample_period;
+ hwc->state |= PERF_HES_HF_SAMPLE;
+ } else {
+ hwc->sample_period = hf_sample_period;
+ hwc->state &= ~PERF_HES_HF_SAMPLE;
+ }
+ }
+set_period:
+ hwc->sample_period = sample_period;
+
/*
* XXX event_limit might not quite work as expected on inherited
* events
@@ -12458,8 +12501,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
hwc = &event->hw;
hwc->sample_period = attr->sample_period;
- if (attr->freq && attr->sample_freq)
+ hwc->sample_period_base = attr->sample_period;
+ if (attr->freq && attr->sample_freq) {
hwc->sample_period = 1;
+ hwc->sample_period_base = 1;
+ }
hwc->last_period = hwc->sample_period;
local64_set(&hwc->period_left, hwc->sample_period);
@@ -13824,6 +13870,7 @@ inherit_event(struct perf_event *parent_event,
struct hw_perf_event *hwc = &child_event->hw;
hwc->sample_period = sample_period;
+ hwc->sample_period_base = sample_period;
hwc->last_period = sample_period;
local64_set(&hwc->period_left, sample_period);
@@ -14279,6 +14326,8 @@ void __init perf_event_init(void)
{
int ret;
+ prandom_seed_state(&perf_rand, get_random_u64());
+
idr_init(&pmu_idr);
perf_event_init_all_cpus();
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH v3 3/5] perf: Allow adding fixed random jitter to the alternate sampling period
2025-03-11 11:31 ` Peter Zijlstra
@ 2025-03-11 11:35 ` Peter Zijlstra
2025-03-12 10:44 ` Peter Zijlstra
2025-03-11 11:37 ` Peter Zijlstra
2025-03-11 17:22 ` Mark Barnett
2 siblings, 1 reply; 15+ messages in thread
From: Peter Zijlstra @ 2025-03-11 11:35 UTC (permalink / raw)
To: mark.barnett
Cc: mingo, acme, namhyung, irogers, ben.gainey, deepak.surti, ak,
will, james.clark, mark.rutland, alexander.shishkin, jolsa,
adrian.hunter, linux-perf-users, linux-kernel, linux-arm-kernel
On Tue, Mar 11, 2025 at 12:31:29PM +0100, Peter Zijlstra wrote:
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index b87a5ac42ce2..e5a93edf3b5f 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -8,6 +8,7 @@
> * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
> */
>
> +#include "linux/random.h"
> #include <linux/fs.h>
> #include <linux/mm.h>
> #include <linux/cpu.h>
Argh, this is neovim trying to be 'helpful'. If anybody reading this
knows how to make it stop adding headers, please let me know, it's
driving me nuts.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v3 3/5] perf: Allow adding fixed random jitter to the alternate sampling period
2025-03-11 11:35 ` Peter Zijlstra
@ 2025-03-12 10:44 ` Peter Zijlstra
0 siblings, 0 replies; 15+ messages in thread
From: Peter Zijlstra @ 2025-03-12 10:44 UTC (permalink / raw)
To: mark.barnett
Cc: mingo, acme, namhyung, irogers, ben.gainey, deepak.surti, ak,
will, james.clark, mark.rutland, alexander.shishkin, jolsa,
adrian.hunter, linux-perf-users, linux-kernel, linux-arm-kernel
On Tue, Mar 11, 2025 at 12:35:30PM +0100, Peter Zijlstra wrote:
> On Tue, Mar 11, 2025 at 12:31:29PM +0100, Peter Zijlstra wrote:
> > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > index b87a5ac42ce2..e5a93edf3b5f 100644
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -8,6 +8,7 @@
> > * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
> > */
> >
> > +#include "linux/random.h"
> > #include <linux/fs.h>
> > #include <linux/mm.h>
> > #include <linux/cpu.h>
>
> Argh, this is neovim trying to be 'helpful'. If anybody reading this
> knows how to make it stop adding headers, please let me know, it's
> driving me nuts.
Adding this cmd thing to the lspconfig like so:
require 'lspconfig'.clangd.setup() {
cmd = {
"clangd",
"--header-insertion=never",
},
}
seems to do the trick.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v3 3/5] perf: Allow adding fixed random jitter to the alternate sampling period
2025-03-11 11:31 ` Peter Zijlstra
2025-03-11 11:35 ` Peter Zijlstra
@ 2025-03-11 11:37 ` Peter Zijlstra
2025-03-11 17:22 ` Mark Barnett
2 siblings, 0 replies; 15+ messages in thread
From: Peter Zijlstra @ 2025-03-11 11:37 UTC (permalink / raw)
To: mark.barnett
Cc: mingo, acme, namhyung, irogers, ben.gainey, deepak.surti, ak,
will, james.clark, mark.rutland, alexander.shishkin, jolsa,
adrian.hunter, linux-perf-users, linux-kernel, linux-arm-kernel
On Tue, Mar 11, 2025 at 12:31:29PM +0100, Peter Zijlstra wrote:
> + sample_period = hwc->sample_period_base;
> + if (hwc->state & PERF_HES_HF_RAND) {
> + u64 rand = 1 << event->attr.hf_sample_rand;
> + sample_period -= rand / 2;
> + sample_period += prandom_u32_state(&perf_rand) & (rand - 1);
> + }
> + if (hwc->state & PERF_HES_HF_ON) {
> + u64 hf_sample_period = event->attr.hf_sample_period;
> +
> + if (sample_period < hf_sample_period) {
> + hwc->state &= ~PERF_HES_HF_ON;
> + goto set_period;
> + }
> +
> + if (!(hwc->state & PERF_HES_HF_SAMPLE)) {
> + hwc->sample_period -= hf_sample_period;
> + hwc->state |= PERF_HES_HF_SAMPLE;
> + } else {
> + hwc->sample_period = hf_sample_period;
> + hwc->state &= ~PERF_HES_HF_SAMPLE;
and obviously this should be the local sample_period modified above, not
the hwc one.
> + }
> + }
> +set_period:
> + hwc->sample_period = sample_period;
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v3 3/5] perf: Allow adding fixed random jitter to the alternate sampling period
2025-03-11 11:31 ` Peter Zijlstra
2025-03-11 11:35 ` Peter Zijlstra
2025-03-11 11:37 ` Peter Zijlstra
@ 2025-03-11 17:22 ` Mark Barnett
2025-03-12 9:39 ` Peter Zijlstra
2 siblings, 1 reply; 15+ messages in thread
From: Mark Barnett @ 2025-03-11 17:22 UTC (permalink / raw)
To: Peter Zijlstra
Cc: mingo, acme, namhyung, irogers, ben.gainey, deepak.surti, ak,
will, james.clark, mark.rutland, alexander.shishkin, jolsa,
adrian.hunter, linux-perf-users, linux-kernel, linux-arm-kernel
On 3/11/25 11:31, Peter Zijlstra wrote:
> On Fri, Mar 07, 2025 at 08:22:45PM +0000, mark.barnett@arm.com wrote:
>> From: Ben Gainey <ben.gainey@arm.com>
>>
>> This change modifies the core perf overflow handler, adding some small
>> random jitter to each sample period whenever an event switches between the
>> two alternate sample periods. A new flag is added to perf_event_attr to
>> opt into this behaviour.
>>
>> This change follows the discussion in [1], where it is recognized that it
>> may be possible for certain patterns of execution to end up with biased
>> results.
>>
>> [1] https://lore.kernel.org/linux-perf-users/Zc24eLqZycmIg3d2@tassilo/
>>
>> Signed-off-by: Ben Gainey <ben.gainey@arm.com>
>> Signed-off-by: Mark Barnett <mark.barnett@arm.com>
>> ---
>> include/uapi/linux/perf_event.h | 7 ++++++-
>> kernel/events/core.c | 9 ++++++++-
>> 2 files changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
>> index 499a8673df8e..c0076ce8f80a 100644
>> --- a/include/uapi/linux/perf_event.h
>> +++ b/include/uapi/linux/perf_event.h
>> @@ -461,7 +461,12 @@ struct perf_event_attr {
>> inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
>> remove_on_exec : 1, /* event is removed from task on exec */
>> sigtrap : 1, /* send synchronous SIGTRAP on event */
>> - __reserved_1 : 26;
>> + /*
>> + * Add a limited amount of jitter on each alternate period, where
>> + * the jitter is between [0, (2<<jitter_alt_period) - 1]
>> + */
>> + jitter_alt_period : 3,
>> + __reserved_1 : 23;
>
> So; I've been thinking about this interface.
>
> I think I prefer you keep the existing sample_period/sample_freq working
> as is and simply modulate with random and high-freq components.
>
> A very little like so..
>
> I've made the hf_sample_period 32bit since I figured that ought to be
> enough -- you're aiming at very short periods after all. But there's
> enough unused bits left.
>
> So this has sample_period or sample_freq compute hwc->sample_period_base
> which is first modified with random such that the average is exactly
> sample_period_base (assuming a flat distribution).
>
> This means that sample_period_base is still the right number to use for
> computing freq based things. Additionally, have the 'extra' interrupt
> ignored for adaptive period crud.
>
> Also, someone needs to consider the eBPF hook and what to do with it.
> I've kept the ordering as per this series, but I suspect it's wrong and
> we want this before the BPF hook. Please think about this and explicitly
> mention this in the next series.
>
> Anyway, very much a sketch of things, incomplete and not been near a
> compiler.
>
>
Thanks, Peter!
OK, I see what you mean. Packing the fields into hf_sample makes sense.
I'll have a look at the eBPF hook and see if we need to do anything. The
sample period is always stored in perf_sample_data so it's technically
possible for eBPF programs to identify the high-frequency ones, but it's
not a great API. Maybe we should have an explicit flag.
I have one question about interrupt accounting, below...
>
> ---
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 76f4265efee9..c5dd6442e96f 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -229,6 +229,10 @@ struct hw_perf_event {
> #define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */
> #define PERF_HES_ARCH 0x04
>
> +#define PERF_HES_HF_ON 0x10 /* using high-freq sampling */
> +#define PERF_HES_HF_SAMPLE 0x20
> +#define PERF_HES_HF_RAND 0x40
> +
> int state;
>
> /*
> @@ -241,6 +245,7 @@ struct hw_perf_event {
> * The period to start the next sample with.
> */
> u64 sample_period;
> + u64 sample_period_base;
>
> union {
> struct { /* Sampling */
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 0524d541d4e3..8dbe027f93f1 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -379,6 +379,7 @@ enum perf_event_read_format {
> #define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
> #define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
> #define PERF_ATTR_SIZE_VER8 136 /* add: config3 */
> +#define PERF_ATTR_SIZE_VER9 144 /* add: hf_sample */
>
> /*
> * Hardware event_id to monitor via a performance monitoring event:
> @@ -531,6 +532,14 @@ struct perf_event_attr {
> __u64 sig_data;
>
> __u64 config3; /* extension of config2 */
> + union {
> + __u64 hf_sample;
> + struct {
> + __u64 hf_sample_period : 32,
> + hf_sample_rand : 4,
> + __reserved_4 : 28;
> + };
> + };
> };
>
> /*
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index b87a5ac42ce2..e5a93edf3b5f 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -8,6 +8,7 @@
> * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
> */
>
> +#include "linux/random.h"
> #include <linux/fs.h>
> #include <linux/mm.h>
> #include <linux/cpu.h>
> @@ -55,6 +56,7 @@
> #include <linux/pgtable.h>
> #include <linux/buildid.h>
> #include <linux/task_work.h>
> +#include <linux/prandom.h>
>
> #include "internal.h"
>
> @@ -443,6 +445,8 @@ static cpumask_var_t perf_online_pkg_mask;
> static cpumask_var_t perf_online_sys_mask;
> static struct kmem_cache *perf_event_cache;
>
> +static struct rnd_state perf_rand;
> +
> /*
> * perf event paranoia level:
> * -1 - not paranoid at all
> @@ -4233,19 +4237,19 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bo
>
> period = perf_calculate_period(event, nsec, count);
>
> - delta = (s64)(period - hwc->sample_period);
> + delta = (s64)(period - hwc->sample_period_base);
> if (delta >= 0)
> delta += 7;
> else
> delta -= 7;
> delta /= 8; /* low pass filter */
>
> - sample_period = hwc->sample_period + delta;
> + sample_period = hwc->sample_period_base + delta;
>
> if (!sample_period)
> sample_period = 1;
>
> - hwc->sample_period = sample_period;
> + hwc->sample_period_base = sample_period;
>
> if (local64_read(&hwc->period_left) > 8*sample_period) {
> if (disable)
> @@ -4490,6 +4494,8 @@ void perf_event_task_tick(void)
> if (ctx)
> perf_adjust_freq_unthr_context(ctx, !!throttled);
> rcu_read_unlock();
> +
> + prandom_seed_state(&perf_rand, get_random_u64());
> }
>
> static int event_enable_on_exec(struct perf_event *event,
> @@ -9979,6 +9985,8 @@ static int __perf_event_overflow(struct perf_event *event,
> int throttle, struct perf_sample_data *data,
> struct pt_regs *regs)
> {
> + struct hw_perf_event *hwc = &event->hw;
> + u64 sample_period;
> int events = atomic_read(&event->event_limit);
> int ret = 0;
>
> @@ -9989,15 +9997,50 @@ static int __perf_event_overflow(struct perf_event *event,
> if (unlikely(!is_sampling_event(event)))
> return 0;
>
> - ret = __perf_event_account_interrupt(event, throttle);
> + /*
> + * High Freq samples are injected inside the larger period:
> + *
> + * |------------|-|------------|-|
> + * P0 HF P1 HF
> + *
> + * By ignoring the HF samples, we measure the actual period.
> + */
> + if (!(hwc->state & PERF_HES_HF_SAMPLE))
> + ret = __perf_event_account_interrupt(event, throttle);
>
The high-frequency samples should still contribute to interrupt
accounting/throttling, right? We'd just need to put guards around the
adaptive period stuff so that HF samples don't contribute to the
frequency training.
> if (event->attr.aux_pause)
> perf_event_aux_pause(event->aux_event, true);
>
> + /* XXX interaction between HF samples and BPF */
> if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
> !bpf_overflow_handler(event, data, regs))
> goto out;
>
> + sample_period = hwc->sample_period_base;
> + if (hwc->state & PERF_HES_HF_RAND) {
> + u64 rand = 1 << event->attr.hf_sample_rand;
> + sample_period -= rand / 2;
> + sample_period += prandom_u32_state(&perf_rand) & (rand - 1);
> + }
> + if (hwc->state & PERF_HES_HF_ON) {
> + u64 hf_sample_period = event->attr.hf_sample_period;
> +
> + if (sample_period < hf_sample_period) {
> + hwc->state &= ~PERF_HES_HF_ON;
> + goto set_period;
> + }
> +
> + if (!(hwc->state & PERF_HES_HF_SAMPLE)) {
> + hwc->sample_period -= hf_sample_period;
> + hwc->state |= PERF_HES_HF_SAMPLE;
> + } else {
> + hwc->sample_period = hf_sample_period;
> + hwc->state &= ~PERF_HES_HF_SAMPLE;
> + }
> + }
> +set_period:
> + hwc->sample_period = sample_period;
> +
> /*
> * XXX event_limit might not quite work as expected on inherited
> * events
> @@ -12458,8 +12501,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
>
> hwc = &event->hw;
> hwc->sample_period = attr->sample_period;
> - if (attr->freq && attr->sample_freq)
> + hwc->sample_period_base = attr->sample_period;
> + if (attr->freq && attr->sample_freq) {
> hwc->sample_period = 1;
> + hwc->sample_period_base = 1;
> + }
> hwc->last_period = hwc->sample_period;
>
> local64_set(&hwc->period_left, hwc->sample_period);
> @@ -13824,6 +13870,7 @@ inherit_event(struct perf_event *parent_event,
> struct hw_perf_event *hwc = &child_event->hw;
>
> hwc->sample_period = sample_period;
> + hwc->sample_period_base = sample_period;
> hwc->last_period = sample_period;
>
> local64_set(&hwc->period_left, sample_period);
> @@ -14279,6 +14326,8 @@ void __init perf_event_init(void)
> {
> int ret;
>
> + prandom_seed_state(&perf_rand, get_random_u64());
> +
> idr_init(&pmu_idr);
>
> perf_event_init_all_cpus();
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v3 3/5] perf: Allow adding fixed random jitter to the alternate sampling period
2025-03-11 17:22 ` Mark Barnett
@ 2025-03-12 9:39 ` Peter Zijlstra
0 siblings, 0 replies; 15+ messages in thread
From: Peter Zijlstra @ 2025-03-12 9:39 UTC (permalink / raw)
To: Mark Barnett
Cc: mingo, acme, namhyung, irogers, ben.gainey, deepak.surti, ak,
will, james.clark, mark.rutland, alexander.shishkin, jolsa,
adrian.hunter, linux-perf-users, linux-kernel, linux-arm-kernel
On Tue, Mar 11, 2025 at 05:22:16PM +0000, Mark Barnett wrote:
> > @@ -9979,6 +9985,8 @@ static int __perf_event_overflow(struct perf_event *event,
> > int throttle, struct perf_sample_data *data,
> > struct pt_regs *regs)
> > {
> > + struct hw_perf_event *hwc = &event->hw;
> > + u64 sample_period;
> > int events = atomic_read(&event->event_limit);
> > int ret = 0;
> > @@ -9989,15 +9997,50 @@ static int __perf_event_overflow(struct perf_event *event,
> > if (unlikely(!is_sampling_event(event)))
> > return 0;
> > - ret = __perf_event_account_interrupt(event, throttle);
> > + /*
> > + * High Freq samples are injected inside the larger period:
> > + *
> > + * |------------|-|------------|-|
> > + * P0 HF P1 HF
> > + *
> > + * By ignoring the HF samples, we measure the actual period.
> > + */
> > + if (!(hwc->state & PERF_HES_HF_SAMPLE))
> > + ret = __perf_event_account_interrupt(event, throttle);
>
> The high-frequency samples should still contribute to interrupt
> accounting/throttling, right? We'd just need to put guards around the
> adaptive period stuff so that HF samples don't contribute to the frequency
> training.
Yeah, I suppose it should. This means breaking up that function but that
isn't hard.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v3 4/5] tools/perf: Modify event parser to support alt-period term
2025-03-07 20:22 [PATCH v3 0/5] A mechanism for efficient support for per-function metrics mark.barnett
` (2 preceding siblings ...)
2025-03-07 20:22 ` [PATCH v3 3/5] perf: Allow adding fixed random jitter to the alternate sampling period mark.barnett
@ 2025-03-07 20:22 ` mark.barnett
2025-03-07 20:22 ` [PATCH v3 5/5] tools/perf: Modify event parser to support alt-period-jitter term mark.barnett
4 siblings, 0 replies; 15+ messages in thread
From: mark.barnett @ 2025-03-07 20:22 UTC (permalink / raw)
To: peterz, mingo, acme, namhyung, irogers
Cc: ben.gainey, deepak.surti, ak, will, james.clark, mark.rutland,
alexander.shishkin, jolsa, adrian.hunter, linux-perf-users,
linux-kernel, linux-arm-kernel, Mark Barnett
From: Ben Gainey <ben.gainey@arm.com>
parse-events is modified, adding the "alt-period" term which can be used
to specify the alternative sampling period.
Signed-off-by: Ben Gainey <ben.gainey@arm.com>
Signed-off-by: Mark Barnett <mark.barnett@arm.com>
---
tools/include/uapi/linux/perf_event.h | 3 +++
tools/perf/tests/shell/attr/base-record | 3 ++-
tools/perf/tests/shell/attr/base-record-spe | 1 +
tools/perf/tests/shell/attr/base-stat | 3 ++-
tools/perf/tests/shell/attr/system-wide-dummy | 3 ++-
.../tests/shell/attr/test-record-alt-period-term | 12 ++++++++++++
tools/perf/tests/shell/attr/test-record-dummy-C0 | 3 ++-
tools/perf/tests/shell/lib/attr.py | 1 +
tools/perf/util/evsel.c | 1 +
tools/perf/util/parse-events.c | 15 +++++++++++++++
tools/perf/util/parse-events.h | 3 ++-
tools/perf/util/parse-events.l | 1 +
tools/perf/util/perf_event_attr_fprintf.c | 1 +
tools/perf/util/pmu.c | 1 +
14 files changed, 46 insertions(+), 5 deletions(-)
create mode 100644 tools/perf/tests/shell/attr/test-record-alt-period-term
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 0524d541d4e3..499a8673df8e 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -379,6 +379,7 @@ enum perf_event_read_format {
#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */
+#define PERF_ATTR_SIZE_VER9 144 /* add: alt_sample_period */
/*
* Hardware event_id to monitor via a performance monitoring event:
@@ -531,6 +532,8 @@ struct perf_event_attr {
__u64 sig_data;
__u64 config3; /* extension of config2 */
+
+ __u64 alt_sample_period;
};
/*
diff --git a/tools/perf/tests/shell/attr/base-record b/tools/perf/tests/shell/attr/base-record
index b44e4e6e4443..28a7233f7bc1 100644
--- a/tools/perf/tests/shell/attr/base-record
+++ b/tools/perf/tests/shell/attr/base-record
@@ -5,7 +5,7 @@ group_fd=-1
flags=0|8
cpu=*
type=0|1
-size=136
+size=144
config=0|1
sample_period=*
sample_type=263
@@ -39,3 +39,4 @@ config2=0
branch_sample_type=0
sample_regs_user=0
sample_stack_user=0
+alt_sample_period=0
diff --git a/tools/perf/tests/shell/attr/base-record-spe b/tools/perf/tests/shell/attr/base-record-spe
index 08fa96b59240..ad8eb72e655a 100644
--- a/tools/perf/tests/shell/attr/base-record-spe
+++ b/tools/perf/tests/shell/attr/base-record-spe
@@ -38,3 +38,4 @@ config2=*
branch_sample_type=*
sample_regs_user=*
sample_stack_user=*
+alt_sample_period=0
diff --git a/tools/perf/tests/shell/attr/base-stat b/tools/perf/tests/shell/attr/base-stat
index fccd8ec4d1b0..2de92c5c300d 100644
--- a/tools/perf/tests/shell/attr/base-stat
+++ b/tools/perf/tests/shell/attr/base-stat
@@ -5,7 +5,7 @@ group_fd=-1
flags=0|8
cpu=*
type=0
-size=136
+size=144
config=0
sample_period=0
sample_type=65536
@@ -39,3 +39,4 @@ config2=0
branch_sample_type=0
sample_regs_user=0
sample_stack_user=0
+alt_sample_period=0
diff --git a/tools/perf/tests/shell/attr/system-wide-dummy b/tools/perf/tests/shell/attr/system-wide-dummy
index a1e1d6a263bf..c0a17bb3c022 100644
--- a/tools/perf/tests/shell/attr/system-wide-dummy
+++ b/tools/perf/tests/shell/attr/system-wide-dummy
@@ -7,7 +7,7 @@ cpu=*
pid=-1
flags=8
type=1
-size=136
+size=144
config=9
sample_period=1
# PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
@@ -50,3 +50,4 @@ config2=0
branch_sample_type=0
sample_regs_user=0
sample_stack_user=0
+alt_sample_period=0
diff --git a/tools/perf/tests/shell/attr/test-record-alt-period-term b/tools/perf/tests/shell/attr/test-record-alt-period-term
new file mode 100644
index 000000000000..fcdb790adcd3
--- /dev/null
+++ b/tools/perf/tests/shell/attr/test-record-alt-period-term
@@ -0,0 +1,12 @@
+[config]
+command = record
+args = --no-bpf-event -e cycles/period=3,alt-period=2/ -- kill >/dev/null 2>&1
+ret = 1
+kernel_since = 6.11
+
+[event-10:base-record]
+sample_period=3
+alt_sample_period=2
+
+freq=0
+sample_type=7
diff --git a/tools/perf/tests/shell/attr/test-record-dummy-C0 b/tools/perf/tests/shell/attr/test-record-dummy-C0
index 91499405fff4..e6315918a95e 100644
--- a/tools/perf/tests/shell/attr/test-record-dummy-C0
+++ b/tools/perf/tests/shell/attr/test-record-dummy-C0
@@ -10,7 +10,7 @@ cpu=0
pid=-1
flags=8
type=1
-size=136
+size=144
config=9
sample_period=4000
# PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
@@ -53,3 +53,4 @@ config2=0
branch_sample_type=0
sample_regs_user=0
sample_stack_user=0
+alt_sample_period=0
diff --git a/tools/perf/tests/shell/lib/attr.py b/tools/perf/tests/shell/lib/attr.py
index 3db9a7d78715..04e95f76005a 100644
--- a/tools/perf/tests/shell/lib/attr.py
+++ b/tools/perf/tests/shell/lib/attr.py
@@ -91,6 +91,7 @@ class Event(dict):
'branch_sample_type',
'sample_regs_user',
'sample_stack_user',
+ 'alt_sample_period',
]
def add(self, data):
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4a0ef095db92..0657e580ea13 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -180,6 +180,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu
WRITE_ASS(branch_sample_type, "llu");
WRITE_ASS(sample_regs_user, "llu");
WRITE_ASS(sample_stack_user, PRIu32);
+ WRITE_ASS(alt_sample_period, "llu");
fclose(file);
return 0;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 35e48fe56dfa..653d84080bc7 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -805,6 +805,7 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type)
[PARSE_EVENTS__TERM_TYPE_RAW] = "raw",
[PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache",
[PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware",
+ [PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD] = "alt-period",
};
if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR)
return "unknown term";
@@ -833,6 +834,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er
case PARSE_EVENTS__TERM_TYPE_NAME:
case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+ case PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD:
case PARSE_EVENTS__TERM_TYPE_PERCORE:
return true;
case PARSE_EVENTS__TERM_TYPE_USER:
@@ -981,6 +983,16 @@ do { \
return -EINVAL;
}
break;
+ case PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD:
+ CHECK_TYPE_VAL(NUM);
+ if (term->val.num == 0) {
+ parse_events_error__handle(err, term->err_val,
+ strdup("expected a non-zero value"),
+ NULL);
+ return -EINVAL;
+ }
+ attr->alt_sample_period = term->val.num;
+ break;
case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
case PARSE_EVENTS__TERM_TYPE_USER:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
@@ -1108,6 +1120,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
case PARSE_EVENTS__TERM_TYPE_RAW:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ case PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD:
default:
if (err) {
parse_events_error__handle(err, term->err_term,
@@ -1242,6 +1255,7 @@ do { \
case PARSE_EVENTS__TERM_TYPE_RAW:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ case PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD:
default:
break;
}
@@ -1296,6 +1310,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head
case PARSE_EVENTS__TERM_TYPE_RAW:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ case PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD:
default:
break;
}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index e176a34ab088..d00bb6c5d9ab 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -80,7 +80,8 @@ enum parse_events__term_type {
PARSE_EVENTS__TERM_TYPE_RAW,
PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
PARSE_EVENTS__TERM_TYPE_HARDWARE,
-#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_HARDWARE + 1)
+ PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD,
+#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD + 1)
};
struct parse_events_term {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 7ed86e3e34e3..9c64434167b7 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -335,6 +335,7 @@ aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
+alt-period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD); }
cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index c7f3543b9921..e823240b7dd8 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -334,6 +334,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(aux_start_paused, p_unsigned);
PRINT_ATTRf(aux_pause, p_unsigned);
PRINT_ATTRf(aux_resume, p_unsigned);
+ PRINT_ATTRf(alt_sample_period, p_unsigned);
return ret;
}
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 57450c73fb63..7576fe35a869 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1804,6 +1804,7 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call
"aux-output",
"aux-action=(pause|resume|start-paused)",
"aux-sample-size=number",
+ "alt-period=number",
};
struct perf_pmu_format *format;
int ret;
--
2.43.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH v3 5/5] tools/perf: Modify event parser to support alt-period-jitter term
2025-03-07 20:22 [PATCH v3 0/5] A mechanism for efficient support for per-function metrics mark.barnett
` (3 preceding siblings ...)
2025-03-07 20:22 ` [PATCH v3 4/5] tools/perf: Modify event parser to support alt-period term mark.barnett
@ 2025-03-07 20:22 ` mark.barnett
4 siblings, 0 replies; 15+ messages in thread
From: mark.barnett @ 2025-03-07 20:22 UTC (permalink / raw)
To: peterz, mingo, acme, namhyung, irogers
Cc: ben.gainey, deepak.surti, ak, will, james.clark, mark.rutland,
alexander.shishkin, jolsa, adrian.hunter, linux-perf-users,
linux-kernel, linux-arm-kernel, Mark Barnett
From: Ben Gainey <ben.gainey@arm.com>
parse-events is modified, adding the "alt-period-jitter" term which
can be used to enable random jitter of the alternative sample
period.
Signed-off-by: Ben Gainey <ben.gainey@arm.com>
Signed-off-by: Mark Barnett <mark.barnett@arm.com>
---
tools/include/uapi/linux/perf_event.h | 7 ++++++-
tools/perf/tests/shell/attr/base-record | 1 +
tools/perf/tests/shell/attr/base-record-spe | 1 +
tools/perf/tests/shell/attr/base-stat | 1 +
tools/perf/tests/shell/attr/system-wide-dummy | 1 +
.../shell/attr/test-record-alt-period-jitter-term | 13 +++++++++++++
tools/perf/tests/shell/attr/test-record-dummy-C0 | 1 +
tools/perf/tests/shell/lib/attr.py | 1 +
tools/perf/util/evsel.c | 1 +
tools/perf/util/parse-events.c | 15 +++++++++++++++
tools/perf/util/parse-events.h | 3 ++-
tools/perf/util/parse-events.l | 1 +
tools/perf/util/pmu.c | 3 ++-
13 files changed, 46 insertions(+), 3 deletions(-)
create mode 100644 tools/perf/tests/shell/attr/test-record-alt-period-jitter-term
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 499a8673df8e..c0076ce8f80a 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -461,7 +461,12 @@ struct perf_event_attr {
inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
remove_on_exec : 1, /* event is removed from task on exec */
sigtrap : 1, /* send synchronous SIGTRAP on event */
- __reserved_1 : 26;
+ /*
+ * Add a limited amount of jitter on each alternate period, where
+ * the jitter is between [0, (2<<jitter_alt_period) - 1]
+ */
+ jitter_alt_period : 3,
+ __reserved_1 : 23;
union {
__u32 wakeup_events; /* wakeup every n events */
diff --git a/tools/perf/tests/shell/attr/base-record b/tools/perf/tests/shell/attr/base-record
index 28a7233f7bc1..1f5ab125c78d 100644
--- a/tools/perf/tests/shell/attr/base-record
+++ b/tools/perf/tests/shell/attr/base-record
@@ -40,3 +40,4 @@ branch_sample_type=0
sample_regs_user=0
sample_stack_user=0
alt_sample_period=0
+jitter_alt_period=0
diff --git a/tools/perf/tests/shell/attr/base-record-spe b/tools/perf/tests/shell/attr/base-record-spe
index ad8eb72e655a..b35ca04b2ce4 100644
--- a/tools/perf/tests/shell/attr/base-record-spe
+++ b/tools/perf/tests/shell/attr/base-record-spe
@@ -39,3 +39,4 @@ branch_sample_type=*
sample_regs_user=*
sample_stack_user=*
alt_sample_period=0
+jitter_alt_period=0
diff --git a/tools/perf/tests/shell/attr/base-stat b/tools/perf/tests/shell/attr/base-stat
index 2de92c5c300d..2d90a055686a 100644
--- a/tools/perf/tests/shell/attr/base-stat
+++ b/tools/perf/tests/shell/attr/base-stat
@@ -40,3 +40,4 @@ branch_sample_type=0
sample_regs_user=0
sample_stack_user=0
alt_sample_period=0
+jitter_alt_period=0
diff --git a/tools/perf/tests/shell/attr/system-wide-dummy b/tools/perf/tests/shell/attr/system-wide-dummy
index c0a17bb3c022..527707b505e0 100644
--- a/tools/perf/tests/shell/attr/system-wide-dummy
+++ b/tools/perf/tests/shell/attr/system-wide-dummy
@@ -51,3 +51,4 @@ branch_sample_type=0
sample_regs_user=0
sample_stack_user=0
alt_sample_period=0
+jitter_alt_period=0
diff --git a/tools/perf/tests/shell/attr/test-record-alt-period-jitter-term b/tools/perf/tests/shell/attr/test-record-alt-period-jitter-term
new file mode 100644
index 000000000000..6b31c898c905
--- /dev/null
+++ b/tools/perf/tests/shell/attr/test-record-alt-period-jitter-term
@@ -0,0 +1,13 @@
+[config]
+command = record
+args = --no-bpf-event -e cycles/period=3,alt-period=2,alt-period-jitter=7/ -- kill >/dev/null 2>&1
+ret = 1
+kernel_since = 6.11
+
+[event-10:base-record]
+sample_period=3
+alt_sample_period=2
+jitter_alt_period=7
+
+freq=0
+sample_type=7
diff --git a/tools/perf/tests/shell/attr/test-record-dummy-C0 b/tools/perf/tests/shell/attr/test-record-dummy-C0
index e6315918a95e..436534df0434 100644
--- a/tools/perf/tests/shell/attr/test-record-dummy-C0
+++ b/tools/perf/tests/shell/attr/test-record-dummy-C0
@@ -54,3 +54,4 @@ branch_sample_type=0
sample_regs_user=0
sample_stack_user=0
alt_sample_period=0
+jitter_alt_period=0
diff --git a/tools/perf/tests/shell/lib/attr.py b/tools/perf/tests/shell/lib/attr.py
index 04e95f76005a..d15363e925fe 100644
--- a/tools/perf/tests/shell/lib/attr.py
+++ b/tools/perf/tests/shell/lib/attr.py
@@ -92,6 +92,7 @@ class Event(dict):
'sample_regs_user',
'sample_stack_user',
'alt_sample_period',
+ 'jitter_alt_period',
]
def add(self, data):
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 0657e580ea13..2bb3e346f2b9 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -181,6 +181,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu
WRITE_ASS(sample_regs_user, "llu");
WRITE_ASS(sample_stack_user, PRIu32);
WRITE_ASS(alt_sample_period, "llu");
+ WRITE_ASS(jitter_alt_period, "d");
fclose(file);
return 0;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 653d84080bc7..2e0ecfb77af3 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -806,6 +806,7 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type)
[PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache",
[PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware",
[PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD] = "alt-period",
+ [PARSE_EVENTS__TERM_TYPE_ALT_PERIOD_JITTER] = "alt-period-jitter",
};
if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR)
return "unknown term";
@@ -835,6 +836,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er
case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
case PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD:
+ case PARSE_EVENTS__TERM_TYPE_ALT_PERIOD_JITTER:
case PARSE_EVENTS__TERM_TYPE_PERCORE:
return true;
case PARSE_EVENTS__TERM_TYPE_USER:
@@ -993,6 +995,16 @@ do { \
}
attr->alt_sample_period = term->val.num;
break;
+ case PARSE_EVENTS__TERM_TYPE_ALT_PERIOD_JITTER:
+ CHECK_TYPE_VAL(NUM);
+ /* Check the uncast value: a narrowing cast could wrap a large number into 0-7. */
+ if (term->val.num > 7) {
+ parse_events_error__handle(err, term->err_val,
+ strdup("expected a value between 0-7"), NULL);
+ return -EINVAL;
+ }
+ attr->jitter_alt_period = (unsigned int)term->val.num;
+ break;
case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
case PARSE_EVENTS__TERM_TYPE_USER:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
@@ -1121,6 +1133,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
case PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD:
+ case PARSE_EVENTS__TERM_TYPE_ALT_PERIOD_JITTER:
default:
if (err) {
parse_events_error__handle(err, term->err_term,
@@ -1256,6 +1269,7 @@ do { \
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
case PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD:
+ case PARSE_EVENTS__TERM_TYPE_ALT_PERIOD_JITTER:
default:
break;
}
@@ -1311,6 +1325,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
case PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD:
+ case PARSE_EVENTS__TERM_TYPE_ALT_PERIOD_JITTER:
default:
break;
}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index d00bb6c5d9ab..dafd4b4d0f0e 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -81,7 +81,8 @@ enum parse_events__term_type {
PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
PARSE_EVENTS__TERM_TYPE_HARDWARE,
PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD,
-#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD + 1)
+ PARSE_EVENTS__TERM_TYPE_ALT_PERIOD_JITTER,
+#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_ALT_PERIOD_JITTER + 1)
};
struct parse_events_term {
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 9c64434167b7..91d312a23769 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -336,6 +336,7 @@ aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
alt-period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_ALT_SAMPLE_PERIOD); }
+alt-period-jitter { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_ALT_PERIOD_JITTER); }
cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 7576fe35a869..c665dc35ad6b 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1429,7 +1429,7 @@ static int pmu_config_term(const struct perf_pmu *pmu,
break;
case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */
return -EINVAL;
- case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_ALT_PERIOD_JITTER:
/* Skip non-config terms. */
break;
default:
@@ -1805,6 +1805,7 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call
"aux-action=(pause|resume|start-paused)",
"aux-sample-size=number",
"alt-period=number",
+ "alt-period-jitter=number",
};
struct perf_pmu_format *format;
int ret;
--
2.43.0
^ permalink raw reply related [flat|nested] 15+ messages in thread