BPF List
 help / color / mirror / Atom feed
* [PATCH RESEND] perf/lock: Fix non-atomic max_time and min_time updates in contention_data
@ 2026-05-04 13:51 Suchit Karunakaran
  2026-05-04 17:23 ` sashiko-bot
  0 siblings, 1 reply; 4+ messages in thread
From: Suchit Karunakaran @ 2026-05-04 13:51 UTC (permalink / raw)
  To: peterz, mingo, acme, namhyung, mark.rutland, alexander.shishkin,
	jolsa, irogers, adrian.hunter, james.clark
  Cc: linux-perf-users, linux-kernel, bpf, Suchit Karunakaran

update_contention_data() had a FIXME noting that the max_time and
min_time updates were not atomic. Two CPUs could simultaneously
read the same stale value, both pass the comparison check and then
race on the write-back, so that a less extreme value could overwrite
a more extreme one (for max_time, a smaller value overwriting a
larger one; for min_time, a larger value overwriting a smaller one),
silently corrupting the statistics.

Fix this by replacing the bare conditional assignments with a
bpf_loop()-based CAS retry loop, bounded to 64 iterations. Each field
tracks its own convergence independently via the max_done/min_done
flags in cas_ctx, so a field whose CAS has succeeded (or that needs
no update at all) is never retried, even if the other field needs
more attempts.

Acked-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Suchit Karunakaran <suchitkarunakaran@gmail.com>
---
 .../perf/util/bpf_skel/lock_contention.bpf.c  | 50 +++++++++++++++++--
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 96e7d853b9ed..5c8431be674a 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -175,6 +175,13 @@ struct mm_struct___new {
 	struct rw_semaphore mmap_lock;
 } __attribute__((preserve_access_index));
 
+struct cas_ctx {
+	struct contention_data *data;
+	u64 duration;
+	int max_done;
+	int min_done;
+};
+
 extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym __weak;
 
 /* control flags */
@@ -486,16 +493,49 @@ static inline s32 get_owner_stack_id(u64 *stacktrace)
 	return -1;
 }
 
+static long cas_min_max_cb(u64 idx, void *arg)
+{
+	struct cas_ctx *ctx = arg;
+
+	if (!ctx->max_done) {
+		u64 old_max = ctx->data->max_time;
+		if (old_max >= ctx->duration) {
+			ctx->max_done = 1;
+		} else {
+			u64 r = __sync_val_compare_and_swap(
+				&ctx->data->max_time, old_max, ctx->duration);
+			if (r == old_max)
+				ctx->max_done = 1;
+		}
+	}
+
+	if (!ctx->min_done) {
+		u64 old_min = ctx->data->min_time;
+		if (old_min <= ctx->duration) {
+			ctx->min_done = 1;
+		} else {
+			u64 r = __sync_val_compare_and_swap(
+				&ctx->data->min_time, old_min, ctx->duration);
+			if (r == old_min)
+				ctx->min_done = 1;
+		}
+	}
+
+	return (ctx->max_done && ctx->min_done) ? 1 : 0;
+}
+
 static inline void update_contention_data(struct contention_data *data, u64 duration, u32 count)
 {
 	__sync_fetch_and_add(&data->total_time, duration);
 	__sync_fetch_and_add(&data->count, count);
 
-	/* FIXME: need atomic operations */
-	if (data->max_time < duration)
-		data->max_time = duration;
-	if (data->min_time > duration)
-		data->min_time = duration;
+	struct cas_ctx ctx = {
+		.data     = data,
+		.duration = duration,
+		.max_done = 0,
+		.min_done = 0,
+	};
+	bpf_loop(64, cas_min_max_cb, &ctx, 0);
 }
 
 static inline void update_owner_stat(u32 id, u64 duration, u32 flags)
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH RESEND] perf/lock: Fix non-atomic max/time and min_time updates in contention_data
  2026-05-04 13:51 [PATCH RESEND] perf/lock: Fix non-atomic max/time and min_time updates in contention_data Suchit Karunakaran
@ 2026-05-04 17:23 ` sashiko-bot
  2026-05-05 22:35   ` Namhyung Kim
  0 siblings, 1 reply; 4+ messages in thread
From: sashiko-bot @ 2026-05-04 17:23 UTC (permalink / raw)
  To: Suchit Karunakaran; +Cc: bpf, linux-perf-users

Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
- [Low] The use of `__sync_val_compare_and_swap` introduces a compiler dependency on BPF ISA v3, breaking the `perf` tool build on older toolchains.
--

commit 30820839a2cfcfa987fbc2e0f79fa0e2f2cf0dc3
Author: Suchit Karunakaran <suchitkarunakaran@gmail.com>

perf/lock: Fix non-atomic max/time and min_time updates in contention_data

This commit replaces bare conditional assignments with a bpf_loop()-based CAS
retry loop to fix non-atomic updates of max_time and min_time.

> diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> index 96e7d853b9edf..5c8431be674ac 100644
> --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
> +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
[ ... ]
> @@ -486,16 +493,49 @@ static inline s32 get_owner_stack_id(u64 *stacktrace)
>  	return -1;
>  }
>  
> +static long cas_min_max_cb(u64 idx, void *arg)
> +{
> +	struct cas_ctx *ctx = arg;
> +
> +	if (!ctx->max_done) {
> +		u64 old_max = ctx->data->max_time;
> +		if (old_max >= ctx->duration) {
> +			ctx->max_done = 1;
> +		} else {
> +			u64 r = __sync_val_compare_and_swap(
> +				&ctx->data->max_time, old_max, ctx->duration);

Does this cause build failures on older toolchains?

The built-in __sync_val_compare_and_swap lowers to the BPF_CMPXCHG
instruction, which requires BPF CPU v3 (-mcpu=v3).

The perf build system compiles BPF skeletons using the host compiler's default
target features, without explicitly passing an -mcpu flag. On toolchains
where the default BPF target is v1 or v2 (such as Clang < 18), the compiler
may fail to generate the instruction and throw a fatal error (e.g.,
'hardware compare and swap not supported' or an unresolved libcall to
__sync_val_compare_and_swap_8).

> +			if (r == old_max)
> +				ctx->max_done = 1;
> +		}
> +	}
[ ... ]

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260504135138.17050-1-suchitkarunakaran@gmail.com?part=1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH RESEND] perf/lock: Fix non-atomic max/time and min_time updates in contention_data
  2026-05-04 17:23 ` sashiko-bot
@ 2026-05-05 22:35   ` Namhyung Kim
  2026-05-06  7:04     ` Suchit Karunakaran
  0 siblings, 1 reply; 4+ messages in thread
From: Namhyung Kim @ 2026-05-05 22:35 UTC (permalink / raw)
  To: sashiko; +Cc: Suchit Karunakaran, bpf, linux-perf-users

Hello,

On Mon, May 04, 2026 at 05:23:37PM +0000, sashiko-bot@kernel.org wrote:
> Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
> - [Low] The use of `__sync_val_compare_and_swap` introduces a compiler dependency on BPF ISA v3, breaking the `perf` tool build on older toolchains.
> --
> 
> commit 30820839a2cfcfa987fbc2e0f79fa0e2f2cf0dc3
> Author: Suchit Karunakaran <suchitkarunakaran@gmail.com>
> 
> perf/lock: Fix non-atomic max/time and min_time updates in contention_data
> 
> This commit replaces bare conditional assignments with a bpf_loop()-based CAS
> retry loop to fix non-atomic updates of max_time and min_time.
> 
> > diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> > index 96e7d853b9edf..5c8431be674ac 100644
> > --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
> > +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> [ ... ]
> > @@ -486,16 +493,49 @@ static inline s32 get_owner_stack_id(u64 *stacktrace)
> >  	return -1;
> >  }
> >  
> > +static long cas_min_max_cb(u64 idx, void *arg)
> > +{
> > +	struct cas_ctx *ctx = arg;
> > +
> > +	if (!ctx->max_done) {
> > +		u64 old_max = ctx->data->max_time;
> > +		if (old_max >= ctx->duration) {
> > +			ctx->max_done = 1;
> > +		} else {
> > +			u64 r = __sync_val_compare_and_swap(
> > +				&ctx->data->max_time, old_max, ctx->duration);
> 
> Does this cause build failures on older toolchains?
> 
> The built-in __sync_val_compare_and_swap lowers to the BPF_CMPXCHG
> instruction, which requires BPF CPU v3 (-mcpu=v3).

Hmm.. this looks like a real concern.  We could add -mcpu=v3 to the
compiler options, but then there would be compatibility issues with old
kernels.  It seems it was added in the v5.1 kernel, and the oldest
long-term support kernel version is 5.10.  So I think it's fine to add it.

Could you please update the build flag as well?  It can be a separate
commit.

Thanks,
Namhyung

> 
> The perf build system compiles BPF skeletons using the host compiler's default
> target features, without explicitly passing an -mcpu flag. On toolchains
> where the default BPF target is v1 or v2 (such as Clang < 18), the compiler
> may fail to generate the instruction and throw a fatal error (e.g.,
> 'hardware compare and swap not supported' or an unresolved libcall to
> __sync_val_compare_and_swap_8).
> 
> > +			if (r == old_max)
> > +				ctx->max_done = 1;
> > +		}
> > +	}
> [ ... ]
> 
> -- 
> Sashiko AI review · https://sashiko.dev/#/patchset/20260504135138.17050-1-suchitkarunakaran@gmail.com?part=1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH RESEND] perf/lock: Fix non-atomic max/time and min_time updates in contention_data
  2026-05-05 22:35   ` Namhyung Kim
@ 2026-05-06  7:04     ` Suchit Karunakaran
  0 siblings, 0 replies; 4+ messages in thread
From: Suchit Karunakaran @ 2026-05-06  7:04 UTC (permalink / raw)
  To: Namhyung Kim; +Cc: sashiko, bpf, linux-perf-users

Hi Namhyung,

On Wed, 6 May 2026 at 04:05, Namhyung Kim <namhyung@kernel.org> wrote:
>
> Hello,
>
> On Mon, May 04, 2026 at 05:23:37PM +0000, sashiko-bot@kernel.org wrote:
> > Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
> > - [Low] The use of `__sync_val_compare_and_swap` introduces a compiler dependency on BPF ISA v3, breaking the `perf` tool build on older toolchains.
> > --
> >
> > commit 30820839a2cfcfa987fbc2e0f79fa0e2f2cf0dc3
> > Author: Suchit Karunakaran <suchitkarunakaran@gmail.com>
> >
> > perf/lock: Fix non-atomic max/time and min_time updates in contention_data
> >
> > This commit replaces bare conditional assignments with a bpf_loop()-based CAS
> > retry loop to fix non-atomic updates of max_time and min_time.
> >
> > > diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> > > index 96e7d853b9edf..5c8431be674ac 100644
> > > --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
> > > +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> > [ ... ]
> > > @@ -486,16 +493,49 @@ static inline s32 get_owner_stack_id(u64 *stacktrace)
> > >     return -1;
> > >  }
> > >
> > > +static long cas_min_max_cb(u64 idx, void *arg)
> > > +{
> > > +   struct cas_ctx *ctx = arg;
> > > +
> > > +   if (!ctx->max_done) {
> > > +           u64 old_max = ctx->data->max_time;
> > > +           if (old_max >= ctx->duration) {
> > > +                   ctx->max_done = 1;
> > > +           } else {
> > > +                   u64 r = __sync_val_compare_and_swap(
> > > +                           &ctx->data->max_time, old_max, ctx->duration);
> >
> > Does this cause build failures on older toolchains?
> >
> > The built-in __sync_val_compare_and_swap lowers to the BPF_CMPXCHG
> > instruction, which requires BPF CPU v3 (-mcpu=v3).
>
> Hmm.. this looks like a real concern.  We could add -mcpu=v3 to the
> compiler option, but then there would be compatibility issues with old
> kernels.  It seems it's added in v5.1 kernel and the oldest longterm
> support kernel version is 5.10.  So I think it's fine to add it.
>
> Could you please update the build flag as well?  It can be a separate
> commit.
>
> Thanks,
> Namhyung
>
> >

Sure, I'll send a patch for that as well.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-05-06  7:04 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-04 13:51 [PATCH RESEND] perf/lock: Fix non-atomic max/time and min_time updates in contention_data Suchit Karunakaran
2026-05-04 17:23 ` sashiko-bot
2026-05-05 22:35   ` Namhyung Kim
2026-05-06  7:04     ` Suchit Karunakaran

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox