* [RFC PATCH 0/2] tracing/osnoise: Track IPIs @ 2026-06-10 13:04 Valentin Schneider 2026-06-10 13:04 ` [RFC PATCH 1/2] tracing/osnoise: Sample IPI counts Valentin Schneider 2026-06-10 13:04 ` [RFC PATCH 2/2] rtla/osnoise: Report IPI count in osnoise top Valentin Schneider 0 siblings, 2 replies; 10+ messages in thread From: Valentin Schneider @ 2026-06-10 13:04 UTC (permalink / raw) To: linux-kernel, linux-trace-kernel Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Tomas Glozar, Costa Shulyupin, Crystal Wood, Ivan Pravdin Hi folks, So I've seen a few times now reports of latency spikes caused by IPIs, usually because of isolation misconfiguration, but only detected at the tail end of e.g. a 24h timerlat run. It's not because those IPIs are rare, but rather that they don't by themselves cause a monitored CPU to reach the latency threshold, it's usually a combined interference that gets us there. I'd like to make it easier to detect such misconfigurations and thus IPIs hitting supposedly-isolated CPUs. I initially kludged a timerlat option to stop tracing as soon as an IPI was sent to a monitored CPU, regardless of the latency threshold. It sort of did the trick, but Tomáš convinced me timerlat wasn't really the place for that. So here's IPI tracking added to osnoise. Two things worth pointing out: o This only adds IPI count tracking, nothing about noise duration - this is already tracked as part of the IRQ noise. o This modifies the osnoise Ftrace entry, I have no idea how acceptable this is, although the only real consumer of these should be rtla... 
Tested with: $ rtla osnoise top -d 5s $ trace-cmd record -p osnoise hackbench -l 10000 Cheers, Valentin Valentin Schneider (2): tracing/osnoise: Sample IPI counts rtla/osnoise: Report IPI count in osnoise top include/trace/events/osnoise.h | 1 + kernel/trace/trace_entries.h | 6 ++- kernel/trace/trace_osnoise.c | 80 ++++++++++++++++++++++++++-- tools/tracing/rtla/src/osnoise_top.c | 9 +++- 4 files changed, 88 insertions(+), 8 deletions(-) -- 2.54.0 ^ permalink raw reply [flat|nested] 10+ messages in thread
* [RFC PATCH 1/2] tracing/osnoise: Sample IPI counts 2026-06-10 13:04 [RFC PATCH 0/2] tracing/osnoise: Track IPIs Valentin Schneider @ 2026-06-10 13:04 ` Valentin Schneider 2026-06-10 19:51 ` Crystal Wood 2026-06-10 13:04 ` [RFC PATCH 2/2] rtla/osnoise: Report IPI count in osnoise top Valentin Schneider 1 sibling, 1 reply; 10+ messages in thread From: Valentin Schneider @ 2026-06-10 13:04 UTC (permalink / raw) To: linux-kernel, linux-trace-kernel Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Tomas Glozar, Costa Shulyupin, Crystal Wood, Ivan Pravdin Osnoise already implicitly accounts IPIs via its IRQ tracking, however it can be interesting to distinguish between the two: undesired IPIs usually imply a software configuration issue (e.g. wrong/incomplete CPU isolation) whereas undesired (non-IPI) IRQs usually imply a hardware configuration issue. Signed-off-by: Valentin Schneider <vschneid@redhat.com> --- Note that this is modifying the osnoise:osnoise_entry Ftrace entry; I know trace events are sort of supposed to be stable, but I'm not sure about ftrace entries. Alternatively I can have this be purely supported in userspace osnoise by hooking into the IPI events and counting IPIs separately from the osnoise events. 
--- include/trace/events/osnoise.h | 1 + kernel/trace/trace_entries.h | 6 ++- kernel/trace/trace_osnoise.c | 80 ++++++++++++++++++++++++++++++++-- 3 files changed, 81 insertions(+), 6 deletions(-) diff --git a/include/trace/events/osnoise.h b/include/trace/events/osnoise.h index 3f42736238014..58442e58fe652 100644 --- a/include/trace/events/osnoise.h +++ b/include/trace/events/osnoise.h @@ -19,6 +19,7 @@ struct osnoise_sample { int irq_count; /* # IRQs during this sample */ int softirq_count; /* # softirqs during this sample */ int thread_count; /* # threads during this sample */ + int ipi_count; /* # IPIs during this sample */ }; #ifdef CONFIG_TIMERLAT_TRACER diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 54417468fdeb1..aed778d859d37 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -430,16 +430,18 @@ FTRACE_ENTRY(osnoise, osnoise_entry, __field( unsigned int, irq_count ) __field( unsigned int, softirq_count ) __field( unsigned int, thread_count ) + __field( unsigned int, ipi_count ) ), - F_printk("noise:%llu\tmax_sample:%llu\thw:%u\tnmi:%u\tirq:%u\tsoftirq:%u\tthread:%u\n", + F_printk("noise:%llu\tmax_sample:%llu\thw:%u\tnmi:%u\tirq:%u\tsoftirq:%u\tthread:%u\tipi:%u\n", __entry->noise, __entry->max_sample, __entry->hw_count, __entry->nmi_count, __entry->irq_count, __entry->softirq_count, - __entry->thread_count) + __entry->thread_count, + __entry->ipi_count) ); FTRACE_ENTRY(timerlat, timerlat_entry, diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 75678053b21c5..574629a6b22b3 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -35,6 +35,7 @@ #include <trace/events/irq.h> #include <trace/events/sched.h> +#include <trace/events/ipi.h> #define CREATE_TRACE_POINTS #include <trace/events/osnoise.h> @@ -83,6 +84,10 @@ struct osnoise_instance { static struct list_head osnoise_instances; +static struct cpumask osnoise_cpumask; +static struct cpumask 
save_cpumask; +static struct cpumask kthread_cpumask; + static bool osnoise_has_registered_instances(void) { return !!list_first_or_null_rcu(&osnoise_instances, @@ -203,6 +208,11 @@ struct osn_thread { u64 delta_start; }; +/* IPI runtime info */ +struct osn_ipi { + u64 count; +}; + /* * Runtime information: this structure saves the runtime information used by * one sampling thread. @@ -215,6 +225,7 @@ struct osnoise_variables { struct osn_irq irq; struct osn_softirq softirq; struct osn_thread thread; + struct osn_ipi ipi; local_t int_counter; }; @@ -505,6 +516,7 @@ __record_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buff entry->irq_count = sample->irq_count; entry->softirq_count = sample->softirq_count; entry->thread_count = sample->thread_count; + entry->ipi_count = sample->ipi_count; trace_buffer_unlock_commit_nostack(buffer, event); } @@ -1288,6 +1300,7 @@ trace_sched_switch_callback(void *data, bool preempt, * Hook the osnoise tracer callbacks to handle the noise from other * threads on the necessary kernel events. 
*/ + static int hook_thread_events(void) { int ret; @@ -1319,6 +1332,60 @@ static void unhook_thread_events(void) unregister_migration_monitor(); } +static void ipi_emission(struct osnoise_variables *osn_var, unsigned int dst_cpu) +{ + if (!osn_var->sampling) + return; + + osn_var->ipi.count++; +} + +static void trace_ipi_send_cpu_callback(void *data, unsigned int cpu, + unsigned long callsite, void *callback) +{ + struct osnoise_variables *osn_var; + + osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); + ipi_emission(osn_var, cpu); +} + +static void trace_ipi_send_cpumask_callback(void *data, const struct cpumask *cpumask, + unsigned long callsite, void *callback) +{ + struct osnoise_variables *osn_var; + int cpu; + + for_each_cpu_and(cpu, cpumask, &osnoise_cpumask) { + osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); + ipi_emission(osn_var, cpu); + } +} + +static int hook_ipi_events(void) +{ + int ret; + + ret = register_trace_ipi_send_cpu(trace_ipi_send_cpu_callback, NULL); + if (ret) + return -EINVAL; + + ret = register_trace_ipi_send_cpumask(trace_ipi_send_cpumask_callback, NULL); + if (ret) + goto out_unreg; + + return 0; + +out_unreg: + unregister_trace_ipi_send_cpu(trace_ipi_send_cpu_callback, NULL); + return -EINVAL; +} + +static void unhook_ipi_events(void) +{ + unregister_trace_ipi_send_cpu(trace_ipi_send_cpu_callback, NULL); + unregister_trace_ipi_send_cpumask(trace_ipi_send_cpumask_callback, NULL); +} + /* * save_osn_sample_stats - Save the osnoise_sample statistics * @@ -1333,6 +1400,7 @@ save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample * s->irq_count = osn_var->irq.count; s->softirq_count = osn_var->softirq.count; s->thread_count = osn_var->thread.count; + s->ipi_count = osn_var->ipi.count; } /* @@ -1349,6 +1417,7 @@ diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample * s->irq_count = osn_var->irq.count - s->irq_count; s->softirq_count = osn_var->softirq.count - s->softirq_count; 
s->thread_count = osn_var->thread.count - s->thread_count; + s->ipi_count = osn_var->ipi.count - s->ipi_count; } /* @@ -1613,10 +1682,6 @@ static int run_osnoise(void) return ret; } -static struct cpumask osnoise_cpumask; -static struct cpumask save_cpumask; -static struct cpumask kthread_cpumask; - /* * osnoise_sleep - sleep until the next period */ @@ -2892,12 +2957,18 @@ static int osnoise_hook_events(void) goto out_unhook_irq; retval = hook_thread_events(); + if (retval) + goto out_unhook_softirq; + + retval = hook_ipi_events(); /* * All fine! */ if (!retval) return 0; + unhook_thread_events(); +out_unhook_softirq: unhook_softirq_events(); out_unhook_irq: unhook_irq_events(); @@ -2906,6 +2977,7 @@ static int osnoise_hook_events(void) static void osnoise_unhook_events(void) { + unhook_ipi_events(); unhook_thread_events(); unhook_softirq_events(); unhook_irq_events(); -- 2.54.0 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [RFC PATCH 1/2] tracing/osnoise: Sample IPI counts 2026-06-10 13:04 ` [RFC PATCH 1/2] tracing/osnoise: Sample IPI counts Valentin Schneider @ 2026-06-10 19:51 ` Crystal Wood 2026-06-11 8:59 ` Tomas Glozar 2026-06-11 10:21 ` Valentin Schneider 0 siblings, 2 replies; 10+ messages in thread From: Crystal Wood @ 2026-06-10 19:51 UTC (permalink / raw) To: Valentin Schneider, linux-kernel, linux-trace-kernel Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Tomas Glozar, Costa Shulyupin, Ivan Pravdin On Wed, 2026-06-10 at 15:04 +0200, Valentin Schneider wrote: > Osnoise already implictly accounts IPIs via its IRQ tracking, Does it? It seems that IPIs bypass the kernel/irq subsystem on some arches (including x86, but not ARM). It would be nice to solve this properly by adding generic ipi entry/exit tracing (similar to what ARM already has). > however it > can be interesting to distiguish between the two: undesired IPIs usually > imply a software configuration issue (e.g. wrong/incomplete CPU isolation) > whereas undesired (non-IPI) IRQs usually imply a hardware configuration > issue. > > Signed-off-by: Valentin Schneider <vschneid@redhat.com> > --- > Note that this is modifying the osnoise:osnoise_entry Ftrace entry; I know > trace events are sort of supposed to be stable, but I'm not sure about > ftrace entries. I think old rtla will be OK with this since it looks up fields by name rather than assuming a fixed layout. > Alternatively I can have this be purely supported in userspace osnoise by > hooking into the IPI events and counting IPIs separately from the osnoise > events. One benefit I could see of doing this in kernel osnoise would be if you could atomically correlate the count with the particular noise interval, but this patch doesn't do that. 
> +static void ipi_emission(struct osnoise_variables *osn_var, unsigned int dst_cpu) > +{ > + if (!osn_var->sampling) > + return; > + > + osn_var->ipi.count++; > +} > + > +static void trace_ipi_send_cpu_callback(void *data, unsigned int cpu, > + unsigned long callsite, void *callback) > +{ > + struct osnoise_variables *osn_var; > + > + osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); > + ipi_emission(osn_var, cpu); > +} > + > +static void trace_ipi_send_cpumask_callback(void *data, const struct cpumask *cpumask, > + unsigned long callsite, void *callback) > +{ > + struct osnoise_variables *osn_var; > + int cpu; > + > + for_each_cpu_and(cpu, cpumask, &osnoise_cpumask) { > + osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); > + ipi_emission(osn_var, cpu); > + } > +} Isn't this racy to do from a different CPU? Both in terms of the counter, and the timing of the increment relative to when the IPI is actually received. Not necessarily a huge deal if you only care about zero versus bignum, but still. At least worth a comment, if we go with this approach. -Crystal ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC PATCH 1/2] tracing/osnoise: Sample IPI counts 2026-06-10 19:51 ` Crystal Wood @ 2026-06-11 8:59 ` Tomas Glozar 2026-06-11 10:30 ` Valentin Schneider 2026-06-11 10:21 ` Valentin Schneider 1 sibling, 1 reply; 10+ messages in thread From: Tomas Glozar @ 2026-06-11 8:59 UTC (permalink / raw) To: Crystal Wood Cc: Valentin Schneider, linux-kernel, linux-trace-kernel, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Costa Shulyupin, Ivan Pravdin [just replying to comments, will do a full review later] st 10. 6. 2026 v 21:51 odesílatel Crystal Wood <crwood@redhat.com> napsal: > > On Wed, 2026-06-10 at 15:04 +0200, Valentin Schneider wrote: > > Osnoise already implictly accounts IPIs via its IRQ tracking, > > Does it? It seems that IPIs bypass the kernel/irq subsystem on some > arches (including x86, but not ARM). > > It would be nice to solve this properly by adding generic ipi > entry/exit tracing (similar to what ARM already has). > Isn't that precisely what the ipi tracepoints used by this implementation (ipi:ipi_send_cpu) are for? > > however it > > can be interesting to distiguish between the two: undesired IPIs usually > > imply a software configuration issue (e.g. wrong/incomplete CPU isolation) > > whereas undesired (non-IPI) IRQs usually imply a hardware configuration > > issue. > > > > Signed-off-by: Valentin Schneider <vschneid@redhat.com> > > --- > > Note that this is modifying the osnoise:osnoise_entry Ftrace entry; I know > > trace events are sort of supposed to be stable, but I'm not sure about > > ftrace entries. > > I think old rtla will be OK with this since it looks up fields by name > rather than assuming a fixed layout. > Yeah, the fields are either looked up with tep_get_field_val() [2], or with name-based BPF CO-RE relocations against the tracepoint structure [3]. So this shouldn't be an issue, as long as the old counts stay the same. 
[2] https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/tools/tracing/rtla/src/timerlat_hist.c#n191 [3] https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/tools/tracing/rtla/src/timerlat.bpf.c#n12 > > Alternatively I can have this be purely supported in userspace osnoise by > > hooking into the IPI events and counting IPIs separately from the osnoise > > events. > > One benefit I could see of doing this in kernel osnoise would be if you > could atomically correlate the count with the particular noise > interval, but this patch doesn't do that. > The count is already reported by cycle on the kernel side in the patchset, right? It's only missing in the current RTLA (userspace) part, as there is no statistic using the information. But it can still be collected through custom histogram triggers. > > ... > > > > +static void trace_ipi_send_cpumask_callback(void *data, const struct cpumask *cpumask, > > + unsigned long callsite, void *callback) > > +{ > > + struct osnoise_variables *osn_var; > > + int cpu; > > + > > + for_each_cpu_and(cpu, cpumask, &osnoise_cpumask) { > > + osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); > > + ipi_emission(osn_var, cpu); > > + } > > +} > > Isn't this racy to do from a different CPU? Both in terms of the > counter, and the timing of the increment relative to when the IPI is > actually received. Not necessarily a huge deal if you only care about > zero versus bignum, but still. At least worth a comment, if we go with > this approach. > I also think it's a bit confusing, especially as the other accesses to osn_var are cpu-local, but here, "cpu" is the *target* CPU, not the current CPU. Not sure how expensive it would be to do atomic_add for that, at least it's something to consider. Tomas ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC PATCH 1/2] tracing/osnoise: Sample IPI counts 2026-06-11 8:59 ` Tomas Glozar @ 2026-06-11 10:30 ` Valentin Schneider 2026-06-11 11:55 ` Tomas Glozar 2026-06-11 20:49 ` Crystal Wood 0 siblings, 2 replies; 10+ messages in thread From: Valentin Schneider @ 2026-06-11 10:30 UTC (permalink / raw) To: Tomas Glozar, Crystal Wood Cc: linux-kernel, linux-trace-kernel, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Costa Shulyupin, Ivan Pravdin On 11/06/26 10:59, Tomas Glozar wrote: > [just replying to comments, will do a full review later] > > st 10. 6. 2026 v 21:51 odesílatel Crystal Wood <crwood@redhat.com> napsal: >> >> On Wed, 2026-06-10 at 15:04 +0200, Valentin Schneider wrote: >> > Osnoise already implictly accounts IPIs via its IRQ tracking, >> >> Does it? It seems that IPIs bypass the kernel/irq subsystem on some >> arches (including x86, but not ARM). >> >> It would be nice to solve this properly by adding generic ipi >> entry/exit tracing (similar to what ARM already has). >> > > Isn't that precisely what the ipi tracepoints used by this > implementation (ipi:ipi_send_cpu) are for? > Well, these catch the emission of the IPI, which is great for investigation - slap a stacktrace trigger and you (most of the time) get the source of your interference. However Crystal's point is that on x86 (and I assume other archs) receiving & handling these IPIs is "special" and doesn't go through the generic irq subsystem and thus has to be tracked separately, which is why osnoise has this fairly lengthy osnoise_arch_register() thing. >> > however it >> > can be interesting to distiguish between the two: undesired IPIs usually >> > imply a software configuration issue (e.g. wrong/incomplete CPU isolation) >> > whereas undesired (non-IPI) IRQs usually imply a hardware configuration >> > issue. 
>> > >> > Signed-off-by: Valentin Schneider <vschneid@redhat.com> >> > --- >> > Note that this is modifying the osnoise:osnoise_entry Ftrace entry; I know >> > trace events are sort of supposed to be stable, but I'm not sure about >> > ftrace entries. >> >> I think old rtla will be OK with this since it looks up fields by name >> rather than assuming a fixed layout. >> > > Yeah, the fields are either looked up with tep_get_field_val() [2], or > with name-based BPF CO-RE relocations against the tracepoint structure > [3]. So this shouldn't be an issue, as long as the old counts stay the > same. > > [2] https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/tools/tracing/rtla/src/timerlat_hist.c#n191 > [3] https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/tools/tracing/rtla/src/timerlat.bpf.c#n12 > >> > Alternatively I can have this be purely supported in userspace osnoise by >> > hooking into the IPI events and counting IPIs separately from the osnoise >> > events. >> >> One benefit I could see of doing this in kernel osnoise would be if you >> could atomically correlate the count with the particular noise >> interval, but this patch doesn't do that. >> > > The count is already reported by cycle on the kernel side in the > patchset, right? It's only missing in the current RTLA (userspace) > part, as there is no statistic using the information. But it can still > be collected through custom histogram triggers. > >> > ... >> > >> > +static void trace_ipi_send_cpumask_callback(void *data, const struct cpumask *cpumask, >> > + unsigned long callsite, void *callback) >> > +{ >> > + struct osnoise_variables *osn_var; >> > + int cpu; >> > + >> > + for_each_cpu_and(cpu, cpumask, &osnoise_cpumask) { >> > + osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); >> > + ipi_emission(osn_var, cpu); >> > + } >> > +} >> >> Isn't this racy to do from a different CPU? 
Both in terms of the >> counter, and the timing of the increment relative to when the IPI is >> actually received. Not necessarily a huge deal if you only care about >> zero versus bignum, but still. At least worth a comment, if we go with >> this approach. >> > > I also think it's a bit confusing, especially as the other accesses to > osn_var are cpu-local, but here, "cpu" is the *target* CPU, not the > current CPU. Not sure how expensive it would be to do atomic_add for > that, at least it's something to consider. > I suppose that could be an argument for doing that stat aggregation in userspace osnoise - event handlers are run after the fact via tracefs_iterate_raw_events(), it's all inherently slower since it's just increments of one (one per handled event) but it's also all done in userspace on a control thread and doesn't bog down the kernelspace. > Tomas ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC PATCH 1/2] tracing/osnoise: Sample IPI counts 2026-06-11 10:30 ` Valentin Schneider @ 2026-06-11 11:55 ` Tomas Glozar 2026-06-12 8:53 ` Valentin Schneider 2026-06-11 20:49 ` Crystal Wood 1 sibling, 1 reply; 10+ messages in thread From: Tomas Glozar @ 2026-06-11 11:55 UTC (permalink / raw) To: Valentin Schneider Cc: Crystal Wood, linux-kernel, linux-trace-kernel, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Costa Shulyupin, Ivan Pravdin čt 11. 6. 2026 v 12:31 odesílatel Valentin Schneider <vschneid@redhat.com> napsal: > > > > Isn't that precisely what the ipi tracepoints used by this > > implementation (ipi:ipi_send_cpu) are for? > > > > Well, these catch the emission of the IPI, which is great for investigation > - slap a stacktrace trigger and you (most of the time) get the source of > your interference. > > However Crystal's point is that on x86 (and I assume other archs) receiving > & handling these IPIs is "special" and doesn't go through the generic irq > subsystem and thus has to be tracked separately, which is why osnoise has > this fairly lengthy osnoise_arch_register() thing. > Ah, right. This is not IPI specific, though, IIUC - Intel also has other IRQs that have to be traced using Intel-specific trace points, like irq_vectors:local_timer, which is also handled in osnoise_arch_register(). On ARM from what I recall, most (all?) IRQs are traced with irq:* tracepoints. So there are two parts to this: - Detecting interference from IPIs firing as osnoise:irq_noise (to be analyzed by timerlat auto analysis, and also will appear by default in trace output if enabled, regardless of the tool, as all osnoise:* tracepoints are enabled there). This is done locally using the already existing path (no race hazard), but requires arch-specific detection. - Counting IPIs when they are being sent. This is the new feature, and the count is being recorded in osnoise_sample. 
I guess that means that if there were a generic IPI interface, it would be easier to use that for IPI counting, as the event would be CPU-local? As you say, for tracing of the IPI source, the sending tracepoints are better, and that you can already dump the stack trace of with --event/--trigger. timerlat auto-analysis could be extended to connect the specific IPI to the IRQ noise and display its stack trace automatically, instead of manually analyzing the trace output. > >> Isn't this racy to do from a different CPU? Both in terms of the > >> counter, and the timing of the increment relative to when the IPI is > >> actually received. Not necessarily a huge deal if you only care about > >> zero versus bignum, but still. At least worth a comment, if we go with > >> this approach. > >> > > > > I also think it's a bit confusing, especially as the other accesses to > > osn_var are cpu-local, but here, "cpu" is the *target* CPU, not the > > current CPU. Not sure how expensive it would be to do atomic_add for > > that, at least it's something to consider. > > > > I suppose that could be an argument for doing that stat aggregation in > userspace osnoise - event handlers are run after the fact via > tracefs_iterate_raw_events(), it's all inherently slower since it's just > increments of one (one per handled event) but it's also all done in > userspace on a control thread and doesn't bog down the kernelspace. > You can also do per-cpu counters in-kernel and sum them in the end, but that would take cpus^2 space (indexed by [current_cpu, target_cpu]). The question is whether there could be enough samples to overload sample collection (like it happens for timerlat, which collects data in-kernel using BPF instead). In-kernel counting can be tested with " --event ipi:ipi_send_cpu --trigger hist:key=cpu" - IIRC, tracefs histograms use atomic operations (via tracing_map) to protect the entries from races in multi thread access. 
Of course, that is inferior to what the patchset implements, as it doesn't record which osnoise cycle the IPI was sent in, nor can record cpumask IPIs. Tomas ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC PATCH 1/2] tracing/osnoise: Sample IPI counts 2026-06-11 11:55 ` Tomas Glozar @ 2026-06-12 8:53 ` Valentin Schneider 0 siblings, 0 replies; 10+ messages in thread From: Valentin Schneider @ 2026-06-12 8:53 UTC (permalink / raw) To: Tomas Glozar Cc: Crystal Wood, linux-kernel, linux-trace-kernel, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Costa Shulyupin, Ivan Pravdin On 11/06/26 13:55, Tomas Glozar wrote: > čt 11. 6. 2026 v 12:31 odesílatel Valentin Schneider > <vschneid@redhat.com> napsal: >> > >> > Isn't that precisely what the ipi tracepoints used by this >> > implementation (ipi:ipi_send_cpu) are for? >> > >> >> Well, these catch the emission of the IPI, which is great for investigation >> - slap a stacktrace trigger and you (most of the time) get the source of >> your interference. >> >> However Crystal's point is that on x86 (and I assume other archs) receiving >> & handling these IPIs is "special" and doesn't go through the generic irq >> subsystem and thus has to be tracked separately, which is why osnoise has >> this fairly lengthy osnoise_arch_register() thing. >> > > Ah, right. This is not IPI specific, though, IIUC - Intel also has > other IRQs that have to be traced using Intel-specific trace points, > like irq_vectors:local_timer, which is also handled in > osnoise_arch_register(). On ARM from what I recall, most (all?) IRQs > are traced with irq:* tracepoints. > > So there are two parts to this: > > - Detecting interference from IPIs firing as osnoise:irq_noise (to be > analyzed by timerlat auto analysis, and also will appear by default in > trace output if enabled, regardless of the tool, as all osnoise:* > tracepoints are enabled there). This is done locally using the already > existing path (no race hazard), but requires arch-specific detection. > > - Counting IPIs when they are being sent. This is the new feature, and > the count is being recorded in osnoise_sample. 
> > I guess that means that if there were a generic IPI interface, it > would be easier to use that for IPI counting, as the event would be > CPU-local? As you say, for tracing of the IPI source, the sending > tracepoints are better, and that you can already dump the stack trace > of with --event/--trigger. timerlat auto-analysis could be extended to > connect the specific IPI to the IRQ noise and display its stack trace > automatically, instead of manually analyzing the trace output. > Right, at least for the smp_call stuff (which includes irq_work) we can leverage: csd_queue_cpu (on the sending CPU) csd_func_start (on the receiving CPU) by indexing on the @csd address; once upon a time [1] I had this: $ echo 'hist:keys=cpu,csd.hex:ts=common_timestamp.usecs:src=common_cpu' >\ /sys/kernel/tracing/events/csd/csd_queue_cpu/trigger $ echo 'csd_latency unsigned int src_cpu; '\ 'unsigned int dst_cpu; '\ 'unsigned long csd; u64 time' >\ /sys/kernel/tracing/synthetic_events $ echo 'hist:keys=common_cpu,csd.hex: time=common_timestamp.usecs-$ts: onmatch(csd.csd_queue_cpu).trace(csd_latency,$src,common_cpu,csd,$time)' >\ /sys/kernel/tracing/events/csd/csd_function_entry/trigger $ trace-cmd record -e 'synthetic:csd_latency' hackbench $ trace-cmd report <idle>-0 [001] 115.236810: csd_latency: src_cpu=7, dst_cpu=1, csd=18446612682588476192, time=134 <idle>-0 [000] 115.240676: csd_latency: src_cpu=7, dst_cpu=0, csd=18446612682588214048, time=103 <idle>-0 [009] 115.241320: csd_latency: src_cpu=7, dst_cpu=9, csd=18446612682143963384, time=83 <idle>-0 [007] 115.242817: csd_latency: src_cpu=8, dst_cpu=7, csd=18446612682150759032, time=93 <idle>-0 [005] 115.247802: csd_latency: src_cpu=7, dst_cpu=5, csd=18446612682144441144, time=114 <idle>-0 [005] 115.271775: csd_latency: src_cpu=7, dst_cpu=5, csd=18446612682144441144, time=151 <idle>-0 [000] 115.279620: csd_latency: src_cpu=7, dst_cpu=0, csd=18446612682588214048, time=87 <idle>-0 [000] 115.281727: csd_latency: src_cpu=7, dst_cpu=0, 
csd=18446612682588214048, time=101 [1]: https://lore.kernel.org/lkml/xhsmh4jn8y8vt.mognet@vschneid.remote.csb/ I believe you're right that leveraging this would be useful for timerlat-aa; I'll add it to my todolist :-) >> >> Isn't this racy to do from a different CPU? Both in terms of the >> >> counter, and the timing of the increment relative to when the IPI is >> >> actually received. Not necessarily a huge deal if you only care about >> >> zero versus bignum, but still. At least worth a comment, if we go with >> >> this approach. >> >> >> > >> > I also think it's a bit confusing, especially as the other accesses to >> > osn_var are cpu-local, but here, "cpu" is the *target* CPU, not the >> > current CPU. Not sure how expensive it would be to do atomic_add for >> > that, at least it's something to consider. >> > >> >> I suppose that could be an argument for doing that stat aggregation in >> userspace osnoise - event handlers are run after the fact via >> tracefs_iterate_raw_events(), it's all inherently slower since it's just >> increments of one (one per handled event) but it's also all done in >> userspace on a control thread and doesn't bog down the kernelspace. >> > > You can also do per-cpu counters in-kernel and sum them in the end, > but that would take cpus^2 space (indexed by [current_cpu, > target_cpu]). The question is whether there could be enough samples to > overload sample collection (like it happens for timerlat, which > collects data in-kernel using BPF instead). > > In-kernel counting can be tested with " --event ipi:ipi_send_cpu > --trigger hist:key=cpu" - IIRC, tracefs histograms use atomic > operations (via tracing_map) to protect the entries from races in > multi thread access. Of course, that is inferior to what the patchset > implements, as it doesn't record which osnoise cycle the IPI was sent > in, nor can record cpumask IPIs. 
> I suppose I'll need to go do some benchmarking, but I'm starting to lean towards the side of atomic incs for IPI counts being okay considering the sort of latencies we track. > > Tomas ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC PATCH 1/2] tracing/osnoise: Sample IPI counts 2026-06-11 10:30 ` Valentin Schneider 2026-06-11 11:55 ` Tomas Glozar @ 2026-06-11 20:49 ` Crystal Wood 1 sibling, 0 replies; 10+ messages in thread From: Crystal Wood @ 2026-06-11 20:49 UTC (permalink / raw) To: Valentin Schneider, Tomas Glozar Cc: linux-kernel, linux-trace-kernel, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Costa Shulyupin, Ivan Pravdin On Thu, 2026-06-11 at 12:30 +0200, Valentin Schneider wrote: > On 11/06/26 10:59, Tomas Glozar wrote: > > [just replying to comments, will do a full review later] > > > > st 10. 6. 2026 v 21:51 odesílatel Crystal Wood <crwood@redhat.com> napsal: > > > > > > On Wed, 2026-06-10 at 15:04 +0200, Valentin Schneider wrote: > > > > Osnoise already implictly accounts IPIs via its IRQ tracking, > > > > > > Does it? It seems that IPIs bypass the kernel/irq subsystem on some > > > arches (including x86, but not ARM). > > > > > > It would be nice to solve this properly by adding generic ipi > > > entry/exit tracing (similar to what ARM already has). > > > > > > > Isn't that precisely what the ipi tracepoints used by this > > implementation (ipi:ipi_send_cpu) are for? > > > > Well, these catch the emission of the IPI, which is great for investigation > - slap a stacktrace trigger and you (most of the time) get the source of > your interference. > > However Crystal's point is that on x86 (and I assume other archs) receiving > & handling these IPIs is "special" and doesn't go through the generic irq > subsystem and thus has to be tracked separately, which is why osnoise has > this fairly lengthy osnoise_arch_register() thing. Oh, I missed the arch hook. I feel better now :-) (I'd feel better if it didn't rely on osnoise-specific arch code being updated to match if some new interrupt path pops up, but oh well.) 
> > > > > > Alternatively I can have this be purely supported in userspace osnoise by > > > > hooking into the IPI events and counting IPIs separately from the osnoise > > > > events. > > > > > > One benefit I could see of doing this in kernel osnoise would be if you > > > could atomically correlate the count with the particular noise > > > interval, but this patch doesn't do that. > > > > > > > The count is already reported by cycle on the kernel side in the > > patchset, right? It's only missing in the current RTLA (userspace) > > part, as there is no statistic using the information. But it can still > > be collected through custom histogram triggers. Not sure I follow... this patchset reports a count of IPIs, not cycle info, but the count is based on when the IPIs were sent, not received. The IPI send events capture cycle info, but that's not what this patchset adds. I'm not sure that it really matters though. I had been thinking of this more like the interference count, which is atomic with respect to a single noise (and thus the sender of the noise would be outside that window). But this count is reported over the entire osnoise sample period, so a little slop is probably OK. -Crystal > ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC PATCH 1/2] tracing/osnoise: Sample IPI counts 2026-06-10 19:51 ` Crystal Wood 2026-06-11 8:59 ` Tomas Glozar @ 2026-06-11 10:21 ` Valentin Schneider 1 sibling, 0 replies; 10+ messages in thread From: Valentin Schneider @ 2026-06-11 10:21 UTC (permalink / raw) To: Crystal Wood, linux-kernel, linux-trace-kernel Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Tomas Glozar, Costa Shulyupin, Ivan Pravdin On 10/06/26 14:51, Crystal Wood wrote: > On Wed, 2026-06-10 at 15:04 +0200, Valentin Schneider wrote: >> Osnoise already implicitly accounts IPIs via its IRQ tracking, > > Does it? It seems that IPIs bypass the kernel/irq subsystem on some > arches (including x86, but not ARM). > Right... > It would be nice to solve this properly by adding generic ipi > entry/exit tracing (similar to what ARM already has). > I think for x86 the CSD tracepoints catch a few of these strays - I think the smp_call ones for instance. >> however it >> can be interesting to distinguish between the two: undesired IPIs usually >> imply a software configuration issue (e.g. wrong/incomplete CPU isolation) >> whereas undesired (non-IPI) IRQs usually imply a hardware configuration >> issue. >> >> Signed-off-by: Valentin Schneider <vschneid@redhat.com> >> --- >> Note that this is modifying the osnoise:osnoise_entry Ftrace entry; I know >> trace events are sort of supposed to be stable, but I'm not sure about >> ftrace entries. > > I think old rtla will be OK with this since it looks up fields by name > rather than assuming a fixed layout. > >> Alternatively I can have this be purely supported in userspace osnoise by >> hooking into the IPI events and counting IPIs separately from the osnoise >> events. > > One benefit I could see of doing this in kernel osnoise would be if you > could atomically correlate the count with the particular noise > interval, but this patch doesn't do that. 
> >> +static void ipi_emission(struct osnoise_variables *osn_var, unsigned int dst_cpu) >> +{ >> + if (!osn_var->sampling) >> + return; >> + >> + osn_var->ipi.count++; >> +} >> + >> +static void trace_ipi_send_cpu_callback(void *data, unsigned int cpu, >> + unsigned long callsite, void *callback) >> +{ >> + struct osnoise_variables *osn_var; >> + >> + osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); >> + ipi_emission(osn_var, cpu); >> +} >> + >> +static void trace_ipi_send_cpumask_callback(void *data, const struct cpumask *cpumask, >> + unsigned long callsite, void *callback) >> +{ >> + struct osnoise_variables *osn_var; >> + int cpu; >> + >> + for_each_cpu_and(cpu, cpumask, &osnoise_cpumask) { >> + osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); >> + ipi_emission(osn_var, cpu); >> + } >> +} > > Isn't this racy to do from a different CPU? Both in terms of the > counter, and the timing of the increment relative to when the IPI is > actually received. Not necessarily a huge deal if you only care about > zero versus bignum, but still. At least worth a comment, if we go with > this approach. > Yes on both points :-) Let me see what Tomas has to say on that... > -Crystal ^ permalink raw reply [flat|nested] 10+ messages in thread
* [RFC PATCH 2/2] rtla/osnoise: Report IPI count in osnoise top 2026-06-10 13:04 [RFC PATCH 0/2] tracing/osnoise: Track IPIs Valentin Schneider 2026-06-10 13:04 ` [RFC PATCH 1/2] tracing/osnoise: Sample IPI counts Valentin Schneider @ 2026-06-10 13:04 ` Valentin Schneider 1 sibling, 0 replies; 10+ messages in thread From: Valentin Schneider @ 2026-06-10 13:04 UTC (permalink / raw) To: linux-kernel, linux-trace-kernel Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Tomas Glozar, Costa Shulyupin, Crystal Wood, Ivan Pravdin The osnoise tracer now also reports IPI count, extract & report them. Signed-off-by: Valentin Schneider <vschneid@redhat.com> --- tools/tracing/rtla/src/osnoise_top.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index e65312ec26c43..6fd5353c82f38 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -25,6 +25,7 @@ struct osnoise_top_cpu { unsigned long long irq_count; unsigned long long softirq_count; unsigned long long thread_count; + unsigned long long ipi_count; int sum_cycles; }; @@ -116,6 +117,9 @@ osnoise_top_handler(struct trace_seq *s, struct tep_record *record, tep_get_field_val(s, event, "thread_count", record, &val, 1); update_sum(&cpu_data->thread_count, &val); + tep_get_field_val(s, event, "ipi_count", record, &val, 1); + update_sum(&cpu_data->ipi_count, &val); + return 0; } @@ -163,7 +167,7 @@ static void osnoise_top_header(struct osnoise_tool *top) if (params->mode == MODE_HWNOISE) goto eol; - trace_seq_printf(s, " IRQ Softirq Thread"); + trace_seq_printf(s, " IRQ Softirq Thread IPI"); eol: if (pretty) @@ -218,7 +222,8 @@ static void osnoise_top_print(struct osnoise_tool *tool, int cpu) trace_seq_printf(s, "%12llu ", cpu_data->irq_count); trace_seq_printf(s, "%12llu ", cpu_data->softirq_count); - trace_seq_printf(s, "%12llu\n", cpu_data->thread_count); + trace_seq_printf(s, 
"%12llu ", cpu_data->thread_count); + trace_seq_printf(s, "%12llu\n", cpu_data->ipi_count); } /* -- 2.54.0 ^ permalink raw reply related [flat|nested] 10+ messages in thread
end of thread, other threads:[~2026-06-12 8:54 UTC | newest] Thread overview: 10+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2026-06-10 13:04 [RFC PATCH 0/2] tracing/osnoise: Track IPIs Valentin Schneider 2026-06-10 13:04 ` [RFC PATCH 1/2] tracing/osnoise: Sample IPI counts Valentin Schneider 2026-06-10 19:51 ` Crystal Wood 2026-06-11 8:59 ` Tomas Glozar 2026-06-11 10:30 ` Valentin Schneider 2026-06-11 11:55 ` Tomas Glozar 2026-06-12 8:53 ` Valentin Schneider 2026-06-11 20:49 ` Crystal Wood 2026-06-11 10:21 ` Valentin Schneider 2026-06-10 13:04 ` [RFC PATCH 2/2] rtla/osnoise: Report IPI count in osnoise top Valentin Schneider
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox