* [RFC PATCH V0 09/10] trace/kmmscand: Add tracing of scanning and migration
[not found] <20241201153818.2633616-1-raghavendra.kt@amd.com>
@ 2024-12-01 15:38 ` Raghavendra K T
2024-12-05 17:46 ` Steven Rostedt
0 siblings, 1 reply; 4+ messages in thread
From: Raghavendra K T @ 2024-12-01 15:38 UTC (permalink / raw)
To: linux-mm, linux-kernel, gourry, nehagholkar, abhishekd, david,
ying.huang, nphamcs, akpm, hannes, feng.tang, kbusch, bharata,
Hasan.Maruf, sj
Cc: willy, kirill.shutemov, mgorman, vbabka, hughd, rientjes,
shy828301, Liam.Howlett, peterz, mingo, Raghavendra K T,
Steven Rostedt, Masami Hiramatsu, linux-trace-kernel
Add tracing support to track
- start and end of scanning.
- migration.
CC: Steven Rostedt <rostedt@goodmis.org>
CC: Masami Hiramatsu <mhiramat@kernel.org>
CC: linux-trace-kernel@vger.kernel.org
Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
---
include/trace/events/kmem.h | 99 +++++++++++++++++++++++++++++++++++++
mm/kmmscand.c | 12 ++++-
2 files changed, 110 insertions(+), 1 deletion(-)
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index b37eb0a7060f..80978d85300d 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -9,6 +9,105 @@
#include <linux/tracepoint.h>
#include <trace/events/mmflags.h>
+TRACE_EVENT(kmem_mm_enter,
+
+ TP_PROTO(struct task_struct *t,
+ struct mm_struct *mm),
+
+ TP_ARGS(t, mm),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( struct mm_struct *, mm )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
+ __entry->mm = mm;
+ ),
+
+ TP_printk("kmmscand: mm_enter comm =%s mm=%p", __entry->comm, __entry->mm)
+);
+
+TRACE_EVENT(kmem_scan_mm_start,
+
+ TP_PROTO(struct task_struct *t,
+ struct mm_struct *mm),
+
+ TP_ARGS(t, mm),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( struct mm_struct *, mm )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
+ __entry->mm = mm;
+ ),
+
+ TP_printk("kmmscand: scan_mm_start comm =%s mm=%p", __entry->comm, __entry->mm)
+);
+
+TRACE_EVENT(kmem_scan_mm_end,
+
+ TP_PROTO(struct task_struct *t,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long total,
+ unsigned long scan_size,
+ unsigned long scan_period),
+
+ TP_ARGS(t, mm, start, total, scan_period, scan_size),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( struct mm_struct *, mm )
+ __field( unsigned long, start )
+ __field( unsigned long, total )
+ __field( unsigned long, scan_period )
+ __field( unsigned long, scan_size )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
+ __entry->mm = mm;
+ __entry->start = start;
+ __entry->total = total;
+ __entry->scan_period = scan_period;
+ __entry->scan_size = scan_size;
+ ),
+
+ TP_printk("kmmscand: scan_mm_end comm =%s mm=%p, start = %ld, total = %ld, scan_period = %ld, scan_size = %ld",
+ __entry->comm, __entry->mm, __entry->start,
+ __entry->total, __entry->scan_period, __entry->scan_size)
+);
+
+TRACE_EVENT(kmem_scan_mm_migrate,
+
+ TP_PROTO(struct task_struct *t,
+ struct mm_struct *mm,
+ int rc),
+
+ TP_ARGS(t, mm, rc),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( struct mm_struct *, mm )
+ __field( int, rc )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
+ __entry->mm = mm;
+ __entry->rc = rc;
+ ),
+
+ TP_printk("kmmscand: scan_mm_migrate comm =%s mm=%p rc=%d", __entry->comm,
+ __entry->mm, __entry->rc)
+);
+
+
TRACE_EVENT(kmem_cache_alloc,
TP_PROTO(unsigned long call_site,
diff --git a/mm/kmmscand.c b/mm/kmmscand.c
index 682c0523c0b4..70f588a210dd 100644
--- a/mm/kmmscand.c
+++ b/mm/kmmscand.c
@@ -668,8 +668,10 @@ static void kmmscand_migrate_folio(void)
WRITE_ONCE(kmmscand_cur_migrate_mm, info->mm);
spin_unlock(&kmmscand_migrate_lock);
- if (info->mm)
+ if (info->mm) {
ret = kmmscand_promote_folio(info);
+ trace_kmem_scan_mm_migrate(info->mm->owner, info->mm, ret);
+ }
/* TBD: encode migrated count here, currently assume folio_nr_pages */
if (!ret)
@@ -828,6 +830,9 @@ static unsigned long kmmscand_scan_mm_slot(void)
goto outerloop;
}
+ if (mm->owner)
+ trace_kmem_scan_mm_start(mm->owner, mm);
+
now = jiffies;
/*
* Dont scan if :
@@ -868,6 +873,10 @@ static unsigned long kmmscand_scan_mm_slot(void)
update_mmslot_info = true;
+ if (mm->owner)
+ trace_kmem_scan_mm_end(mm->owner, mm, address, total,
+ mm_slot_scan_period, mm_slot_scan_size);
+
count_kmmscand_mm_scans();
outerloop:
@@ -1020,6 +1029,7 @@ void __kmmscand_enter(struct mm_struct *mm)
spin_unlock(&kmmscand_mm_lock);
mmgrab(mm);
+ trace_kmem_mm_enter(mm->owner, mm);
if (wakeup)
wake_up_interruptible(&kmmscand_wait);
}
--
2.39.3
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [RFC PATCH V0 09/10] trace/kmmscand: Add tracing of scanning and migration
2024-12-01 15:38 ` [RFC PATCH V0 09/10] trace/kmmscand: Add tracing of scanning and migration Raghavendra K T
@ 2024-12-05 17:46 ` Steven Rostedt
2024-12-06 6:33 ` Raghavendra K T
0 siblings, 1 reply; 4+ messages in thread
From: Steven Rostedt @ 2024-12-05 17:46 UTC (permalink / raw)
To: Raghavendra K T
Cc: linux-mm, linux-kernel, gourry, nehagholkar, abhishekd, david,
ying.huang, nphamcs, akpm, hannes, feng.tang, kbusch, bharata,
Hasan.Maruf, sj, willy, kirill.shutemov, mgorman, vbabka, hughd,
rientjes, shy828301, Liam.Howlett, peterz, mingo,
Masami Hiramatsu, linux-trace-kernel
On Sun, 1 Dec 2024 15:38:17 +0000
Raghavendra K T <raghavendra.kt@amd.com> wrote:
> Add tracing support to track
> - start and end of scanning.
> - migration.
>
> CC: Steven Rostedt <rostedt@goodmis.org>
> CC: Masami Hiramatsu <mhiramat@kernel.org>
> CC: linux-trace-kernel@vger.kernel.org
>
> Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
> ---
> include/trace/events/kmem.h | 99 +++++++++++++++++++++++++++++++++++++
> mm/kmmscand.c | 12 ++++-
> 2 files changed, 110 insertions(+), 1 deletion(-)
>
> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index b37eb0a7060f..80978d85300d 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -9,6 +9,105 @@
> #include <linux/tracepoint.h>
> #include <trace/events/mmflags.h>
>
> +TRACE_EVENT(kmem_mm_enter,
> +
> + TP_PROTO(struct task_struct *t,
> + struct mm_struct *mm),
> +
> + TP_ARGS(t, mm),
> +
> + TP_STRUCT__entry(
> + __array( char, comm, TASK_COMM_LEN )
Is there a reason to record "comm"? There's other ways to retrieve it than
to always write it to the ring buffer.
> + __field( struct mm_struct *, mm )
> + ),
> +
> + TP_fast_assign(
> + memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
> + __entry->mm = mm;
> + ),
> +
> + TP_printk("kmmscand: mm_enter comm =%s mm=%p", __entry->comm, __entry->mm)
> +);
> +
> +TRACE_EVENT(kmem_scan_mm_start,
> +
> + TP_PROTO(struct task_struct *t,
> + struct mm_struct *mm),
> +
> + TP_ARGS(t, mm),
> +
> + TP_STRUCT__entry(
> + __array( char, comm, TASK_COMM_LEN )
> + __field( struct mm_struct *, mm )
> + ),
> +
> + TP_fast_assign(
> + memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
> + __entry->mm = mm;
> + ),
> +
> + TP_printk("kmmscand: scan_mm_start comm =%s mm=%p", __entry->comm, __entry->mm)
No need to write the event name into the TP_printk(). That's redundant.
Also, the above two events are pretty much identical. Please use
DECLARE_EVENT_CLASS().
> +);
> +
> +TRACE_EVENT(kmem_scan_mm_end,
> +
> + TP_PROTO(struct task_struct *t,
> + struct mm_struct *mm,
> + unsigned long start,
> + unsigned long total,
> + unsigned long scan_size,
> + unsigned long scan_period),
> +
> + TP_ARGS(t, mm, start, total, scan_period, scan_size),
> +
> + TP_STRUCT__entry(
> + __array( char, comm, TASK_COMM_LEN )
Again, why comm?
> + __field( struct mm_struct *, mm )
> + __field( unsigned long, start )
> + __field( unsigned long, total )
> + __field( unsigned long, scan_period )
> + __field( unsigned long, scan_size )
> + ),
> +
> + TP_fast_assign(
> + memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
> + __entry->mm = mm;
> + __entry->start = start;
> + __entry->total = total;
> + __entry->scan_period = scan_period;
> + __entry->scan_size = scan_size;
> + ),
> +
> + TP_printk("kmmscand: scan_mm_end comm =%s mm=%p, start = %ld, total = %ld, scan_period = %ld, scan_size = %ld",
> + __entry->comm, __entry->mm, __entry->start,
> + __entry->total, __entry->scan_period, __entry->scan_size)
> +);
> +
> +TRACE_EVENT(kmem_scan_mm_migrate,
> +
> + TP_PROTO(struct task_struct *t,
> + struct mm_struct *mm,
> + int rc),
> +
> + TP_ARGS(t, mm, rc),
> +
> + TP_STRUCT__entry(
> + __array( char, comm, TASK_COMM_LEN )
> + __field( struct mm_struct *, mm )
> + __field( int, rc )
> + ),
> +
> + TP_fast_assign(
> + memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
> + __entry->mm = mm;
> + __entry->rc = rc;
> + ),
> +
> + TP_printk("kmmscand: scan_mm_migrate comm =%s mm=%p rc=%d", __entry->comm,
> + __entry->mm, __entry->rc)
> +);
> +
> +
> TRACE_EVENT(kmem_cache_alloc,
>
> TP_PROTO(unsigned long call_site,
> diff --git a/mm/kmmscand.c b/mm/kmmscand.c
> index 682c0523c0b4..70f588a210dd 100644
> --- a/mm/kmmscand.c
> +++ b/mm/kmmscand.c
> @@ -668,8 +668,10 @@ static void kmmscand_migrate_folio(void)
> WRITE_ONCE(kmmscand_cur_migrate_mm, info->mm);
> spin_unlock(&kmmscand_migrate_lock);
>
> - if (info->mm)
> + if (info->mm) {
> ret = kmmscand_promote_folio(info);
> + trace_kmem_scan_mm_migrate(info->mm->owner, info->mm, ret);
> + }
>
> /* TBD: encode migrated count here, currently assume folio_nr_pages */
> if (!ret)
> @@ -828,6 +830,9 @@ static unsigned long kmmscand_scan_mm_slot(void)
> goto outerloop;
> }
>
> + if (mm->owner)
> + trace_kmem_scan_mm_start(mm->owner, mm);
> +
> now = jiffies;
> /*
> * Dont scan if :
> @@ -868,6 +873,10 @@ static unsigned long kmmscand_scan_mm_slot(void)
>
> update_mmslot_info = true;
>
> + if (mm->owner)
> + trace_kmem_scan_mm_end(mm->owner, mm, address, total,
> + mm_slot_scan_period, mm_slot_scan_size);
Please do not add conditions that is used just for calling a tracepoint.
That takes away the "nop" of the function. You can either use
TRACE_EVENT_CONDITION() or DEFINE_EVENT_CONDITION(), or you can hard code
it here:
if (trace_kmem_scan_mm_end_enabled()) {
if (mm->owner)
trace_kmem_scan_mm_end(mm->owner, mm, address, total,
mm_slot_scan_period, mm_slot_scan_size);
}
But since it is a single condition, I would prefer the *_CONDITION() macros
above.
-- Steve
> +
> count_kmmscand_mm_scans();
>
> outerloop:
> @@ -1020,6 +1029,7 @@ void __kmmscand_enter(struct mm_struct *mm)
> spin_unlock(&kmmscand_mm_lock);
>
> mmgrab(mm);
> + trace_kmem_mm_enter(mm->owner, mm);
> if (wakeup)
> wake_up_interruptible(&kmmscand_wait);
> }
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC PATCH V0 09/10] trace/kmmscand: Add tracing of scanning and migration
2024-12-05 17:46 ` Steven Rostedt
@ 2024-12-06 6:33 ` Raghavendra K T
2024-12-06 14:49 ` Steven Rostedt
0 siblings, 1 reply; 4+ messages in thread
From: Raghavendra K T @ 2024-12-06 6:33 UTC (permalink / raw)
To: Steven Rostedt
Cc: linux-mm, linux-kernel, gourry, nehagholkar, abhishekd, david,
ying.huang, nphamcs, akpm, hannes, feng.tang, kbusch, bharata,
Hasan.Maruf, sj, willy, kirill.shutemov, mgorman, vbabka, hughd,
rientjes, shy828301, Liam.Howlett, peterz, mingo,
Masami Hiramatsu, linux-trace-kernel
On 12/5/2024 11:16 PM, Steven Rostedt wrote:
> On Sun, 1 Dec 2024 15:38:17 +0000
> Raghavendra K T <raghavendra.kt@amd.com> wrote:
>
>> Add tracing support to track
>> - start and end of scanning.
>> - migration.
>>
>> CC: Steven Rostedt <rostedt@goodmis.org>
>> CC: Masami Hiramatsu <mhiramat@kernel.org>
>> CC: linux-trace-kernel@vger.kernel.org
>>
[...]
>> +
>> + TP_STRUCT__entry(
>> + __array( char, comm, TASK_COMM_LEN )
>
> Is there a reason to record "comm"? There's other ways to retrieve it than
> to always write it to the ring buffer.
>
Thank you for the review Steve. The motivation was to filter benchmark
in the trace to understand the behavior.
I will explore regarding other ways of retrieving comm.
(or may be even PID is enough..)
[...]
>> +
>> + TP_printk("kmmscand: scan_mm_start comm =%s mm=%p", __entry->comm, __entry->mm)
>
> No need to write the event name into the TP_printk(). That's redundant.
>
> Also, the above two events are pretty much identical. Please use
> DECLARE_EVENT_CLASS().
Sure. will do.
>> +
>> + TP_STRUCT__entry(
>> + __array( char, comm, TASK_COMM_LEN )
>
> Again, why comm?
>
Will do same change here too.
[...]
>> + if (mm->owner)
>> + trace_kmem_scan_mm_end(mm->owner, mm, address, total,
>> + mm_slot_scan_period, mm_slot_scan_size);
>
> Please do not add conditions that is used just for calling a tracepoint.
> That takes away the "nop" of the function. You can either use
> TRACE_EVENT_CONDITION() or DEFINE_EVENT_CONDITION(), or you can hard code
> it here:
>
> if (trace_kmem_scan_mm_end_enabled()) {
> if (mm->owner)
> trace_kmem_scan_mm_end(mm->owner, mm, address, total,
> mm_slot_scan_period, mm_slot_scan_size);
> }
>
> But since it is a single condition, I would prefer the *_CONDITION() macros
> above.
>
Very helpful suggestion.
Thanks again.. I will keep these points in mind for next version.
- Raghu
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC PATCH V0 09/10] trace/kmmscand: Add tracing of scanning and migration
2024-12-06 6:33 ` Raghavendra K T
@ 2024-12-06 14:49 ` Steven Rostedt
0 siblings, 0 replies; 4+ messages in thread
From: Steven Rostedt @ 2024-12-06 14:49 UTC (permalink / raw)
To: Raghavendra K T
Cc: linux-mm, linux-kernel, gourry, nehagholkar, abhishekd, david,
ying.huang, nphamcs, akpm, hannes, feng.tang, kbusch, bharata,
Hasan.Maruf, sj, willy, kirill.shutemov, mgorman, vbabka, hughd,
rientjes, shy828301, Liam.Howlett, peterz, mingo,
Masami Hiramatsu, linux-trace-kernel
On Fri, 6 Dec 2024 12:03:29 +0530
Raghavendra K T <raghavendra.kt@amd.com> wrote:
> On 12/5/2024 11:16 PM, Steven Rostedt wrote:
> > On Sun, 1 Dec 2024 15:38:17 +0000
> > Raghavendra K T <raghavendra.kt@amd.com> wrote:
> >
> >> Add tracing support to track
> >> - start and end of scanning.
> >> - migration.
> >>
> >> CC: Steven Rostedt <rostedt@goodmis.org>
> >> CC: Masami Hiramatsu <mhiramat@kernel.org>
> >> CC: linux-trace-kernel@vger.kernel.org
> >>
>
> [...]
>
> >> +
> >> + TP_STRUCT__entry(
> >> + __array( char, comm, TASK_COMM_LEN )
> >
> > Is there a reason to record "comm"? There's other ways to retrieve it than
> > to always write it to the ring buffer.
> >
>
> Thank you for the review Steve. The motivation was to filter benchmark
> in the trace to understand the behavior.
> I will explore regarding other ways of retrieving comm.
> (or may be even PID is enough..)
You can filter on current comm for any event with trace-cmd and even with the
"filter" file. It doesn't need to be part of the event.
For the filter file:
# echo "COMM == rcu_preempt" > /sys/kernel/tracing/events/timer/hrtimer_cancel/filter
or with trace-cmd
# trace-cmd start -e hrtimer_cancel -f 'COMM == "rcu_preempt"'
# trace-cmd show
# tracer: nop
#
# entries-in-buffer/entries-written: 10/10 #P:8
#
# _-----=> irqs-off/BH-disabled
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth
# ||| / _-=> migrate-disable
# |||| / delay
# TASK-PID CPU# ||||| TIMESTAMP FUNCTION
# | | | ||||| | |
rcu_preempt-18 [001] d..3. 54968.170887: hrtimer_cancel: hrtimer=00000000456b5702
rcu_preempt-18 [001] d..3. 54968.177704: hrtimer_cancel: hrtimer=00000000456b5702
rcu_preempt-18 [001] d..3. 54968.181678: hrtimer_cancel: hrtimer=00000000456b5702
rcu_preempt-18 [001] d..3. 54968.185679: hrtimer_cancel: hrtimer=00000000456b5702
rcu_preempt-18 [001] d..3. 54968.186092: hrtimer_cancel: hrtimer=00000000456b5702
rcu_preempt-18 [001] d..3. 54968.193676: hrtimer_cancel: hrtimer=00000000456b5702
rcu_preempt-18 [001] d..3. 54968.193686: hrtimer_cancel: hrtimer=00000000456b5702
rcu_preempt-18 [001] d..3. 54972.871315: hrtimer_cancel: hrtimer=00000000456b5702
rcu_preempt-18 [001] d..3. 54972.875176: hrtimer_cancel: hrtimer=00000000456b5702
rcu_preempt-18 [001] d..3. 54972.881751: hrtimer_cancel: hrtimer=00000000456b5702
Or you can do it after the fact from a trace.dat file:
# trace-cmd record -e hrtimer_cancel sleep 10
# trace-cmd report | head
cpus=8
sleep-1641 [006] d.h2. 55109.598846: hrtimer_cancel: hrtimer=0xffff9800fdfa1888
sleep-1641 [006] d..3. 55109.599089: hrtimer_cancel: hrtimer=0xffff9800fdfb3140
<idle>-0 [006] d..2. 55109.599111: hrtimer_cancel: hrtimer=0xffff9800fdfa1888
<idle>-0 [006] d.h7. 55109.603848: hrtimer_cancel: hrtimer=0xffff9800fdfb3180
<idle>-0 [006] dN.2. 55109.603895: hrtimer_cancel: hrtimer=0xffff9800fdfa1888
<idle>-0 [000] d.h3. 55109.604478: hrtimer_cancel: hrtimer=0xffff9800fde33180
<idle>-0 [000] dN.2. 55109.604492: hrtimer_cancel: hrtimer=0xffff9800fde21888
rcu_preempt-18 [000] d..3. 55109.604549: hrtimer_cancel: hrtimer=0xffff9800fde33140
<idle>-0 [000] d..2. 55109.604573: hrtimer_cancel: hrtimer=0xffff9800fde21888
# trace-cmd report -F '.*:COMM == "rcu_preempt"'
cpus=8
rcu_preempt-18 [000] d..3. 55109.604549: hrtimer_cancel: hrtimer=0xffff9800fde33140
rcu_preempt-18 [000] d..3. 55109.609320: hrtimer_cancel: hrtimer=0xffff9800fde33140
rcu_preempt-18 [000] d..3. 55109.613350: hrtimer_cancel: hrtimer=0xffff9800fde33140
rcu_preempt-18 [000] d..3. 55119.609772: hrtimer_cancel: hrtimer=0xffff9800fde33140
-- Steve
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-12-06 14:49 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20241201153818.2633616-1-raghavendra.kt@amd.com>
2024-12-01 15:38 ` [RFC PATCH V0 09/10] trace/kmmscand: Add tracing of scanning and migration Raghavendra K T
2024-12-05 17:46 ` Steven Rostedt
2024-12-06 6:33 ` Raghavendra K T
2024-12-06 14:49 ` Steven Rostedt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).