* RFC [Patch] few useful page reclaim mm tracepoints
@ 2009-09-28 16:09 Larry Woodman
2009-09-30 5:14 ` KOSAKI Motohiro
0 siblings, 1 reply; 2+ messages in thread
From: Larry Woodman @ 2009-09-28 16:09 UTC (permalink / raw)
To: linux-kernel, linux-mm
[-- Attachment #1: Type: text/plain, Size: 1172 bytes --]
Here are a few mm page reclaim tracepoints that really show what is being
reclaimed and from where. mm_get_scanratio reports the number of anonymous
and pagecache pages as well as the percentage that will be reclaimed from
each. mm_pagereclaim_shrinkactive reports whether it is shrinking
anonymous or pagecache pages, the number scanned and the number actually
moved (deactivated). mm_pagereclaim_shrinkinactive reports whether it is
shrinking anonymous or pagecache pages, the number scanned and the
number actually reclaimed. These three simple mm tracepoints capture
much of the page reclaim activity.
------------------------------------------------------------------------
# tracer: mm
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
kswapd1-549 [004] 149.524509: mm_get_scanratio: 2043329 anonymous pages, reclaiming 1% - 1312 pagecache pages, reclaiming 99%
kswapd1-549 [004] 149.524709: mm_pagereclaim_shrinkactive: anonymous, scanned 32, moved 32, priority 12
kswapd1-549 [004] 149.524542: mm_pagereclaim_shrinkinactive: anonymous, scanned 32, reclaimed 32, priority 7
[-- Attachment #2: upstream.diff --]
[-- Type: text/x-patch, Size: 3627 bytes --]
diff --git a/include/trace/events/mm.h b/include/trace/events/mm.h
new file mode 100644
index 0000000..d5a5ec2
--- /dev/null
+++ b/include/trace/events/mm.h
@@ -0,0 +1,94 @@
+#if !defined(_TRACE_MM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MM_H
+
+#include <linux/mm.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mm
+
+TRACE_EVENT(mm_pagereclaim_shrinkactive,
+
+ TP_PROTO(unsigned long scanned, unsigned long moved,
+ int file, int priority),
+
+ TP_ARGS(scanned, moved, file, priority),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, scanned)
+ __field(unsigned long, moved)
+ __field(int, file)
+ __field(int, priority)
+ ),
+
+ TP_fast_assign(
+ __entry->scanned = scanned;
+ __entry->moved = moved;
+ __entry->file = file;
+ __entry->priority = priority;
+ ),
+
+ TP_printk("%s, scanned %ld, moved %ld, priority %d",
+ __entry->file ? "pagecache" : "anonymous",
+ __entry->scanned, __entry->moved,
+ __entry->priority)
+ );
+
+TRACE_EVENT(mm_pagereclaim_shrinkinactive,
+
+ TP_PROTO(unsigned long scanned, unsigned long reclaimed,
+ int file, int priority),
+
+ TP_ARGS(scanned, reclaimed, file, priority),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, scanned)
+ __field(unsigned long, reclaimed)
+ __field(int, file)
+ __field(int, priority)
+ ),
+
+ TP_fast_assign(
+ __entry->scanned = scanned;
+ __entry->reclaimed = reclaimed;
+ __entry->file = file;
+ __entry->priority = priority;
+ ),
+
+ TP_printk("%s, scanned %ld, reclaimed %ld, priority %d",
+ __entry->file ? "pagecache" : "anonymous",
+ __entry->scanned, __entry->reclaimed,
+ __entry->priority)
+ );
+
+TRACE_EVENT(mm_get_scanratio,
+
+ TP_PROTO(unsigned long anon, unsigned long file,
+ unsigned long percent_anon, unsigned long percent_file),
+
+ TP_ARGS(anon, file, percent_anon, percent_file),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, anon)
+ __field(unsigned long, file)
+ __field(unsigned long, percent_anon)
+ __field(unsigned long, percent_file)
+ ),
+
+ TP_fast_assign(
+ __entry->anon = anon;
+ __entry->file = file;
+ __entry->percent_anon = percent_anon;
+ __entry->percent_file = percent_file;
+ ),
+
+ TP_printk("%ld anonymous pages, reclaiming %ld%% - %ld pagecache pages, reclaiming %ld%%",
+ __entry->anon, __entry->percent_anon,
+ __entry->file, __entry->percent_file)
+
+ );
+
+#endif /* _TRACE_MM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ba8228e..8797a26 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -40,6 +40,8 @@
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
#include <linux/sysctl.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/mm.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -1168,6 +1170,8 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
done:
local_irq_enable();
pagevec_release(&pvec);
+ trace_mm_pagereclaim_shrinkinactive(nr_scanned, nr_reclaimed,
+ file, priority);
return nr_reclaimed;
}
@@ -1325,6 +1329,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
LRU_BASE + file * LRU_FILE);
spin_unlock_irq(&zone->lru_lock);
+ trace_mm_pagereclaim_shrinkactive(pgscanned, pgmoved, file, priority);
}
static int inactive_anon_is_low_global(struct zone *zone)
@@ -1491,6 +1496,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
/* Normalize to percentages */
percent[0] = 100 * ap / (ap + fp + 1);
percent[1] = 100 - percent[0];
+ trace_mm_get_scanratio(anon, file, percent[0], percent[1]);
}
/*
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: RFC [Patch] few useful page reclaim mm tracepoints
2009-09-28 16:09 RFC [Patch] few useful page reclaim mm tracepoints Larry Woodman
@ 2009-09-30 5:14 ` KOSAKI Motohiro
0 siblings, 0 replies; 2+ messages in thread
From: KOSAKI Motohiro @ 2009-09-30 5:14 UTC (permalink / raw)
To: Larry Woodman; +Cc: kosaki.motohiro, linux-kernel, linux-mm
Hi
>
> Here a few mm page reclaim tracepoints that really show what is being
> reclaimed and from where. mm_get_scanratio reports the number anonymous
> and pagecache pages as well as the percent that will be reclaimed from
> each. mm_pagereclaim_shrinkactive reports whether it is shrinking
> anonymous or pagecache pages, the number scanned and the number actually
> moved(deactivated). mm_pagereclaim_shrinkinactive reports whether it is
> shrinking anonymous or pagecache pages, the number scanned and the
> number actually reclaimed. These three simple mm tracepoints capture
> much of the page reclaim activity.
>
> ------------------------------------------------------------------------
>
> # tracer: mm
> #
> # TASK-PID CPU# TIMESTAMP FUNCTION
> # | | | | |
> kswapd1-549 [004] 149.524509: mm_get_scanratio: 2043329
> anonymous pages, reclaiming 1% - 1312 pagecache pages, reclaiming 99%
> kswapd1-549 [004] 149.524709: mm_pagereclaim_shrinkactive:
> anonymous, scanned 32, moved 32, priority 12
> kswapd1-549 [004] 149.524542:
> mm_pagereclaim_shrinkinactive: anonymous, scanned 32, reclaimed 32,
> priority 7
Looks good generally, and I have a few comments.
> @@ -1168,6 +1170,8 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
> done:
> local_irq_enable();
> pagevec_release(&pvec);
> + trace_mm_pagereclaim_shrinkinactive(nr_scanned, nr_reclaimed,
> + file, priority);
> return nr_reclaimed;
> }
In shrink_inactive_list(), each page ends up in one of these states:
(1) moved to active list
(2) moved to inactive list again
(3) moved to unevictable list
(4) freed
Your tracepoint only watches freed pages.
Maybe the other movements should be watched too; each kind of movement indicates a different kind of pressure.
Plus, I personally prefer shorter tracepoint names.
>
> @@ -1325,6 +1329,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
> LRU_BASE + file * LRU_FILE);
>
> spin_unlock_irq(&zone->lru_lock);
> + trace_mm_pagereclaim_shrinkactive(pgscanned, pgmoved, file, priority);
> }
pgmoved doesn't hold meaningful data here.
pgmoved means how many pages were isolated from the active list, but it doesn't mean
how many pages were moved to the inactive list, which is what we really need.
> static int inactive_anon_is_low_global(struct zone *zone)
> @@ -1491,6 +1496,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
> /* Normalize to percentages */
> percent[0] = 100 * ap / (ap + fp + 1);
> percent[1] = 100 - percent[0];
> + trace_mm_get_scanratio(anon, file, percent[0], percent[1]);
> }
This may be a bad place for the tracepoint.
shrink_zone() has the following code:
if (!sc->may_swap || (nr_swap_pages <= 0)) {
noswap = 1;
percent[0] = 0;
percent[1] = 100;
} else
get_scan_ratio(zone, sc, percent);
(snip)
for_each_evictable_lru(l) {
int file = is_file_lru(l);
unsigned long scan;
scan = zone_nr_pages(zone, sc, l);
if (priority || noswap) {
scan >>= priority;
scan = (scan * percent[file]) / 100;
}
(1) shrink_zone() often doesn't call get_scan_ratio().
(2) In some cases, the "scan" calculation ignores percent[file].
Maybe we should log the scan variable or the nr[l] variable instead.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2009-09-30 5:13 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-28 16:09 RFC [Patch] few useful page reclaim mm tracepoints Larry Woodman
2009-09-30 5:14 ` KOSAKI Motohiro
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).