From: Jeff Layton <jlayton@kernel.org>
To: John Stultz <jstultz@google.com>,
Thomas Gleixner <tglx@linutronix.de>,
Stephen Boyd <sboyd@kernel.org>,
Alexander Viro <viro@zeniv.linux.org.uk>,
Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>,
Steven Rostedt <rostedt@goodmis.org>,
Masami Hiramatsu <mhiramat@kernel.org>,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Jonathan Corbet <corbet@lwn.net>,
Chandan Babu R <chandan.babu@oracle.com>,
"Darrick J. Wong" <djwong@kernel.org>,
Theodore Ts'o <tytso@mit.edu>,
Andreas Dilger <adilger.kernel@dilger.ca>,
Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
David Sterba <dsterba@suse.com>, Hugh Dickins <hughd@google.com>,
Andrew Morton <akpm@linux-foundation.org>,
Chuck Lever <chuck.lever@oracle.com>,
Vadim Fedorenko <vadim.fedorenko@linux.dev>
Cc: Randy Dunlap <rdunlap@infradead.org>,
linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-trace-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
linux-xfs@vger.kernel.org, linux-ext4@vger.kernel.org,
linux-btrfs@vger.kernel.org, linux-nfs@vger.kernel.org,
linux-mm@kvack.org, Jeff Layton <jlayton@kernel.org>
Subject: [PATCH v8 06/11] fs: add percpu counters for significant multigrain timestamp events
Date: Sat, 14 Sep 2024 13:07:19 -0400 [thread overview]
Message-ID: <20240914-mgtime-v8-6-5bd872330bed@kernel.org> (raw)
In-Reply-To: <20240914-mgtime-v8-0-5bd872330bed@kernel.org>
Add percpu counters for counting various stats around mgtimes, and a new
debugfs file for displaying them when CONFIG_DEBUG_FS is enabled:
- number of attempted ctime updates
- number of successful i_ctime_nsec swaps
- number of fine-grained timestamp fetches
- number of coarse-grained floor swaps
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
fs/inode.c | 76 ++++++++++++++++++++++++++++++++++++--
include/linux/timekeeping.h | 1 +
kernel/time/timekeeping.c | 3 +-
kernel/time/timekeeping_debug.c | 12 ++++++
kernel/time/timekeeping_internal.h | 3 ++
5 files changed, 90 insertions(+), 5 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index d7da9d06921f..1f0487104c71 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -21,6 +21,8 @@
#include <linux/list_lru.h>
#include <linux/iversion.h>
#include <linux/rw_hint.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
#include <trace/events/writeback.h>
#define CREATE_TRACE_POINTS
#include <trace/events/timestamp.h>
@@ -101,6 +103,70 @@ long get_nr_dirty_inodes(void)
return nr_dirty > 0 ? nr_dirty : 0;
}
+#ifdef CONFIG_DEBUG_FS
+static DEFINE_PER_CPU(long, mg_ctime_updates);
+static DEFINE_PER_CPU(long, mg_fine_stamps);
+static DEFINE_PER_CPU(long, mg_ctime_swaps);
+
+static long get_mg_ctime_updates(void)
+{
+	long total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		total += per_cpu(mg_ctime_updates, cpu);
+	return total > 0 ? total : 0;	/* never report a negative sum */
+}
+
+static long get_mg_fine_stamps(void)
+{
+	long total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		total += per_cpu(mg_fine_stamps, cpu);
+	return total > 0 ? total : 0;	/* never report a negative sum */
+}
+
+static long get_mg_ctime_swaps(void)
+{
+	long total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		total += per_cpu(mg_ctime_swaps, cpu);
+	return total > 0 ? total : 0;	/* never report a negative sum */
+}
+
+#define mgtime_counter_inc(__var) this_cpu_inc(__var)
+
+static int mgts_show(struct seq_file *s, void *p)
+{
+	long nr_updates = get_mg_ctime_updates();
+	long nr_swaps = get_mg_ctime_swaps();
+	long nr_fine = get_mg_fine_stamps();
+	long nr_floor = get_mg_floor_swaps();
+
+	/* One line: ctime updates, ctime swaps, fine stamps, floor swaps */
+	seq_printf(s, "%ld %ld %ld %ld\n", nr_updates, nr_swaps, nr_fine, nr_floor);
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mgts);
+
+static int __init mg_debugfs_init(void)
+{
+ debugfs_create_file("multigrain_timestamps", S_IFREG | S_IRUGO, NULL, NULL, &mgts_fops);
+ return 0; /* always 0: the debugfs_create_file() result is not checked */
+}
+late_initcall(mg_debugfs_init);
+
+#else /* ! CONFIG_DEBUG_FS */
+
+#define mgtime_counter_inc(__var) do { } while (0) /* no-op; takes the counter argument that callers pass */
+
+#endif /* CONFIG_DEBUG_FS */
+
/*
* Handle nr_inode sysctl
*/
@@ -2655,10 +2721,9 @@ EXPORT_SYMBOL(timestamp_truncate);
*
* If it is multigrain, then we first see if the coarse-grained timestamp is
* distinct from what we have. If so, then we'll just use that. If we have to
- * get a fine-grained timestamp, then do so, and try to swap it into the floor.
- * We accept the new floor value regardless of the outcome of the cmpxchg.
- * After that, we try to swap the new value into i_ctime_nsec. Again, we take
- * the resulting ctime, regardless of the outcome of the swap.
+ * get a fine-grained timestamp, then do so. After that, we try to swap the new
+ * value into i_ctime_nsec. We take the resulting ctime, regardless of the
+ * outcome of the swap.
*/
struct timespec64 inode_set_ctime_current(struct inode *inode)
{
@@ -2687,8 +2752,10 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
if (timespec64_compare(&now, &ctime) <= 0) {
ktime_get_real_ts64_mg(&now);
now = timestamp_truncate(now, inode);
+ mgtime_counter_inc(mg_fine_stamps);
}
}
+ mgtime_counter_inc(mg_ctime_updates);
/* No need to cmpxchg if it's exactly the same */
if (cns == now.tv_nsec && inode->i_ctime_sec == now.tv_sec) {
@@ -2702,6 +2769,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
/* If swap occurred, then we're (mostly) done */
inode->i_ctime_sec = now.tv_sec;
trace_ctime_ns_xchg(inode, cns, now.tv_nsec, cur);
+ mgtime_counter_inc(mg_ctime_swaps);
} else {
/*
* Was the change due to someone marking the old ctime QUERIED?
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 7aa85246c183..b9c8c597a073 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -48,6 +48,7 @@ extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
/* Multigrain timestamp interfaces */
extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts);
extern void ktime_get_real_ts64_mg(struct timespec64 *ts);
+extern long get_mg_floor_swaps(void);
void getboottime64(struct timespec64 *ts);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 16937242b904..94b0219955a2 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2440,7 +2440,7 @@ EXPORT_SYMBOL_GPL(ktime_get_coarse_real_ts64_mg);
* regardless of the outcome of the swap. Note that this is a filesystem
* specific interface and should be avoided outside of that context.
*/
-void ktime_get_real_ts64_mg(struct timespec64 *ts, u64 cookie)
+void ktime_get_real_ts64_mg(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
ktime_t old = atomic64_read(&mg_floor);
@@ -2464,6 +2464,7 @@ void ktime_get_real_ts64_mg(struct timespec64 *ts, u64 cookie)
if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) {
ts->tv_nsec = 0;
timespec64_add_ns(ts, nsecs);
+ mgtime_counter_inc(mg_floor_swaps);
} else {
/*
* Something has changed mg_floor since "old" was
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index b73e8850e58d..9a3792072762 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -17,6 +17,9 @@
#define NUM_BINS 32
+/* incremented every time mg_floor is updated */
+DEFINE_PER_CPU(long, mg_floor_swaps);
+
static unsigned int sleep_time_bin[NUM_BINS] = {0};
static int tk_debug_sleep_time_show(struct seq_file *s, void *data)
@@ -53,3 +56,12 @@ void tk_debug_account_sleep_time(const struct timespec64 *t)
(s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC);
}
+long get_mg_floor_swaps(void)
+{
+	long total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		total += per_cpu(mg_floor_swaps, cpu);
+	return total > 0 ? total : 0;	/* never report a negative sum */
+}
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index 4ca2787d1642..2b49332b45a5 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -11,8 +11,11 @@
*/
#ifdef CONFIG_DEBUG_FS
extern void tk_debug_account_sleep_time(const struct timespec64 *t);
+DECLARE_PER_CPU(long, mg_floor_swaps);
+#define mgtime_counter_inc(__var) this_cpu_inc(__var)
#else
#define tk_debug_account_sleep_time(x)
+#define mgtime_counter_inc(__var) do { } while (0) /* no-op; takes the counter argument that callers pass */
#endif
#ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE
--
2.46.0
next prev parent reply other threads:[~2024-09-14 17:07 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-14 17:07 [PATCH v8 00/11] fs: multigrain timestamp redux Jeff Layton
2024-09-14 17:07 ` [PATCH v8 01/11] timekeeping: move multigrain timestamp floor handling into timekeeper Jeff Layton
2024-09-14 20:10 ` John Stultz
2024-09-14 23:14 ` Jeff Layton
2024-09-16 10:12 ` Thomas Gleixner
2024-09-16 10:32 ` Thomas Gleixner
2024-09-16 10:57 ` Jeff Layton
2024-09-30 19:16 ` Thomas Gleixner
2024-09-30 19:37 ` Jeff Layton
2024-09-30 20:19 ` Thomas Gleixner
2024-09-30 20:53 ` Jeff Layton
2024-09-30 21:35 ` Thomas Gleixner
2024-10-01 9:45 ` Jeff Layton
2024-10-01 12:45 ` Thomas Gleixner
2024-10-02 12:41 ` Jeff Layton
2024-09-19 16:50 ` Jeff Layton
2024-09-30 19:43 ` Thomas Gleixner
2024-09-30 20:12 ` Jeff Layton
2024-09-30 19:13 ` Thomas Gleixner
2024-09-30 19:27 ` Jeff Layton
2024-09-30 20:15 ` Thomas Gleixner
2024-09-14 17:07 ` [PATCH v8 02/11] fs: add infrastructure for multigrain timestamps Jeff Layton
2024-09-14 17:07 ` [PATCH v8 03/11] fs: have setattr_copy handle multigrain timestamps appropriately Jeff Layton
2024-09-14 17:07 ` [PATCH v8 04/11] fs: handle delegated timestamps in setattr_copy_mgtime Jeff Layton
2024-09-14 17:07 ` [PATCH v8 05/11] fs: tracepoints around multigrain timestamp events Jeff Layton
2024-09-15 8:21 ` Steven Rostedt
2024-09-14 17:07 ` Jeff Layton [this message]
2024-09-16 10:20 ` [PATCH v8 06/11] fs: add percpu counters for significant " Thomas Gleixner
2024-09-14 17:07 ` [PATCH v8 07/11] Documentation: add a new file documenting multigrain timestamps Jeff Layton
2024-09-16 1:01 ` Bagas Sanjaya
2024-09-19 16:53 ` Jeff Layton
2024-09-14 17:07 ` [PATCH v8 08/11] xfs: switch to " Jeff Layton
2024-09-14 17:07 ` [PATCH v8 09/11] ext4: " Jeff Layton
2024-09-14 17:07 ` [PATCH v8 10/11] btrfs: convert " Jeff Layton
2024-09-14 17:07 ` [PATCH v8 11/11] tmpfs: add support for " Jeff Layton
2024-09-26 16:59 ` [PATCH v8 00/11] fs: multigrain timestamp redux Randy Dunlap
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240914-mgtime-v8-6-5bd872330bed@kernel.org \
--to=jlayton@kernel.org \
--cc=adilger.kernel@dilger.ca \
--cc=akpm@linux-foundation.org \
--cc=brauner@kernel.org \
--cc=chandan.babu@oracle.com \
--cc=chuck.lever@oracle.com \
--cc=clm@fb.com \
--cc=corbet@lwn.net \
--cc=djwong@kernel.org \
--cc=dsterba@suse.com \
--cc=hughd@google.com \
--cc=jack@suse.cz \
--cc=josef@toxicpanda.com \
--cc=jstultz@google.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-nfs@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=mhiramat@kernel.org \
--cc=rdunlap@infradead.org \
--cc=rostedt@goodmis.org \
--cc=sboyd@kernel.org \
--cc=tglx@linutronix.de \
--cc=tytso@mit.edu \
--cc=vadim.fedorenko@linux.dev \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).