From: Alex Markuze <amarkuze@redhat.com>
To: ceph-devel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, idryomov@gmail.com,
vdubeyko@redhat.com, Alex Markuze <amarkuze@redhat.com>
Subject: [PATCH v4 06/11] ceph: add manual reset debugfs control and tracepoints
Date: Thu, 7 May 2026 12:27:32 +0000 [thread overview]
Message-ID: <20260507122737.2804094-7-amarkuze@redhat.com> (raw)
In-Reply-To: <20260507122737.2804094-1-amarkuze@redhat.com>
Add the debugfs and trace plumbing used to trigger and observe
manual client reset.
The reset interface exposes a trigger file for operator-initiated
reset and a status file for tracking the most recent run. The
tracepoints record scheduling, completion, and blocked caller
behavior so reset progress can be diagnosed from the client side.
debugfs layout under /sys/kernel/debug/ceph/<client>/reset/:
trigger - write to initiate a manual reset
status - read to see the most recent reset result
The reset directory is cleaned up via debugfs_remove_recursive()
on the parent, so individual file dentries are not stored.
Tracepoints:
ceph_client_reset_schedule - reset queued
ceph_client_reset_complete - reset finished (success or failure)
ceph_client_reset_blocked - caller blocked waiting for reset
ceph_client_reset_unblocked - caller unblocked after reset
All tracepoints use a null-safe access to monc.auth->global_id
to guard against early-init and late-teardown edge cases.
Signed-off-by: Alex Markuze <amarkuze@redhat.com>
---
fs/ceph/debugfs.c | 103 ++++++++++++++++++++++++++++++++++++
fs/ceph/mds_client.c | 7 +++
fs/ceph/super.h | 1 +
include/trace/events/ceph.h | 67 +++++++++++++++++++++++
4 files changed, 178 insertions(+)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index e2463f93cf6b..18eb5da03411 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -9,6 +9,7 @@
#include <linux/seq_file.h>
#include <linux/math64.h>
#include <linux/ktime.h>
+#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <linux/ceph/libceph.h>
@@ -392,6 +393,90 @@ static int status_show(struct seq_file *s, void *p)
return 0;
}
+static int reset_status_show(struct seq_file *s, void *p)
+{
+ struct ceph_fs_client *fsc = s->private;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_client_reset_state *st;
+ u64 trigger = 0, success = 0, failure = 0;
+ unsigned long last_start = 0, last_finish = 0;
+ int last_errno = 0;
+ enum ceph_client_reset_phase phase = CEPH_CLIENT_RESET_IDLE;
+ bool drain_timed_out = false;
+ int sessions_reset = 0;
+ int blocked_requests = 0;
+ char reason[CEPH_CLIENT_RESET_REASON_LEN];
+
+ if (!mdsc)
+ return 0;
+
+ st = &mdsc->reset_state;
+
+ spin_lock(&st->lock);
+ trigger = st->trigger_count;
+ success = st->success_count;
+ failure = st->failure_count;
+ last_start = st->last_start;
+ last_finish = st->last_finish;
+ last_errno = st->last_errno;
+ phase = st->phase;
+ drain_timed_out = st->drain_timed_out;
+ sessions_reset = st->sessions_reset;
+ strscpy(reason, st->last_reason, sizeof(reason));
+ spin_unlock(&st->lock);
+
+ blocked_requests = atomic_read(&st->blocked_requests);
+
+ seq_printf(s, "phase: %s\n", ceph_reset_phase_name(phase));
+ seq_printf(s, "trigger_count: %llu\n", trigger);
+ seq_printf(s, "success_count: %llu\n", success);
+ seq_printf(s, "failure_count: %llu\n", failure);
+ if (last_start)
+ seq_printf(s, "last_start_ms_ago: %u\n",
+ jiffies_to_msecs(jiffies - last_start));
+ else
+ seq_puts(s, "last_start_ms_ago: (never)\n");
+ if (last_finish)
+ seq_printf(s, "last_finish_ms_ago: %u\n",
+ jiffies_to_msecs(jiffies - last_finish));
+ else
+ seq_puts(s, "last_finish_ms_ago: (never)\n");
+ seq_printf(s, "last_errno: %d\n", last_errno);
+ seq_printf(s, "last_reason: %s\n",
+ reason[0] ? reason : "(none)");
+ seq_printf(s, "drain_timed_out: %s\n",
+ drain_timed_out ? "yes" : "no");
+ seq_printf(s, "sessions_reset: %d\n", sessions_reset);
+ seq_printf(s, "blocked_requests: %d\n", blocked_requests);
+
+ return 0;
+}
+
+static ssize_t reset_trigger_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ struct ceph_fs_client *fsc = file->private_data;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ char reason[CEPH_CLIENT_RESET_REASON_LEN];
+ size_t copy;
+ int ret;
+
+ if (!mdsc)
+ return -ENODEV;
+
+ copy = min_t(size_t, len, sizeof(reason) - 1);
+ if (copy && copy_from_user(reason, buf, copy))
+ return -EFAULT;
+ reason[copy] = '\0';
+ strim(reason);
+
+ ret = ceph_mdsc_schedule_reset(mdsc, reason);
+ if (ret)
+ return ret;
+
+ return len;
+}
+
static int subvolume_metrics_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
@@ -450,6 +535,7 @@ DEFINE_SHOW_ATTRIBUTE(mdsc);
DEFINE_SHOW_ATTRIBUTE(caps);
DEFINE_SHOW_ATTRIBUTE(mds_sessions);
DEFINE_SHOW_ATTRIBUTE(status);
+DEFINE_SHOW_ATTRIBUTE(reset_status);
DEFINE_SHOW_ATTRIBUTE(metrics_file);
DEFINE_SHOW_ATTRIBUTE(metrics_latency);
DEFINE_SHOW_ATTRIBUTE(metrics_size);
@@ -521,6 +607,13 @@ static int metric_features_show(struct seq_file *s, void *p)
DEFINE_SHOW_ATTRIBUTE(metric_features);
+static const struct file_operations ceph_reset_trigger_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = reset_trigger_write,
+ .llseek = noop_llseek,
+};
+
/*
* debugfs
*/
@@ -554,6 +647,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
debugfs_remove(fsc->debugfs_caps);
debugfs_remove(fsc->debugfs_status);
debugfs_remove(fsc->debugfs_mdsc);
+ debugfs_remove_recursive(fsc->debugfs_reset_dir);
debugfs_remove(fsc->debugfs_subvolume_metrics);
debugfs_remove_recursive(fsc->debugfs_metrics_dir);
doutc(fsc->client, "done\n");
@@ -602,6 +696,15 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc,
&caps_fops);
+ fsc->debugfs_reset_dir = debugfs_create_dir("reset",
+ fsc->client->debugfs_dir);
+ debugfs_create_file("trigger", 0200,
+ fsc->debugfs_reset_dir, fsc,
+ &ceph_reset_trigger_fops);
+ debugfs_create_file("status", 0400,
+ fsc->debugfs_reset_dir, fsc,
+ &reset_status_fops);
+
fsc->debugfs_status = debugfs_create_file("status",
0400,
fsc->client->debugfs_dir,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index ce773b1095da..b16638ebff7f 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -5324,6 +5324,7 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc)
blocked_count = atomic_inc_return(&st->blocked_requests);
doutc(cl, "request blocked during reset, %d total blocked\n",
blocked_count);
+ trace_ceph_client_reset_blocked(mdsc, blocked_count);
retry:
remaining = max_t(long, deadline - jiffies, 1);
@@ -5334,10 +5335,12 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc)
if (wait_ret == 0) {
atomic_dec(&st->blocked_requests);
pr_warn_client(cl, "timed out waiting for reset to complete\n");
+ trace_ceph_client_reset_unblocked(mdsc, -ETIMEDOUT);
return -ETIMEDOUT;
}
if (wait_ret < 0) {
atomic_dec(&st->blocked_requests);
+ trace_ceph_client_reset_unblocked(mdsc, (int)wait_ret);
return (int)wait_ret; /* -ERESTARTSYS */
}
@@ -5352,12 +5355,14 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc)
if (time_before(jiffies, deadline))
goto retry;
atomic_dec(&st->blocked_requests);
+ trace_ceph_client_reset_unblocked(mdsc, -ETIMEDOUT);
return -ETIMEDOUT;
}
ret = st->last_errno;
spin_unlock(&st->lock);
atomic_dec(&st->blocked_requests);
+ trace_ceph_client_reset_unblocked(mdsc, ret);
return ret ? -EAGAIN : 0;
}
@@ -5387,6 +5392,7 @@ static void ceph_mdsc_reset_complete(struct ceph_mds_client *mdsc, int ret)
/* Wake up all requests that were blocked waiting for reset */
wake_up_all(&st->blocked_wq);
+ trace_ceph_client_reset_complete(mdsc, ret);
}
static void ceph_mdsc_reset_workfn(struct work_struct *work)
@@ -5749,6 +5755,7 @@ int ceph_mdsc_schedule_reset(struct ceph_mds_client *mdsc,
pr_info_client(mdsc->fsc->client,
"manual session reset scheduled (reason=\"%s\")\n",
msg);
+ trace_ceph_client_reset_schedule(mdsc, msg);
return 0;
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a4993644d543..1d6aab060780 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -179,6 +179,7 @@ struct ceph_fs_client {
struct dentry *debugfs_status;
struct dentry *debugfs_mds_sessions;
struct dentry *debugfs_metrics_dir;
+ struct dentry *debugfs_reset_dir;
struct dentry *debugfs_subvolume_metrics;
#endif
diff --git a/include/trace/events/ceph.h b/include/trace/events/ceph.h
index 08cb0659fbfc..1b990632f62b 100644
--- a/include/trace/events/ceph.h
+++ b/include/trace/events/ceph.h
@@ -226,6 +226,73 @@ TRACE_EVENT(ceph_handle_caps,
__entry->mseq)
);
+/*
+ * Client reset tracepoints - identify the client by its monitor-
+ * assigned global_id so traces remain meaningful when kernel pointer
+ * hashing is enabled.
+ */
+TRACE_EVENT(ceph_client_reset_schedule,
+ TP_PROTO(const struct ceph_mds_client *mdsc, const char *reason),
+ TP_ARGS(mdsc, reason),
+ TP_STRUCT__entry(
+ __field(u64, client_id)
+ __string(reason, reason ? reason : "")
+ ),
+ TP_fast_assign(
+ __entry->client_id = mdsc->fsc->client->monc.auth ?
+ mdsc->fsc->client->monc.auth->global_id : 0;
+ __assign_str(reason);
+ ),
+ TP_printk("client_id=%llu reason=%s",
+ __entry->client_id, __get_str(reason))
+);
+
+TRACE_EVENT(ceph_client_reset_complete,
+ TP_PROTO(const struct ceph_mds_client *mdsc, int ret),
+ TP_ARGS(mdsc, ret),
+ TP_STRUCT__entry(
+ __field(u64, client_id)
+ __field(int, ret)
+ ),
+ TP_fast_assign(
+ __entry->client_id = mdsc->fsc->client->monc.auth ?
+ mdsc->fsc->client->monc.auth->global_id : 0;
+ __entry->ret = ret;
+ ),
+ TP_printk("client_id=%llu ret=%d", __entry->client_id, __entry->ret)
+);
+
+TRACE_EVENT(ceph_client_reset_blocked,
+ TP_PROTO(const struct ceph_mds_client *mdsc, int blocked_count),
+ TP_ARGS(mdsc, blocked_count),
+ TP_STRUCT__entry(
+ __field(u64, client_id)
+ __field(int, blocked_count)
+ ),
+ TP_fast_assign(
+ __entry->client_id = mdsc->fsc->client->monc.auth ?
+ mdsc->fsc->client->monc.auth->global_id : 0;
+ __entry->blocked_count = blocked_count;
+ ),
+ TP_printk("client_id=%llu blocked_count=%d", __entry->client_id,
+ __entry->blocked_count)
+);
+
+TRACE_EVENT(ceph_client_reset_unblocked,
+ TP_PROTO(const struct ceph_mds_client *mdsc, int ret),
+ TP_ARGS(mdsc, ret),
+ TP_STRUCT__entry(
+ __field(u64, client_id)
+ __field(int, ret)
+ ),
+ TP_fast_assign(
+ __entry->client_id = mdsc->fsc->client->monc.auth ?
+ mdsc->fsc->client->monc.auth->global_id : 0;
+ __entry->ret = ret;
+ ),
+ TP_printk("client_id=%llu ret=%d", __entry->client_id, __entry->ret)
+);
+
#undef EM
#undef E_
#endif /* _TRACE_CEPH_H */
--
2.34.1
next prev parent reply other threads:[~2026-05-07 12:27 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-07 12:27 [PATCH v4 00/11] ceph: manual client session reset Alex Markuze
2026-05-07 12:27 ` [PATCH v4 01/11] ceph: convert inode flags to named bit positions and atomic bitops Alex Markuze
2026-05-07 18:35 ` Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 02/11] ceph: use proper endian conversion for flock_len in reconnect Alex Markuze
2026-05-07 12:27 ` [PATCH v4 03/11] ceph: harden send_mds_reconnect and handle active-MDS peer reset Alex Markuze
2026-05-07 18:43 ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 04/11] ceph: add diagnostic timeout loop to wait_caps_flush() Alex Markuze
2026-05-07 19:01 ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 05/11] ceph: add client reset state machine and session teardown Alex Markuze
2026-05-07 19:17 ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` Alex Markuze [this message]
2026-05-07 19:22 ` [EXTERNAL] [PATCH v4 06/11] ceph: add manual reset debugfs control and tracepoints Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 07/11] selftests: ceph: add reset consistency checker Alex Markuze
2026-05-07 19:24 ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 08/11] selftests: ceph: add reset stress test Alex Markuze
2026-05-07 19:29 ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 09/11] selftests: ceph: add reset corner-case tests Alex Markuze
2026-05-07 19:31 ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 10/11] selftests: ceph: add validation harness Alex Markuze
2026-05-07 19:33 ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 11/11] selftests: ceph: wire up Ceph reset kselftests and documentation Alex Markuze
2026-05-07 19:38 ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 18:28 ` [EXTERNAL] [PATCH v4 00/11] ceph: manual client session reset Viacheslav Dubeyko
2026-05-08 17:49 ` Viacheslav Dubeyko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260507122737.2804094-7-amarkuze@redhat.com \
--to=amarkuze@redhat.com \
--cc=ceph-devel@vger.kernel.org \
--cc=idryomov@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=vdubeyko@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox