The Linux Kernel Mailing List
 help / color / mirror / Atom feed
From: Alex Markuze <amarkuze@redhat.com>
To: ceph-devel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, idryomov@gmail.com,
	vdubeyko@redhat.com, Alex Markuze <amarkuze@redhat.com>
Subject: [PATCH v4 06/11] ceph: add manual reset debugfs control and tracepoints
Date: Thu,  7 May 2026 12:27:32 +0000	[thread overview]
Message-ID: <20260507122737.2804094-7-amarkuze@redhat.com> (raw)
In-Reply-To: <20260507122737.2804094-1-amarkuze@redhat.com>

Add the debugfs and trace plumbing used to trigger and observe
manual client reset.

The reset interface exposes a trigger file for operator-initiated
reset and a status file for tracking the most recent run.  The
tracepoints record scheduling, completion, and blocked caller
behavior so reset progress can be diagnosed from the client side.

debugfs layout under /sys/kernel/debug/ceph/<client>/reset/:
  trigger - write to initiate a manual reset
  status  - read to see the most recent reset result

The reset directory is cleaned up via debugfs_remove_recursive()
on the parent, so individual file dentries are not stored.

Tracepoints:
  ceph_client_reset_schedule  - reset queued
  ceph_client_reset_complete  - reset finished (success or failure)
  ceph_client_reset_blocked   - caller blocked waiting for reset
  ceph_client_reset_unblocked - caller unblocked after reset

All tracepoints use a null-safe access for monc.auth->global_id
to guard against early-init or late-teardown edge cases.

Signed-off-by: Alex Markuze <amarkuze@redhat.com>
---
 fs/ceph/debugfs.c           | 103 ++++++++++++++++++++++++++++++++++++
 fs/ceph/mds_client.c        |   7 +++
 fs/ceph/super.h             |   1 +
 include/trace/events/ceph.h |  67 +++++++++++++++++++++++
 4 files changed, 178 insertions(+)

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index e2463f93cf6b..18eb5da03411 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -9,6 +9,7 @@
 #include <linux/seq_file.h>
 #include <linux/math64.h>
 #include <linux/ktime.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 
 #include <linux/ceph/libceph.h>
@@ -392,6 +393,90 @@ static int status_show(struct seq_file *s, void *p)
 	return 0;
 }
 
+/*
+ * Read handler for /sys/kernel/debug/ceph/<client>/reset/status.
+ *
+ * Prints a point-in-time snapshot of the reset state machine: phase,
+ * trigger/success/failure counters, timing of the last run, the last
+ * errno and reason string, and how many callers are currently blocked.
+ */
+static int reset_status_show(struct seq_file *s, void *p)
+{
+	struct ceph_fs_client *fsc = s->private;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_client_reset_state *st;
+	u64 trigger = 0, success = 0, failure = 0;
+	unsigned long last_start = 0, last_finish = 0;
+	int last_errno = 0;
+	enum ceph_client_reset_phase phase = CEPH_CLIENT_RESET_IDLE;
+	bool drain_timed_out = false;
+	int sessions_reset = 0;
+	int blocked_requests = 0;
+	char reason[CEPH_CLIENT_RESET_REASON_LEN];
+
+	/* NOTE(review): presumably guards an early-mount/teardown window
+	 * where fsc->mdsc is not yet (or no longer) set — confirm. */
+	if (!mdsc)
+		return 0;
+
+	st = &mdsc->reset_state;
+
+	/*
+	 * Copy everything to locals under the lock so the seq_printf()
+	 * calls below run unlocked and the output is a consistent
+	 * snapshot rather than a mix of old and new state.
+	 */
+	spin_lock(&st->lock);
+	trigger = st->trigger_count;
+	success = st->success_count;
+	failure = st->failure_count;
+	last_start = st->last_start;
+	last_finish = st->last_finish;
+	last_errno = st->last_errno;
+	phase = st->phase;
+	drain_timed_out = st->drain_timed_out;
+	sessions_reset = st->sessions_reset;
+	strscpy(reason, st->last_reason, sizeof(reason));
+	spin_unlock(&st->lock);
+
+	/* Atomic counter; safe to read outside st->lock. */
+	blocked_requests = atomic_read(&st->blocked_requests);
+
+	seq_printf(s, "phase: %s\n", ceph_reset_phase_name(phase));
+	seq_printf(s, "trigger_count: %llu\n", trigger);
+	seq_printf(s, "success_count: %llu\n", success);
+	seq_printf(s, "failure_count: %llu\n", failure);
+	/* jiffies timestamps of 0 mean "never ran"; report relative ms. */
+	if (last_start)
+		seq_printf(s, "last_start_ms_ago: %u\n",
+			   jiffies_to_msecs(jiffies - last_start));
+	else
+		seq_puts(s, "last_start_ms_ago: (never)\n");
+	if (last_finish)
+		seq_printf(s, "last_finish_ms_ago: %u\n",
+			   jiffies_to_msecs(jiffies - last_finish));
+	else
+		seq_puts(s, "last_finish_ms_ago: (never)\n");
+	seq_printf(s, "last_errno: %d\n", last_errno);
+	seq_printf(s, "last_reason: %s\n",
+		   reason[0] ? reason : "(none)");
+	seq_printf(s, "drain_timed_out: %s\n",
+		   drain_timed_out ? "yes" : "no");
+	seq_printf(s, "sessions_reset: %d\n", sessions_reset);
+	seq_printf(s, "blocked_requests: %d\n", blocked_requests);
+
+	return 0;
+}
+
+/*
+ * Write handler for /sys/kernel/debug/ceph/<client>/reset/trigger.
+ *
+ * The written text, trimmed of surrounding whitespace, becomes the
+ * reason string recorded with the manual reset.  Returns the full
+ * write length on success (the input is consumed even if the reason
+ * had to be truncated to CEPH_CLIENT_RESET_REASON_LEN - 1 bytes).
+ */
+static ssize_t reset_trigger_write(struct file *file, const char __user *buf,
+				   size_t len, loff_t *ppos)
+{
+	struct ceph_fs_client *fsc = file->private_data;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+	char reason[CEPH_CLIENT_RESET_REASON_LEN];
+	char *trimmed;
+	size_t copy;
+	int ret;
+
+	if (!mdsc)
+		return -ENODEV;
+
+	copy = min_t(size_t, len, sizeof(reason) - 1);
+	if (copy && copy_from_user(reason, buf, copy))
+		return -EFAULT;
+	reason[copy] = '\0';
+	/*
+	 * strim() removes trailing whitespace in place but skips leading
+	 * whitespace only via its return value, so the returned pointer
+	 * must be used here; passing "reason" directly would record a
+	 * leading '\n' or spaces (e.g. from "echo  reason").
+	 */
+	trimmed = strim(reason);
+
+	ret = ceph_mdsc_schedule_reset(mdsc, trimmed);
+	if (ret)
+		return ret;
+
+	return len;
+}
+
 static int subvolume_metrics_show(struct seq_file *s, void *p)
 {
 	struct ceph_fs_client *fsc = s->private;
@@ -450,6 +535,7 @@ DEFINE_SHOW_ATTRIBUTE(mdsc);
 DEFINE_SHOW_ATTRIBUTE(caps);
 DEFINE_SHOW_ATTRIBUTE(mds_sessions);
 DEFINE_SHOW_ATTRIBUTE(status);
+DEFINE_SHOW_ATTRIBUTE(reset_status);
 DEFINE_SHOW_ATTRIBUTE(metrics_file);
 DEFINE_SHOW_ATTRIBUTE(metrics_latency);
 DEFINE_SHOW_ATTRIBUTE(metrics_size);
@@ -521,6 +607,13 @@ static int metric_features_show(struct seq_file *s, void *p)
 
 DEFINE_SHOW_ATTRIBUTE(metric_features);
 
+/* Write-only debugfs trigger file (mode 0200); no read side. */
+static const struct file_operations ceph_reset_trigger_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.write = reset_trigger_write,
+	.llseek = noop_llseek,
+};
+
 /*
  * debugfs
  */
@@ -554,6 +647,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 	debugfs_remove(fsc->debugfs_caps);
 	debugfs_remove(fsc->debugfs_status);
 	debugfs_remove(fsc->debugfs_mdsc);
+	debugfs_remove_recursive(fsc->debugfs_reset_dir);
 	debugfs_remove(fsc->debugfs_subvolume_metrics);
 	debugfs_remove_recursive(fsc->debugfs_metrics_dir);
 	doutc(fsc->client, "done\n");
@@ -602,6 +696,15 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 						fsc,
 						&caps_fops);
 
+	fsc->debugfs_reset_dir = debugfs_create_dir("reset",
+						    fsc->client->debugfs_dir);
+	debugfs_create_file("trigger", 0200,
+			    fsc->debugfs_reset_dir, fsc,
+			    &ceph_reset_trigger_fops);
+	debugfs_create_file("status", 0400,
+			    fsc->debugfs_reset_dir, fsc,
+			    &reset_status_fops);
+
 	fsc->debugfs_status = debugfs_create_file("status",
 						  0400,
 						  fsc->client->debugfs_dir,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index ce773b1095da..b16638ebff7f 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -5324,6 +5324,7 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc)
 	blocked_count = atomic_inc_return(&st->blocked_requests);
 	doutc(cl, "request blocked during reset, %d total blocked\n",
 	      blocked_count);
+	trace_ceph_client_reset_blocked(mdsc, blocked_count);
 
 retry:
 	remaining = max_t(long, deadline - jiffies, 1);
@@ -5334,10 +5335,12 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc)
 	if (wait_ret == 0) {
 		atomic_dec(&st->blocked_requests);
 		pr_warn_client(cl, "timed out waiting for reset to complete\n");
+		trace_ceph_client_reset_unblocked(mdsc, -ETIMEDOUT);
 		return -ETIMEDOUT;
 	}
 	if (wait_ret < 0) {
 		atomic_dec(&st->blocked_requests);
+		trace_ceph_client_reset_unblocked(mdsc, (int)wait_ret);
 		return (int)wait_ret;  /* -ERESTARTSYS */
 	}
 
@@ -5352,12 +5355,14 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc)
 		if (time_before(jiffies, deadline))
 			goto retry;
 		atomic_dec(&st->blocked_requests);
+		trace_ceph_client_reset_unblocked(mdsc, -ETIMEDOUT);
 		return -ETIMEDOUT;
 	}
 	ret = st->last_errno;
 	spin_unlock(&st->lock);
 
 	atomic_dec(&st->blocked_requests);
+	trace_ceph_client_reset_unblocked(mdsc, ret);
 	return ret ? -EAGAIN : 0;
 }
 
@@ -5387,6 +5392,7 @@ static void ceph_mdsc_reset_complete(struct ceph_mds_client *mdsc, int ret)
 	/* Wake up all requests that were blocked waiting for reset */
 	wake_up_all(&st->blocked_wq);
 
+	trace_ceph_client_reset_complete(mdsc, ret);
 }
 
 static void ceph_mdsc_reset_workfn(struct work_struct *work)
@@ -5749,6 +5755,7 @@ int ceph_mdsc_schedule_reset(struct ceph_mds_client *mdsc,
 	pr_info_client(mdsc->fsc->client,
 		       "manual session reset scheduled (reason=\"%s\")\n",
 		       msg);
+	trace_ceph_client_reset_schedule(mdsc, msg);
 	return 0;
 }
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a4993644d543..1d6aab060780 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -179,6 +179,7 @@ struct ceph_fs_client {
 	struct dentry *debugfs_status;
 	struct dentry *debugfs_mds_sessions;
 	struct dentry *debugfs_metrics_dir;
+	struct dentry *debugfs_reset_dir;
 	struct dentry *debugfs_subvolume_metrics;
 #endif
 
diff --git a/include/trace/events/ceph.h b/include/trace/events/ceph.h
index 08cb0659fbfc..1b990632f62b 100644
--- a/include/trace/events/ceph.h
+++ b/include/trace/events/ceph.h
@@ -226,6 +226,73 @@ TRACE_EVENT(ceph_handle_caps,
 		  __entry->mseq)
 );
 
+/*
+ * Client reset tracepoints - identify the client by its monitor-
+ * assigned global_id so traces remain meaningful when kernel pointer
+ * hashing is enabled.  monc.auth may be NULL during early init or
+ * late teardown, hence the guarded access in each TP_fast_assign.
+ */
+TRACE_EVENT(ceph_client_reset_schedule,
+	TP_PROTO(const struct ceph_mds_client *mdsc, const char *reason),
+	TP_ARGS(mdsc, reason),
+	TP_STRUCT__entry(
+		__field(u64, client_id)
+		__string(reason, reason ? reason : "")
+	),
+	TP_fast_assign(
+		__entry->client_id = mdsc->fsc->client->monc.auth ?
+			mdsc->fsc->client->monc.auth->global_id : 0;
+		__assign_str(reason);
+	),
+	TP_printk("client_id=%llu reason=%s",
+		  __entry->client_id, __get_str(reason))
+);
+
+/*
+ * reset_complete and reset_unblocked record the same (client_id, ret)
+ * payload, so share one event class instead of duplicating the layout,
+ * assignment, and format code in two TRACE_EVENTs.  The trace_* symbol
+ * names and prototypes are unchanged.
+ */
+DECLARE_EVENT_CLASS(ceph_client_reset_ret_template,
+	TP_PROTO(const struct ceph_mds_client *mdsc, int ret),
+	TP_ARGS(mdsc, ret),
+	TP_STRUCT__entry(
+		__field(u64, client_id)
+		__field(int, ret)
+	),
+	TP_fast_assign(
+		__entry->client_id = mdsc->fsc->client->monc.auth ?
+			mdsc->fsc->client->monc.auth->global_id : 0;
+		__entry->ret = ret;
+	),
+	TP_printk("client_id=%llu ret=%d", __entry->client_id, __entry->ret)
+);
+
+/* Reset finished; ret is 0 on success or the failure errno. */
+DEFINE_EVENT(ceph_client_reset_ret_template, ceph_client_reset_complete,
+	TP_PROTO(const struct ceph_mds_client *mdsc, int ret),
+	TP_ARGS(mdsc, ret)
+);
+
+/* A caller started waiting for an in-flight reset to finish. */
+TRACE_EVENT(ceph_client_reset_blocked,
+	TP_PROTO(const struct ceph_mds_client *mdsc, int blocked_count),
+	TP_ARGS(mdsc, blocked_count),
+	TP_STRUCT__entry(
+		__field(u64, client_id)
+		__field(int, blocked_count)
+	),
+	TP_fast_assign(
+		__entry->client_id = mdsc->fsc->client->monc.auth ?
+			mdsc->fsc->client->monc.auth->global_id : 0;
+		__entry->blocked_count = blocked_count;
+	),
+	TP_printk("client_id=%llu blocked_count=%d", __entry->client_id,
+		  __entry->blocked_count)
+);
+
+/* Caller unblocked; ret is the outcome it will propagate. */
+DEFINE_EVENT(ceph_client_reset_ret_template, ceph_client_reset_unblocked,
+	TP_PROTO(const struct ceph_mds_client *mdsc, int ret),
+	TP_ARGS(mdsc, ret)
+);
+
 #undef EM
 #undef E_
 #endif /* _TRACE_CEPH_H */
-- 
2.34.1


  parent reply	other threads:[~2026-05-07 12:27 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-07 12:27 [PATCH v4 00/11] ceph: manual client session reset Alex Markuze
2026-05-07 12:27 ` [PATCH v4 01/11] ceph: convert inode flags to named bit positions and atomic bitops Alex Markuze
2026-05-07 18:35   ` Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 02/11] ceph: use proper endian conversion for flock_len in reconnect Alex Markuze
2026-05-07 12:27 ` [PATCH v4 03/11] ceph: harden send_mds_reconnect and handle active-MDS peer reset Alex Markuze
2026-05-07 18:43   ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 04/11] ceph: add diagnostic timeout loop to wait_caps_flush() Alex Markuze
2026-05-07 19:01   ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 05/11] ceph: add client reset state machine and session teardown Alex Markuze
2026-05-07 19:17   ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` Alex Markuze [this message]
2026-05-07 19:22   ` [EXTERNAL] [PATCH v4 06/11] ceph: add manual reset debugfs control and tracepoints Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 07/11] selftests: ceph: add reset consistency checker Alex Markuze
2026-05-07 19:24   ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 08/11] selftests: ceph: add reset stress test Alex Markuze
2026-05-07 19:29   ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 09/11] selftests: ceph: add reset corner-case tests Alex Markuze
2026-05-07 19:31   ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 10/11] selftests: ceph: add validation harness Alex Markuze
2026-05-07 19:33   ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 12:27 ` [PATCH v4 11/11] selftests: ceph: wire up Ceph reset kselftests and documentation Alex Markuze
2026-05-07 19:38   ` [EXTERNAL] " Viacheslav Dubeyko
2026-05-07 18:28 ` [EXTERNAL] [PATCH v4 00/11] ceph: manual client session reset Viacheslav Dubeyko
2026-05-08 17:49   ` Viacheslav Dubeyko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260507122737.2804094-7-amarkuze@redhat.com \
    --to=amarkuze@redhat.com \
    --cc=ceph-devel@vger.kernel.org \
    --cc=idryomov@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=vdubeyko@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox