[PATCH v2 6/7] ceph: add manual reset debugfs control and tracepoints

From: Alex Markuze

Date: Wed Apr 15 2026 - 13:05:41 EST


Add the debugfs and trace plumbing used to trigger and observe
manual client reset.

The reset interface exposes a trigger file for operator-initiated
reset and a status file for tracking the most recent run. The
tracepoints record scheduling, completion, and blocked/unblocked
caller behavior so reset progress can be diagnosed from the client side.

debugfs layout under /sys/kernel/debug/ceph/<client>/reset/:
trigger - write to initiate a manual reset
status - read to see the most recent reset result

Tracepoints:
ceph_client_reset_schedule - reset queued
ceph_client_reset_complete - reset finished (success or failure)
ceph_client_reset_blocked - caller blocked waiting for reset
ceph_client_reset_unblocked - caller unblocked after reset

Signed-off-by: Alex Markuze <amarkuze@xxxxxxxxxx>
---
fs/ceph/debugfs.c | 104 ++++++++++++++++++++++++++++++++++++
fs/ceph/mds_client.c | 8 +++
fs/ceph/super.h | 3 ++
include/trace/events/ceph.h | 63 ++++++++++++++++++++++
4 files changed, 178 insertions(+)

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 7dc307790240..d46d41ec7a86 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -9,6 +9,7 @@
#include <linux/seq_file.h>
#include <linux/math64.h>
#include <linux/ktime.h>
+#include <linux/uaccess.h>

#include <linux/ceph/libceph.h>
#include <linux/ceph/mon_client.h>
@@ -360,16 +361,107 @@ static int status_show(struct seq_file *s, void *p)
return 0;
}

+static int reset_status_show(struct seq_file *s, void *p)
+{
+ struct ceph_fs_client *fsc = s->private;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_client_reset_state *st;
+ u64 trigger = 0, success = 0, failure = 0;
+ unsigned long last_start = 0, last_finish = 0;
+ int last_errno = 0;
+ enum ceph_client_reset_phase phase = CEPH_CLIENT_RESET_IDLE;
+ bool drain_timed_out = false;
+ int sessions_reset = 0;
+ int blocked_requests = 0;
+ char reason[CEPH_CLIENT_RESET_REASON_LEN];
+
+ if (!mdsc)
+ return 0;
+
+ st = &mdsc->reset_state;
+
+ spin_lock(&st->lock);
+ trigger = st->trigger_count;
+ success = st->success_count;
+ failure = st->failure_count;
+ last_start = st->last_start;
+ last_finish = st->last_finish;
+ last_errno = st->last_errno;
+ phase = st->phase;
+ drain_timed_out = st->drain_timed_out;
+ sessions_reset = st->sessions_reset;
+ strscpy(reason, st->last_reason, sizeof(reason));
+ spin_unlock(&st->lock);
+
+ blocked_requests = atomic_read(&st->blocked_requests);
+
+ seq_printf(s, "phase: %s\n", ceph_reset_phase_name(phase));
+ seq_printf(s, "trigger_count: %llu\n", trigger);
+ seq_printf(s, "success_count: %llu\n", success);
+ seq_printf(s, "failure_count: %llu\n", failure);
+ if (last_start)
+ seq_printf(s, "last_start_ms_ago: %u\n",
+ jiffies_to_msecs(jiffies - last_start));
+ else
+ seq_puts(s, "last_start_ms_ago: (never)\n");
+ if (last_finish)
+ seq_printf(s, "last_finish_ms_ago: %u\n",
+ jiffies_to_msecs(jiffies - last_finish));
+ else
+ seq_puts(s, "last_finish_ms_ago: (never)\n");
+ seq_printf(s, "last_errno: %d\n", last_errno);
+ seq_printf(s, "last_reason: %s\n",
+ reason[0] ? reason : "(none)");
+ seq_printf(s, "drain_timed_out: %s\n",
+ drain_timed_out ? "yes" : "no");
+ seq_printf(s, "sessions_reset: %d\n", sessions_reset);
+ seq_printf(s, "blocked_requests: %d\n", blocked_requests);
+
+ return 0;
+}
+
+static ssize_t reset_trigger_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ struct ceph_fs_client *fsc = file->private_data;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ char reason[CEPH_CLIENT_RESET_REASON_LEN];
+ size_t copy;
+ int ret;
+
+ if (!mdsc)
+ return -ENODEV;
+
+ copy = min_t(size_t, len, sizeof(reason) - 1);
+ if (copy && copy_from_user(reason, buf, copy))
+ return -EFAULT;
+ reason[copy] = '\0';
+ /* strim() skips leading whitespace via its return value */
+
+ ret = ceph_mdsc_schedule_reset(mdsc, strim(reason));
+ if (ret)
+ return ret;
+
+ return len;
+}
+
DEFINE_SHOW_ATTRIBUTE(mdsmap);
DEFINE_SHOW_ATTRIBUTE(mdsc);
DEFINE_SHOW_ATTRIBUTE(caps);
DEFINE_SHOW_ATTRIBUTE(mds_sessions);
DEFINE_SHOW_ATTRIBUTE(status);
+DEFINE_SHOW_ATTRIBUTE(reset_status);
DEFINE_SHOW_ATTRIBUTE(metrics_file);
DEFINE_SHOW_ATTRIBUTE(metrics_latency);
DEFINE_SHOW_ATTRIBUTE(metrics_size);
DEFINE_SHOW_ATTRIBUTE(metrics_caps);

+static const struct file_operations ceph_reset_trigger_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = reset_trigger_write,
+ .llseek = noop_llseek,
+};

/*
* debugfs
@@ -404,6 +496,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
debugfs_remove(fsc->debugfs_caps);
debugfs_remove(fsc->debugfs_status);
debugfs_remove(fsc->debugfs_mdsc);
+ debugfs_remove_recursive(fsc->debugfs_reset_dir);
debugfs_remove_recursive(fsc->debugfs_metrics_dir);
doutc(fsc->client, "done\n");
}
@@ -451,6 +544,17 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc,
&caps_fops);

+ fsc->debugfs_reset_dir = debugfs_create_dir("reset",
+ fsc->client->debugfs_dir);
+ fsc->debugfs_reset_trigger =
+ debugfs_create_file("trigger", 0200,
+ fsc->debugfs_reset_dir, fsc,
+ &ceph_reset_trigger_fops);
+ fsc->debugfs_reset_status =
+ debugfs_create_file("status", 0400,
+ fsc->debugfs_reset_dir, fsc,
+ &reset_status_fops);
+
fsc->debugfs_status = debugfs_create_file("status",
0400,
fsc->client->debugfs_dir,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 7e399b0dcc55..98a882cf8b65 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -5213,6 +5213,7 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc)
blocked_count = atomic_inc_return(&st->blocked_requests);
doutc(cl, "request blocked during reset, %d total blocked\n",
blocked_count);
+ trace_ceph_client_reset_blocked(mdsc, blocked_count);

retry:
wait_ret = wait_event_interruptible_timeout(st->blocked_wq,
@@ -5223,10 +5224,12 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc)
if (wait_ret == 0) {
atomic_dec(&st->blocked_requests);
pr_warn_client(cl, "timed out waiting for reset to complete\n");
+ trace_ceph_client_reset_unblocked(mdsc, -ETIMEDOUT);
return -ETIMEDOUT;
}
if (wait_ret < 0) {
atomic_dec(&st->blocked_requests);
+ trace_ceph_client_reset_unblocked(mdsc, (int)wait_ret);
return (int)wait_ret; /* -ERESTARTSYS */
}

@@ -5241,12 +5244,14 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc)
if (time_before(jiffies, deadline))
goto retry;
atomic_dec(&st->blocked_requests);
+ trace_ceph_client_reset_unblocked(mdsc, -ETIMEDOUT);
return -ETIMEDOUT;
}
ret = st->last_errno;
spin_unlock(&st->lock);

atomic_dec(&st->blocked_requests);
+ trace_ceph_client_reset_unblocked(mdsc, ret);
return ret;
}

@@ -5275,6 +5280,8 @@ static void ceph_mdsc_reset_complete(struct ceph_mds_client *mdsc, int ret)

/* Wake up all requests that were blocked waiting for reset */
wake_up_all(&st->blocked_wq);
+
+ trace_ceph_client_reset_complete(mdsc, ret);
}

static void ceph_mdsc_reset_workfn(struct work_struct *work)
@@ -5559,6 +5566,7 @@ int ceph_mdsc_schedule_reset(struct ceph_mds_client *mdsc,
pr_info_client(mdsc->fsc->client,
"manual session reset scheduled (reason=\"%s\")\n",
msg);
+ trace_ceph_client_reset_schedule(mdsc, msg);
return 0;
}

diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 1f901b1647e6..98af0a823c81 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -179,6 +179,9 @@ struct ceph_fs_client {
struct dentry *debugfs_status;
struct dentry *debugfs_mds_sessions;
struct dentry *debugfs_metrics_dir;
+ struct dentry *debugfs_reset_dir;
+ struct dentry *debugfs_reset_trigger;
+ struct dentry *debugfs_reset_status;
#endif

#ifdef CONFIG_CEPH_FSCACHE
diff --git a/include/trace/events/ceph.h b/include/trace/events/ceph.h
index 08cb0659fbfc..e853c891ef71 100644
--- a/include/trace/events/ceph.h
+++ b/include/trace/events/ceph.h
@@ -226,6 +226,69 @@ TRACE_EVENT(ceph_handle_caps,
__entry->mseq)
);

+/*
+ * Client reset tracepoints - identify the client by its monitor-
+ * assigned global_id so traces remain meaningful when kernel pointer
+ * hashing is enabled.
+ */
+TRACE_EVENT(ceph_client_reset_schedule,
+ TP_PROTO(const struct ceph_mds_client *mdsc, const char *reason),
+ TP_ARGS(mdsc, reason),
+ TP_STRUCT__entry(
+ __field(u64, client_id)
+ __string(reason, reason ? reason : "")
+ ),
+ TP_fast_assign(
+ __entry->client_id = mdsc->fsc->client->monc.auth->global_id;
+ __assign_str(reason);
+ ),
+ TP_printk("client_id=%llu reason=%s",
+ __entry->client_id, __get_str(reason))
+);
+
+TRACE_EVENT(ceph_client_reset_complete,
+ TP_PROTO(const struct ceph_mds_client *mdsc, int ret),
+ TP_ARGS(mdsc, ret),
+ TP_STRUCT__entry(
+ __field(u64, client_id)
+ __field(int, ret)
+ ),
+ TP_fast_assign(
+ __entry->client_id = mdsc->fsc->client->monc.auth->global_id;
+ __entry->ret = ret;
+ ),
+ TP_printk("client_id=%llu ret=%d", __entry->client_id, __entry->ret)
+);
+
+TRACE_EVENT(ceph_client_reset_blocked,
+ TP_PROTO(const struct ceph_mds_client *mdsc, int blocked_count),
+ TP_ARGS(mdsc, blocked_count),
+ TP_STRUCT__entry(
+ __field(u64, client_id)
+ __field(int, blocked_count)
+ ),
+ TP_fast_assign(
+ __entry->client_id = mdsc->fsc->client->monc.auth->global_id;
+ __entry->blocked_count = blocked_count;
+ ),
+ TP_printk("client_id=%llu blocked_count=%d", __entry->client_id,
+ __entry->blocked_count)
+);
+
+TRACE_EVENT(ceph_client_reset_unblocked,
+ TP_PROTO(const struct ceph_mds_client *mdsc, int ret),
+ TP_ARGS(mdsc, ret),
+ TP_STRUCT__entry(
+ __field(u64, client_id)
+ __field(int, ret)
+ ),
+ TP_fast_assign(
+ __entry->client_id = mdsc->fsc->client->monc.auth->global_id;
+ __entry->ret = ret;
+ ),
+ TP_printk("client_id=%llu ret=%d", __entry->client_id, __entry->ret)
+);
+
#undef EM
#undef E_
#endif /* _TRACE_CEPH_H */
--
2.34.1