Re: [EXTERNAL] [PATCH v2 6/7] ceph: add manual reset debugfs control and tracepoints
From: Viacheslav Dubeyko
Date: Fri Apr 17 2026 - 14:42:43 EST
On Wed, 2026-04-15 at 17:00 +0000, Alex Markuze wrote:
> Add the debugfs and trace plumbing used to trigger and observe
> manual client reset.
>
> The reset interface exposes a trigger file for operator-initiated
> reset and a status file for tracking the most recent run. The
> tracepoints record scheduling, completion, and blocked caller
> behavior so reset progress can be diagnosed from the client side.
>
> debugfs layout under /sys/kernel/debug/ceph/<client>/reset/:
> trigger - write to initiate a manual reset
> status - read to see the most recent reset result
>
> Tracepoints:
> ceph_client_reset_schedule - reset queued
> ceph_client_reset_complete - reset finished (success or failure)
> ceph_client_reset_blocked - caller blocked waiting for reset
> ceph_client_reset_unblocked - caller unblocked after reset
>
> Signed-off-by: Alex Markuze <amarkuze@xxxxxxxxxx>
> ---
> fs/ceph/debugfs.c | 104 ++++++++++++++++++++++++++++++++++++
> fs/ceph/mds_client.c | 8 +++
> fs/ceph/super.h | 3 ++
> include/trace/events/ceph.h | 63 ++++++++++++++++++++++
> 4 files changed, 178 insertions(+)
>
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index 7dc307790240..d46d41ec7a86 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -9,6 +9,7 @@
> #include <linux/seq_file.h>
> #include <linux/math64.h>
> #include <linux/ktime.h>
> +#include <linux/uaccess.h>
>
> #include <linux/ceph/libceph.h>
> #include <linux/ceph/mon_client.h>
> @@ -360,16 +361,107 @@ static int status_show(struct seq_file *s, void *p)
> return 0;
> }
>
> +static int reset_status_show(struct seq_file *s, void *p)
> +{
> + struct ceph_fs_client *fsc = s->private;
> + struct ceph_mds_client *mdsc = fsc->mdsc;
> + struct ceph_client_reset_state *st;
> + u64 trigger = 0, success = 0, failure = 0;
> + unsigned long last_start = 0, last_finish = 0;
> + int last_errno = 0;
> + enum ceph_client_reset_phase phase = CEPH_CLIENT_RESET_IDLE;
> + bool drain_timed_out = false;
> + int sessions_reset = 0;
> + int blocked_requests = 0;
> + char reason[CEPH_CLIENT_RESET_REASON_LEN];
> +
> + if (!mdsc)
> + return 0;
> +
> + st = &mdsc->reset_state;
> +
> + spin_lock(&st->lock);
> + trigger = st->trigger_count;
> + success = st->success_count;
> + failure = st->failure_count;
> + last_start = st->last_start;
> + last_finish = st->last_finish;
> + last_errno = st->last_errno;
> + phase = st->phase;
> + drain_timed_out = st->drain_timed_out;
> + sessions_reset = st->sessions_reset;
> + strscpy(reason, st->last_reason, sizeof(reason));
> + spin_unlock(&st->lock);
> +
> + blocked_requests = atomic_read(&st->blocked_requests);
> +
> + seq_printf(s, "phase: %s\n", ceph_reset_phase_name(phase));
> + seq_printf(s, "trigger_count: %llu\n", trigger);
> + seq_printf(s, "success_count: %llu\n", success);
> + seq_printf(s, "failure_count: %llu\n", failure);
> + if (last_start)
> + seq_printf(s, "last_start_ms_ago: %u\n",
> + jiffies_to_msecs(jiffies - last_start));
> + else
> + seq_puts(s, "last_start_ms_ago: (never)\n");
> + if (last_finish)
> + seq_printf(s, "last_finish_ms_ago: %u\n",
> + jiffies_to_msecs(jiffies - last_finish));
> + else
> + seq_puts(s, "last_finish_ms_ago: (never)\n");
> + seq_printf(s, "last_errno: %d\n", last_errno);
> + seq_printf(s, "last_reason: %s\n",
> + reason[0] ? reason : "(none)");
> + seq_printf(s, "drain_timed_out: %s\n",
> + drain_timed_out ? "yes" : "no");
> + seq_printf(s, "sessions_reset: %d\n", sessions_reset);
> + seq_printf(s, "blocked_requests: %d\n", blocked_requests);
> +
> + return 0;
> +}
> +
> +static ssize_t reset_trigger_write(struct file *file, const char __user *buf,
> + size_t len, loff_t *ppos)
> +{
> + struct ceph_fs_client *fsc = file->private_data;
> + struct ceph_mds_client *mdsc = fsc->mdsc;
> + char reason[CEPH_CLIENT_RESET_REASON_LEN];
> + size_t copy;
> + int ret;
> +
> + if (!mdsc)
> + return -ENODEV;
> +
> + copy = min_t(size_t, len, sizeof(reason) - 1);
> + if (copy && copy_from_user(reason, buf, copy))
> + return -EFAULT;
> + reason[copy] = '\0';
> + strim(reason);
> +
> + ret = ceph_mdsc_schedule_reset(mdsc, reason);
> + if (ret)
> + return ret;
> +
> + return len;
> +}
> +
> DEFINE_SHOW_ATTRIBUTE(mdsmap);
> DEFINE_SHOW_ATTRIBUTE(mdsc);
> DEFINE_SHOW_ATTRIBUTE(caps);
> DEFINE_SHOW_ATTRIBUTE(mds_sessions);
> DEFINE_SHOW_ATTRIBUTE(status);
> +DEFINE_SHOW_ATTRIBUTE(reset_status);
> DEFINE_SHOW_ATTRIBUTE(metrics_file);
> DEFINE_SHOW_ATTRIBUTE(metrics_latency);
> DEFINE_SHOW_ATTRIBUTE(metrics_size);
> DEFINE_SHOW_ATTRIBUTE(metrics_caps);
>
> +static const struct file_operations ceph_reset_trigger_fops = {
> + .owner = THIS_MODULE,
> + .open = simple_open,
> + .write = reset_trigger_write,
> + .llseek = noop_llseek,
> +};
>
> /*
> * debugfs
> @@ -404,6 +496,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
> debugfs_remove(fsc->debugfs_caps);
> debugfs_remove(fsc->debugfs_status);
> debugfs_remove(fsc->debugfs_mdsc);
> + debugfs_remove_recursive(fsc->debugfs_reset_dir);
> debugfs_remove_recursive(fsc->debugfs_metrics_dir);
> doutc(fsc->client, "done\n");
> }
> @@ -451,6 +544,17 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
> fsc,
> &caps_fops);
>
> + fsc->debugfs_reset_dir = debugfs_create_dir("reset",
> + fsc->client->debugfs_dir);
> + fsc->debugfs_reset_trigger =
> + debugfs_create_file("trigger", 0200,
> + fsc->debugfs_reset_dir, fsc,
> + &ceph_reset_trigger_fops);
> + fsc->debugfs_reset_status =
> + debugfs_create_file("status", 0400,
> + fsc->debugfs_reset_dir, fsc,
> + &reset_status_fops);
> +
> fsc->debugfs_status = debugfs_create_file("status",
> 0400,
> fsc->client->debugfs_dir,
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 7e399b0dcc55..98a882cf8b65 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -5213,6 +5213,7 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc)
> blocked_count = atomic_inc_return(&st->blocked_requests);
> doutc(cl, "request blocked during reset, %d total blocked\n",
> blocked_count);
> + trace_ceph_client_reset_blocked(mdsc, blocked_count);
>
> retry:
> wait_ret = wait_event_interruptible_timeout(st->blocked_wq,
> @@ -5223,10 +5224,12 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc)
> if (wait_ret == 0) {
> atomic_dec(&st->blocked_requests);
> pr_warn_client(cl, "timed out waiting for reset to complete\n");
> + trace_ceph_client_reset_unblocked(mdsc, -ETIMEDOUT);
> return -ETIMEDOUT;
> }
> if (wait_ret < 0) {
> atomic_dec(&st->blocked_requests);
> + trace_ceph_client_reset_unblocked(mdsc, (int)wait_ret);
> return (int)wait_ret; /* -ERESTARTSYS */
> }
>
> @@ -5241,12 +5244,14 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc)
> if (time_before(jiffies, deadline))
> goto retry;
> atomic_dec(&st->blocked_requests);
> + trace_ceph_client_reset_unblocked(mdsc, -ETIMEDOUT);
> return -ETIMEDOUT;
> }
> ret = st->last_errno;
> spin_unlock(&st->lock);
>
> atomic_dec(&st->blocked_requests);
> + trace_ceph_client_reset_unblocked(mdsc, ret);
> return ret;
> }
>
> @@ -5275,6 +5280,8 @@ static void ceph_mdsc_reset_complete(struct ceph_mds_client *mdsc, int ret)
>
> /* Wake up all requests that were blocked waiting for reset */
> wake_up_all(&st->blocked_wq);
> +
> + trace_ceph_client_reset_complete(mdsc, ret);
> }
>
> static void ceph_mdsc_reset_workfn(struct work_struct *work)
> @@ -5559,6 +5566,7 @@ int ceph_mdsc_schedule_reset(struct ceph_mds_client *mdsc,
> pr_info_client(mdsc->fsc->client,
> "manual session reset scheduled (reason=\"%s\")\n",
> msg);
> + trace_ceph_client_reset_schedule(mdsc, msg);
> return 0;
> }
>
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 1f901b1647e6..98af0a823c81 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -179,6 +179,9 @@ struct ceph_fs_client {
> struct dentry *debugfs_status;
> struct dentry *debugfs_mds_sessions;
> struct dentry *debugfs_metrics_dir;
> + struct dentry *debugfs_reset_dir;
> + struct dentry *debugfs_reset_trigger;
> + struct dentry *debugfs_reset_status;
The debugfs_reset_trigger and debugfs_reset_status fields are initialized in
ceph_fs_debugfs_init() but never read anywhere afterwards — cleanup only calls
debugfs_remove_recursive(fsc->debugfs_reset_dir), which removes both files
along with the directory. Do we really need to store these two dentries?
> #endif
>
> #ifdef CONFIG_CEPH_FSCACHE
> diff --git a/include/trace/events/ceph.h b/include/trace/events/ceph.h
> index 08cb0659fbfc..e853c891ef71 100644
> --- a/include/trace/events/ceph.h
> +++ b/include/trace/events/ceph.h
> @@ -226,6 +226,69 @@ TRACE_EVENT(ceph_handle_caps,
> __entry->mseq)
> );
>
> +/*
> + * Client reset tracepoints - identify the client by its monitor-
> + * assigned global_id so traces remain meaningful when kernel pointer
> + * hashing is enabled.
> + */
> +TRACE_EVENT(ceph_client_reset_schedule,
> + TP_PROTO(const struct ceph_mds_client *mdsc, const char *reason),
> + TP_ARGS(mdsc, reason),
> + TP_STRUCT__entry(
> + __field(u64, client_id)
> + __string(reason, reason ? reason : "")
> + ),
> + TP_fast_assign(
> + __entry->client_id = mdsc->fsc->client->monc.auth->global_id;
All four tracepoints dereference mdsc->fsc->client->monc.auth->global_id. Could
monc.auth be NULL during early client init or late teardown?
Thanks,
Slava.
> + __assign_str(reason);
> + ),
> + TP_printk("client_id=%llu reason=%s",
> + __entry->client_id, __get_str(reason))
> +);
> +
> +TRACE_EVENT(ceph_client_reset_complete,
> + TP_PROTO(const struct ceph_mds_client *mdsc, int ret),
> + TP_ARGS(mdsc, ret),
> + TP_STRUCT__entry(
> + __field(u64, client_id)
> + __field(int, ret)
> + ),
> + TP_fast_assign(
> + __entry->client_id = mdsc->fsc->client->monc.auth->global_id;
> + __entry->ret = ret;
> + ),
> + TP_printk("client_id=%llu ret=%d", __entry->client_id, __entry->ret)
> +);
> +
> +TRACE_EVENT(ceph_client_reset_blocked,
> + TP_PROTO(const struct ceph_mds_client *mdsc, int blocked_count),
> + TP_ARGS(mdsc, blocked_count),
> + TP_STRUCT__entry(
> + __field(u64, client_id)
> + __field(int, blocked_count)
> + ),
> + TP_fast_assign(
> + __entry->client_id = mdsc->fsc->client->monc.auth->global_id;
> + __entry->blocked_count = blocked_count;
> + ),
> + TP_printk("client_id=%llu blocked_count=%d", __entry->client_id,
> + __entry->blocked_count)
> +);
> +
> +TRACE_EVENT(ceph_client_reset_unblocked,
> + TP_PROTO(const struct ceph_mds_client *mdsc, int ret),
> + TP_ARGS(mdsc, ret),
> + TP_STRUCT__entry(
> + __field(u64, client_id)
> + __field(int, ret)
> + ),
> + TP_fast_assign(
> + __entry->client_id = mdsc->fsc->client->monc.auth->global_id;
> + __entry->ret = ret;
> + ),
> + TP_printk("client_id=%llu ret=%d", __entry->client_id, __entry->ret)
> +);
> +
> #undef EM
> #undef E_
> #endif /* _TRACE_CEPH_H */