Re: [PATCH v2] fuse: add a new "connections" file to show longest waiting reqeust

From: Joanne Koong
Date: Mon Feb 03 2025 - 16:53:14 EST


On Mon, Feb 3, 2025 at 11:57 AM Jeff Layton <jlayton@xxxxxxxxxx> wrote:
>
> Add a new file to the "connections" directory that shows how long (in
> seconds) the oldest fuse_req in the processing hash or pending queue has
> been waiting.
>
> Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>

Reviewed-by: Joanne Koong <joannelkoong@xxxxxxxxx>
> ---
> This is based on top of Joanne's timeout patches, as it requires the
> "create_time" field in fuse_req. We have some internal detection of
> hung fuse server processes that relies on seeing elevated values in the
> "waiting" sysfs file. The problem with that method is that it can't
> detect when highly serialized workloads on a FUSE mount are hung. This
> adds another metric that we can use to detect this situation.
> ---
> Changes in v2:
> - use list_first_entry_or_null() when checking hash lists
> - take fiq->lock when checking pending list
> - ensure that if there are no waiting reqs, that the output will be 0
> - use time_before() to compare jiffies values
> - no need to hold fc->lock when walking pending queue
> - Link to v1: https://lore.kernel.org/r/20250203-fuse-sysfs-v1-1-36faa01f2338@xxxxxxxxxx
> ---
> fs/fuse/control.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> fs/fuse/fuse_i.h | 2 +-
> 2 files changed, 59 insertions(+), 1 deletion(-)
>
> diff --git a/fs/fuse/control.c b/fs/fuse/control.c
> index 2a730d88cc3bdb50ea1f8a3185faad5f05fc6e74..b27f2120499826040af77d7662d2dad0e9f37ee6 100644
> --- a/fs/fuse/control.c
> +++ b/fs/fuse/control.c
> @@ -180,6 +180,57 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
> return ret;
> }
>
> +/* Show how long (in s) the oldest request has been waiting */
> +static ssize_t fuse_conn_oldest_read(struct file *file, char __user *buf,
> + size_t len, loff_t *ppos)
> +{
> + char tmp[32];
> + size_t size;
> + unsigned long now = jiffies;
> + unsigned long oldest = now;
> +
> + if (!*ppos) {
> + struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
> + struct fuse_iqueue *fiq = &fc->iq;
> + struct fuse_dev *fud;
> + struct fuse_req *req;
> +
> + if (!fc)
> + return 0;
> +
> + spin_lock(&fc->lock);
> + list_for_each_entry(fud, &fc->devices, entry) {
> + struct fuse_pqueue *fpq = &fud->pq;
> + int i;
> +
> + spin_lock(&fpq->lock);
> + for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) {
> + /*
> + * Only check the first request in the queue. The
> + * assumption is that the one at the head of the list
> + * will always be the oldest.
> + */
> + req = list_first_entry_or_null(&fpq->processing[i],
> + struct fuse_req, list);
> + if (req && time_before(req->create_time, oldest))
> + oldest = req->create_time;
> + }
> + spin_unlock(&fpq->lock);
> + }
> + spin_unlock(&fc->lock);
> +
> + spin_lock(&fiq->lock);
> + req = list_first_entry_or_null(&fiq->pending, struct fuse_req, list);
> + if (req && time_before(req->create_time, oldest))
> + oldest = req->create_time;
> + spin_unlock(&fiq->lock);
> +
> + fuse_conn_put(fc);
> + }
> + size = sprintf(tmp, "%ld\n", (now - oldest)/HZ);
> + return simple_read_from_buffer(buf, len, ppos, tmp, size);
> +}
> +
> static const struct file_operations fuse_ctl_abort_ops = {
> .open = nonseekable_open,
> .write = fuse_conn_abort_write,
> @@ -202,6 +253,11 @@ static const struct file_operations fuse_conn_congestion_threshold_ops = {
> .write = fuse_conn_congestion_threshold_write,
> };
>
> +static const struct file_operations fuse_ctl_oldest_ops = {
> + .open = nonseekable_open,
> + .read = fuse_conn_oldest_read,
> +};
> +
> static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
> struct fuse_conn *fc,
> const char *name,
> @@ -264,6 +320,8 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
>
> if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1,
> NULL, &fuse_ctl_waiting_ops) ||
> + !fuse_ctl_add_dentry(parent, fc, "oldest", S_IFREG | 0400, 1,
> + NULL, &fuse_ctl_oldest_ops) ||
> !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1,
> NULL, &fuse_ctl_abort_ops) ||
> !fuse_ctl_add_dentry(parent, fc, "max_background", S_IFREG | 0600,
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index dcc1c327a0574b1fd1adda4b7ca047aa353b6a0a..b46c26bc977ad2d75d10fb306d3ecc4caf2c53bd 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -42,7 +42,7 @@
> #define FUSE_NAME_MAX 1024
>
> /** Number of dentries for each connection in the control filesystem */
> -#define FUSE_CTL_NUM_DENTRIES 5
> +#define FUSE_CTL_NUM_DENTRIES 6
>
> /* Frequency (in seconds) of request timeout checks, if opted into */
> #define FUSE_TIMEOUT_TIMER_FREQ 15
>
> ---
> base-commit: 9afd7336f3acbe5678cca3b3bc5baefb51ce9564
> change-id: 20250203-fuse-sysfs-ce351d105cf0
>
> Best regards,
> --
> Jeff Layton <jlayton@xxxxxxxxxx>
>