Re: [PATCH v2] fs/resctrl: Fix deadlock for errors during mount

From: Reinette Chatre

Date: Wed May 06 2026 - 18:17:19 EST

Hi Tony,

On 5/4/26 3:01 PM, Tony Luck wrote:
> diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> index 5dfdaa6f9d8f..6709b74bd655 100644
> --- a/fs/resctrl/rdtgroup.c
> +++ b/fs/resctrl/rdtgroup.c
> @@ -2855,10 +2855,6 @@ static int rdt_get_tree(struct fs_context *fc)
> if (ret)
> goto out_mondata;
>
> - ret = kernfs_get_tree(fc);
> - if (ret < 0)
> - goto out_psl;
> -
> if (resctrl_arch_alloc_capable())
> resctrl_arch_enable_alloc();
> if (resctrl_arch_mon_capable())
> @@ -2874,10 +2870,26 @@ static int rdt_get_tree(struct fs_context *fc)
> RESCTRL_PICK_ANY_CPU);
> }
>
> - goto out;
> + /* Release locks because kernfs_get_tree() may call rdt_kill_sb() */

I neglected to add an rdt_last_cmd_clear() call to this path, which is now used for both
success and failure. It is needed on the success path to ensure that the last_cmd_status
file does not show stale information.

> + mutex_unlock(&rdtgroup_mutex);
> + cpus_read_unlock();
> + ret = kernfs_get_tree(fc);
> + if (!ret || ctx->kfc.new_sb_created) {
> + /* mount succeeded, or failed and already cleaned up */
> + return ret;
> + }
> + cpus_read_lock();
> + mutex_lock(&rdtgroup_mutex);
> +
> + if (resctrl_arch_alloc_capable())
> + resctrl_arch_disable_alloc();
> + if (resctrl_arch_mon_capable())
> + resctrl_arch_disable_mon();
> +
> + resctrl_mounted = false;

I find symmetrical code significantly easier to reason about, while the above introduces
asymmetrical code in two ways:
- kernfs_get_tree() failures all leave identical resctrl state, but the resctrl
  cleanup is done differently (rdt_kill_sb() or the rdt_get_tree() error path) based on
  the specific failure, which has nothing to do with resctrl. I think it will be simpler
  if resctrl state is cleaned up consistently irrespective of how kernfs_get_tree() fails.
- The above change creates an inconsistency: kernfs_get_tree() is called without
  locks, but its "partner" function kernfs_kill_sb() is called with both the
  CPU hotplug lock and rdtgroup_mutex held. Here too I believe it will be
  simpler to just keep locking consistent when interacting with these
  kernfs calls.

>
> -out_psl:
> rdt_pseudo_lock_release();
> +
> out_mondata:
> if (resctrl_arch_mon_capable())
> kernfs_remove(kn_mondata);

While comparing rdt_kill_sb() and this error exit path to make sure they do the same
thing, I seem to have stumbled on a bug: out_mondata is missing a call to
mon_put_kn_priv(). Do you agree?

Putting all comments together, how about something like below?

diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index 5dfdaa6f9d8f..6d0d6ab34985 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -73,6 +73,11 @@ static char last_cmd_status_buf[512];
static int rdtgroup_setup_root(struct rdt_fs_context *ctx);

static void rdtgroup_destroy_root(void);
+/*
+ * Temporary forward declaration for testing only. Move functions instead.
+ */
+static void resctrl_unmount(void);
+static void mon_put_kn_priv(void);

struct dentry *debugfs_resctrl;

@@ -2855,10 +2860,6 @@ static int rdt_get_tree(struct fs_context *fc)
if (ret)
goto out_mondata;

- ret = kernfs_get_tree(fc);
- if (ret < 0)
- goto out_psl;
-
if (resctrl_arch_alloc_capable())
resctrl_arch_enable_alloc();
if (resctrl_arch_mon_capable())
@@ -2874,13 +2875,24 @@ static int rdt_get_tree(struct fs_context *fc)
RESCTRL_PICK_ANY_CPU);
}

- goto out;
+ rdt_last_cmd_clear();
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+
+ ret = kernfs_get_tree(fc);
+ /*
+ * resctrl can only be mounted once, new superblock only expected
+ * to be created once.
+ */
+ if (!ctx->kfc.new_sb_created)
+ resctrl_unmount();
+ return ret;

-out_psl:
- rdt_pseudo_lock_release();
out_mondata:
- if (resctrl_arch_mon_capable())
+ if (resctrl_arch_mon_capable()) {
kernfs_remove(kn_mondata);
+ mon_put_kn_priv(); /* separate fix */
+ }
out_mongrp:
if (resctrl_arch_mon_capable()) {
rdtgroup_unassign_cntrs(&rdtgroup_default);
@@ -2896,7 +2908,6 @@ static int rdt_get_tree(struct fs_context *fc)
out_root:
rdtgroup_destroy_root();
out:
- rdt_last_cmd_clear();
mutex_unlock(&rdtgroup_mutex);
cpus_read_unlock();
return ret;
@@ -3169,7 +3180,7 @@ static void resctrl_fs_teardown(void)
rdtgroup_destroy_root();
}

-static void rdt_kill_sb(struct super_block *sb)
+static void resctrl_unmount(void)
{
struct rdt_resource *r;

@@ -3188,11 +3199,17 @@ static void rdt_kill_sb(struct super_block *sb)
if (resctrl_arch_mon_capable())
resctrl_arch_disable_mon();
resctrl_mounted = false;
- kernfs_kill_sb(sb);
mutex_unlock(&rdtgroup_mutex);
cpus_read_unlock();
}

+
+static void rdt_kill_sb(struct super_block *sb)
+{
+ resctrl_unmount();
+ kernfs_kill_sb(sb);
+}
+
static struct file_system_type rdt_fs_type = {
.name = "resctrl",
.init_fs_context = rdt_init_fs_context,

Reinette