Re: [PATCH 1/1] hung_task: show the blocker task if the task is hung on semaphore

From: Lance Yang
Date: Thu Mar 06 2025 - 01:06:06 EST


Hi Masami,

Could you give me some quick feedback before I send out v2?

It seems unnecessary to make 'blocker' a union. I replaced 'struct mutex
*blocker_mutex' with 'unsigned long blocker', as only one blocker is active
at a time, IIUC ;)

The blocker field can store both the lock address and the lock type, with
the LSBs used to encode the type as you suggested, making it easier to extend
the feature to cover other types of locks.

Also, once the lock type is determined, we can directly extract the address
and cast it to a lock pointer ;)

---
include/linux/hung_task.h | 82 +++++++++++++++++++++++++++++++++++++++
include/linux/sched.h | 2 +-
kernel/hung_task.c | 15 ++++---
kernel/locking/mutex.c | 8 +++-
4 files changed, 99 insertions(+), 8 deletions(-)
create mode 100644 include/linux/hung_task.h

diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h
new file mode 100644
index 000000000000..569d2e579f36
--- /dev/null
+++ b/include/linux/hung_task.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Detect Hung Task: detecting tasks stuck in D state
+ *
+ * Copyright 2025 Lance Yang <ioworker0@xxxxxxxxx>
+ */
+#ifndef __LINUX_HUNG_TASK_H
+#define __LINUX_HUNG_TASK_H
+
+#include <linux/bug.h>
+#include <linux/sched.h>
+#include <linux/compiler.h>
+
+/*
+ * @blocker: Combines lock address and blocking type.
+ *
+ * Lock pointers are at least 4-byte aligned (32-bit) or 8-byte aligned
+ * (64-bit), which leaves the 2 least significant bits (LSBs) of the
+ * pointer always zero, so we can use these bits to encode the specific
+ * blocking type.
+ *
+ * Type encoding:
+ * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX)
+ * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM)
+ * 10 - Blocked on rw-mutex (BLOCKER_TYPE_RWMUTEX)
+ * 11 - Blocked on rw-semaphore (BLOCKER_TYPE_RWSEM)
+ */
+#define BLOCKER_TYPE_MUTEX 0x00UL
+#define BLOCKER_TYPE_SEM 0x01UL
+#define BLOCKER_TYPE_RWMUTEX 0x02UL
+#define BLOCKER_TYPE_RWSEM 0x03UL
+
+#define BLOCKER_TYPE_MASK 0x03UL
+
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+static inline void hung_task_set_blocker(void *lock, unsigned long type)
+{
+ unsigned long lock_ptr = (unsigned long)lock;
+
+ WARN_ON_ONCE(!lock_ptr); /* lock must not be NULL */
+ WARN_ON_ONCE(lock_ptr & BLOCKER_TYPE_MASK); /* low bits must be free for the type tag */
+ WARN_ON_ONCE(READ_ONCE(current->blocker)); /* a blocker is already recorded for current */
+
+ WRITE_ONCE(current->blocker, lock_ptr | type); /* store address and type in a single word */
+}
+
+static inline void hung_task_clear_blocker(void)
+{
+ WARN_ON_ONCE(!READ_ONCE(current->blocker)); /* nothing was recorded to clear */
+
+ WRITE_ONCE(current->blocker, 0UL); /* 0 == no blocker recorded */
+}
+
+static inline bool hung_task_blocker_is_type(unsigned long blocker,
+ unsigned long type)
+{
+ WARN_ON_ONCE(!blocker); /* callers are expected to pass a non-zero blocker */
+
+ return (blocker & BLOCKER_TYPE_MASK) == type; /* compare only the low type bits */
+}
+
+static inline void *hung_task_blocker_to_lock(unsigned long blocker)
+{
+ WARN_ON_ONCE(!blocker); /* callers are expected to pass a non-zero blocker */
+
+ return (void *)(blocker & ~BLOCKER_TYPE_MASK); /* strip the type bits, leaving the lock address */
+}
+#else
+#define hung_task_set_blocker(lock, type) do {} while (0)
+#define hung_task_clear_blocker() do {} while (0)
+static inline bool hung_task_blocker_is_type(unsigned long blocker,
+ unsigned long type)
+{
+ return false; /* stub for !CONFIG_DETECT_HUNG_TASK_BLOCKER: no type ever matches */
+}
+static inline void *hung_task_blocker_to_lock(unsigned long blocker)
+{
+ return NULL; /* stub for !CONFIG_DETECT_HUNG_TASK_BLOCKER: no lock is tracked */
+}
+#endif
+
+#endif /* __LINUX_HUNG_TASK_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1419d94c8e87..f27060dac499 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1218,7 +1218,7 @@ struct task_struct {
#endif

#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
- struct mutex *blocker_mutex;
+ unsigned long blocker;
#endif

#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index ccd7217fcec1..f7fa832261c8 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -25,6 +25,10 @@

#include <trace/events/sched.h>

+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+#include <linux/hung_task.h>
+#endif
+
/*
* The number of tasks checked:
*/
@@ -98,16 +102,17 @@ static struct notifier_block panic_block = {
static void debug_show_blocker(struct task_struct *task)
{
struct task_struct *g, *t;
- unsigned long owner;
- struct mutex *lock;
+ unsigned long owner, blocker;

RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held");

- lock = READ_ONCE(task->blocker_mutex);
- if (!lock)
+ blocker = READ_ONCE(task->blocker);
+ if (!blocker || !hung_task_blocker_is_type(blocker, BLOCKER_TYPE_MUTEX))
return;

- owner = mutex_get_owner(lock);
+ owner = mutex_get_owner(
+ (struct mutex *)hung_task_blocker_to_lock(blocker));
+
if (unlikely(!owner)) {
pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n",
task->comm, task->pid);
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 6a543c204a14..642d6398e0dd 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -42,6 +42,10 @@
# define MUTEX_WARN_ON(cond)
#endif

+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+#include <linux/hung_task.h>
+#endif
+
void
__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
{
@@ -189,7 +193,7 @@ __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct list_head *list)
{
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
- WRITE_ONCE(current->blocker_mutex, lock);
+ hung_task_set_blocker(lock, BLOCKER_TYPE_MUTEX);
#endif
debug_mutex_add_waiter(lock, waiter, current);

@@ -207,7 +211,7 @@ __mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter)

debug_mutex_remove_waiter(lock, waiter, current);
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
- WRITE_ONCE(current->blocker_mutex, NULL);
+ hung_task_clear_blocker();
#endif
}

--

Thanks,
Lance