[PATCH 1/1] lib/list_debug.c: add object information in case of invalid object

From: Maninder Singh
Date: Mon Dec 30 2024 - 05:11:45 EST


As of now during link list corruption it prints about cluprit address
and its wrong value, but sometime it is not enough to catch the actual
issue point.

If it prints allocation and free path of that corrupted node,
it will be a lot easier to find and fix the issues.

Adding the same information when data mismatch is found in link list
debug data:

[ 14.243055] slab kmalloc-32 start ffff0000cda19320 data offset 32 pointer offset 8 size 32 allocated at add_to_list+0x28/0xb0
[ 14.245259] __kmalloc_cache_noprof+0x1c4/0x358
[ 14.245572] add_to_list+0x28/0xb0
...
[ 14.248632] do_el0_svc_compat+0x1c/0x34
[ 14.249018] el0_svc_compat+0x2c/0x80
[ 14.249244] Free path:
[ 14.249410] kfree+0x24c/0x2f0
[ 14.249724] do_force_corruption+0xbc/0x100
...
[ 14.252266] el0_svc_common.constprop.0+0x40/0xe0
[ 14.252540] do_el0_svc_compat+0x1c/0x34
[ 14.252763] el0_svc_compat+0x2c/0x80
[ 14.253071] ------------[ cut here ]------------
[ 14.253303] list_del corruption. next->prev should be ffff0000cda192a8, but was 6b6b6b6b6b6b6b6b. (next=ffff0000cda19348)
[ 14.254255] WARNING: CPU: 3 PID: 84 at lib/list_debug.c:65 __list_del_entry_valid_or_report+0x158/0x164

moved prototype of mem_dump_obj() to bug.h, as mm.h can not be included
in bug.h.

Signed-off-by: Maninder Singh <maninder1.s@xxxxxxxxxxx>
---
Comment: I am not sure about moving of prototype, we can make a new wrapper also,
so please suggest what is best option. because name mem_dump_obj does
not go with bug.h

fs/open.c | 2 +-
fs/super.c | 2 +-
include/linux/bug.h | 10 +++++++++-
include/linux/mm.h | 6 ------
lib/list_debug.c | 22 +++++++++++-----------
5 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index 0a5d2f6061c6..932e5a6de63b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1529,7 +1529,7 @@ static int filp_flush(struct file *filp, fl_owner_t id)
{
int retval = 0;

- if (CHECK_DATA_CORRUPTION(file_count(filp) == 0,
+ if (CHECK_DATA_CORRUPTION(file_count(filp) == 0, filp,
"VFS: Close: file count is 0 (f_op=%ps)",
filp->f_op)) {
return 0;
diff --git a/fs/super.c b/fs/super.c
index c9c7223bc2a2..5a7db4a556e3 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -647,7 +647,7 @@ void generic_shutdown_super(struct super_block *sb)
*/
fscrypt_destroy_keyring(sb);

- if (CHECK_DATA_CORRUPTION(!list_empty(&sb->s_inodes),
+ if (CHECK_DATA_CORRUPTION(!list_empty(&sb->s_inodes), NULL,
"VFS: Busy inodes after unmount of %s (%s)",
sb->s_id, sb->s_type->name)) {
/*
diff --git a/include/linux/bug.h b/include/linux/bug.h
index 348acf2558f3..a9948a9f1093 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -73,15 +73,23 @@ static inline void generic_bug_clear_once(void) {}

#endif /* CONFIG_GENERIC_BUG */

+#ifdef CONFIG_PRINTK
+void mem_dump_obj(void *object);
+#else
+static inline void mem_dump_obj(void *object) {}
+#endif
+
/*
* Since detected data corruption should stop operation on the affected
* structures. Return value must be checked and sanely acted on by caller.
*/
static inline __must_check bool check_data_corruption(bool v) { return v; }
-#define CHECK_DATA_CORRUPTION(condition, fmt, ...) \
+#define CHECK_DATA_CORRUPTION(condition, addr, fmt, ...) \
check_data_corruption(({ \
bool corruption = unlikely(condition); \
if (corruption) { \
+ if (addr) \
+ mem_dump_obj(addr); \
if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
pr_err(fmt, ##__VA_ARGS__); \
BUG(); \
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d61b9c7a3a7b..9cabab47a23e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4097,12 +4097,6 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,

extern int sysctl_nr_trim_pages;

-#ifdef CONFIG_PRINTK
-void mem_dump_obj(void *object);
-#else
-static inline void mem_dump_obj(void *object) {}
-#endif
-
#ifdef CONFIG_ANON_VMA_NAME
int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
unsigned long len_in,
diff --git a/lib/list_debug.c b/lib/list_debug.c
index db602417febf..ee7eeeb8f92c 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -22,17 +22,17 @@ __list_valid_slowpath
bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev,
struct list_head *next)
{
- if (CHECK_DATA_CORRUPTION(prev == NULL,
+ if (CHECK_DATA_CORRUPTION(prev == NULL, NULL,
"list_add corruption. prev is NULL.\n") ||
- CHECK_DATA_CORRUPTION(next == NULL,
+ CHECK_DATA_CORRUPTION(next == NULL, NULL,
"list_add corruption. next is NULL.\n") ||
- CHECK_DATA_CORRUPTION(next->prev != prev,
+ CHECK_DATA_CORRUPTION(next->prev != prev, next,
"list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n",
prev, next->prev, next) ||
- CHECK_DATA_CORRUPTION(prev->next != next,
+ CHECK_DATA_CORRUPTION(prev->next != next, prev,
"list_add corruption. prev->next should be next (%px), but was %px. (prev=%px).\n",
next, prev->next, prev) ||
- CHECK_DATA_CORRUPTION(new == prev || new == next,
+ CHECK_DATA_CORRUPTION(new == prev || new == next, NULL,
"list_add double add: new=%px, prev=%px, next=%px.\n",
new, prev, next))
return false;
@@ -49,20 +49,20 @@ bool __list_del_entry_valid_or_report(struct list_head *entry)
prev = entry->prev;
next = entry->next;

- if (CHECK_DATA_CORRUPTION(next == NULL,
+ if (CHECK_DATA_CORRUPTION(next == NULL, NULL,
"list_del corruption, %px->next is NULL\n", entry) ||
- CHECK_DATA_CORRUPTION(prev == NULL,
+ CHECK_DATA_CORRUPTION(prev == NULL, NULL,
"list_del corruption, %px->prev is NULL\n", entry) ||
- CHECK_DATA_CORRUPTION(next == LIST_POISON1,
+ CHECK_DATA_CORRUPTION(next == LIST_POISON1, next,
"list_del corruption, %px->next is LIST_POISON1 (%px)\n",
entry, LIST_POISON1) ||
- CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
+ CHECK_DATA_CORRUPTION(prev == LIST_POISON2, prev,
"list_del corruption, %px->prev is LIST_POISON2 (%px)\n",
entry, LIST_POISON2) ||
- CHECK_DATA_CORRUPTION(prev->next != entry,
+ CHECK_DATA_CORRUPTION(prev->next != entry, prev,
"list_del corruption. prev->next should be %px, but was %px. (prev=%px)\n",
entry, prev->next, prev) ||
- CHECK_DATA_CORRUPTION(next->prev != entry,
+ CHECK_DATA_CORRUPTION(next->prev != entry, next,
"list_del corruption. next->prev should be %px, but was %px. (next=%px)\n",
entry, next->prev, next))
return false;
--
2.25.1