[PATCH] fs/buffer.c: add debug print for __getblk_gfp() stall problem

From: Tetsuo Handa
Date: Fri Jul 20 2018 - 06:29:06 EST


Among syzbot's unresolved hung task reports, 18 out of 65 reports contain
__getblk_gfp() line in the backtrace. Since there is a comment block that
says that __getblk_gfp() will lock up the machine if try_to_free_buffers()
attempt from grow_dev_page() is failing, let's start from checking whether
syzbot is hitting that case. This change will be removed after the bug is
fixed.

Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
Cc: Dmitry Vyukov <dvyukov@xxxxxxxxxx>
---
fs/buffer.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++--
include/linux/sched.h | 7 +++++++
lib/Kconfig.debug | 6 ++++++
3 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index be31e28..ebf78ab 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -955,10 +955,20 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
end_block = init_page_buffers(page, bdev,
(sector_t)index << sizebits,
size);
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x01;
+#endif
goto done;
}
- if (!try_to_free_buffers(page))
+ if (!try_to_free_buffers(page)) {
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x02;
+#endif
goto failed;
+ }
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x04;
+#endif
}

/*
@@ -978,6 +988,9 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
spin_unlock(&inode->i_mapping->private_lock);
done:
ret = (block < end_block) ? 1 : -ENXIO;
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x08;
+#endif
failed:
unlock_page(page);
put_page(page);
@@ -1033,6 +1046,12 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
return NULL;
}

+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_stamp = jiffies;
+ current->getblk_executed = 0;
+ current->getblk_bh_count = 0;
+ current->getblk_bh_state = 0;
+#endif
for (;;) {
struct buffer_head *bh;
int ret;
@@ -1044,6 +1063,18 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
ret = grow_buffers(bdev, block, size, gfp);
if (ret < 0)
return NULL;
+
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ if (!time_after(jiffies, current->getblk_stamp + 3 * HZ))
+ continue;
+ printk(KERN_ERR "%s(%u): getblk(): executed=%x bh_count=%d bh_state=%lx\n",
+ current->comm, current->pid, current->getblk_executed,
+ current->getblk_bh_count, current->getblk_bh_state);
+ current->getblk_executed = 0;
+ current->getblk_bh_count = 0;
+ current->getblk_bh_state = 0;
+ current->getblk_stamp = jiffies;
+#endif
}
}

@@ -3216,6 +3247,11 @@ int sync_dirty_buffer(struct buffer_head *bh)
*/
static inline int buffer_busy(struct buffer_head *bh)
{
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x80;
+ current->getblk_bh_count = atomic_read(&bh->b_count);
+ current->getblk_bh_state = bh->b_state;
+#endif
return atomic_read(&bh->b_count) |
(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}
@@ -3254,11 +3290,18 @@ int try_to_free_buffers(struct page *page)
int ret = 0;

BUG_ON(!PageLocked(page));
- if (PageWriteback(page))
+ if (PageWriteback(page)) {
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x10;
+#endif
return 0;
+ }

if (mapping == NULL) { /* can this still happen? */
ret = drop_buffers(page, &buffers_to_free);
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x20;
+#endif
goto out;
}

@@ -3282,6 +3325,9 @@ int try_to_free_buffers(struct page *page)
if (ret)
cancel_dirty_page(page);
spin_unlock(&mapping->private_lock);
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ current->getblk_executed |= 0x40;
+#endif
out:
if (buffers_to_free) {
struct buffer_head *bh = buffers_to_free;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c4e0aba..95b143e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1207,6 +1207,13 @@ struct task_struct {
unsigned long prev_lowest_stack;
#endif

+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+ unsigned long getblk_stamp;
+ unsigned int getblk_executed;
+ unsigned int getblk_bh_count;
+ unsigned long getblk_bh_state;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c731ff9..0747ce7 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2058,3 +2058,9 @@ config IO_STRICT_DEVMEM
if the driver using a given range cannot be disabled.

If in doubt, say Y.
+
+config DEBUG_AID_FOR_SYZBOT
+ bool "Additional debug code for syzbot"
+ default n
+ help
+ This option is intended for testing by syzbot.
--
1.8.3.1