Re: xfstests 073 regression

From: Christoph Hellwig
Date: Mon Aug 01 2011 - 12:53:00 EST


wb_check_background_flush is indeed what we're hitting. See the trace
output using a patch inspired by Curt's below:

# tracer: nop
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
<...>-4279 [000] 113.034052: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034052: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034052: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034052: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034053: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034053: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034053: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034054: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034054: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034054: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034055: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034055: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034055: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034056: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034056: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
<...>-4279 [000] 113.034056: writeback_grab_super_failed: bdi 7:0: sb_dev 0:0 nr_pages=9223372036854775807 sync_mode=0 kupdate=0 range_cyclic=1 background=1 reason=wb_check_background_flush
From: Christoph Hellwig <hch@xxxxxx>
Subject: writeback: trace the source of writeback works

Trace who queued up work to the flusher thread. Also add a new tracepoint to
the failed grab_super_passive case.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>

Index: xfs/fs/fs-writeback.c
===================================================================
--- xfs.orig/fs/fs-writeback.c 2011-08-01 17:59:15.318991514 +0200
+++ xfs/fs/fs-writeback.c 2011-08-01 18:31:33.221826327 +0200
@@ -41,7 +41,7 @@ struct wb_writeback_work {
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
-
+ enum writeback_reason why;
struct list_head list; /* pending work list */
struct completion *done; /* set if the caller waits */
};
@@ -115,7 +115,7 @@ static void bdi_queue_work(struct backin

static void
__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
- bool range_cyclic)
+ bool range_cyclic, enum writeback_reason why)
{
struct wb_writeback_work *work;

@@ -135,6 +135,7 @@ __bdi_start_writeback(struct backing_dev
work->sync_mode = WB_SYNC_NONE;
work->nr_pages = nr_pages;
work->range_cyclic = range_cyclic;
+ work->why = why;

bdi_queue_work(bdi, work);
}
@@ -152,7 +153,7 @@ __bdi_start_writeback(struct backing_dev
*/
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
{
- __bdi_start_writeback(bdi, nr_pages, true);
+ __bdi_start_writeback(bdi, nr_pages, true, WB_START_WRITEBACK);
}

/**
@@ -618,6 +619,7 @@ static long __writeback_inodes_wb(struct
struct super_block *sb = inode->i_sb;

if (!grab_super_passive(sb)) {
+ trace_writeback_grab_super_failed(wb->bdi, work);
redirty_tail(inode, wb);
continue;
}
@@ -636,12 +638,14 @@ static long __writeback_inodes_wb(struct
return wrote;
}

-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+ enum writeback_reason why)
{
struct wb_writeback_work work = {
.nr_pages = nr_pages,
.sync_mode = WB_SYNC_NONE,
.range_cyclic = 1,
+ .why = why,
};

spin_lock(&wb->list_lock);
@@ -813,6 +817,7 @@ static long wb_check_background_flush(st
.sync_mode = WB_SYNC_NONE,
.for_background = 1,
.range_cyclic = 1,
+ .why = WB_BACKGROUND,
};

return wb_writeback(wb, &work);
@@ -846,6 +851,7 @@ static long wb_check_old_data_flush(stru
.sync_mode = WB_SYNC_NONE,
.for_kupdate = 1,
.range_cyclic = 1,
+ .why = WB_OLD,
};

return wb_writeback(wb, &work);
@@ -977,7 +983,7 @@ void wakeup_flusher_threads(long nr_page
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
if (!bdi_has_dirty_io(bdi))
continue;
- __bdi_start_writeback(bdi, nr_pages, false);
+ __bdi_start_writeback(bdi, nr_pages, false, WB_FLUSHER_THREADS);
}
rcu_read_unlock();
}
@@ -1207,6 +1213,7 @@ void writeback_inodes_sb_nr(struct super
.tagged_writepages = 1,
.done = &done,
.nr_pages = nr,
+ .why = WB_WRITEBACK_INODES,
};

WARN_ON(!rwsem_is_locked(&sb->s_umount));
@@ -1285,6 +1292,7 @@ void sync_inodes_sb(struct super_block *
.nr_pages = LONG_MAX,
.range_cyclic = 0,
.done = &done,
+ .why = WB_SYNC_INODES,
};

WARN_ON(!rwsem_is_locked(&sb->s_umount));
Index: xfs/include/trace/events/writeback.h
===================================================================
--- xfs.orig/include/trace/events/writeback.h 2011-08-01 17:58:27.649249764 +0200
+++ xfs/include/trace/events/writeback.h 2011-08-01 18:31:26.501862733 +0200
@@ -21,6 +21,16 @@
{I_REFERENCED, "I_REFERENCED"} \
)

+#define WB_TYPES \
+ { WB_SYNC_INODES, "sync_inodes_sb" }, \
+ { WB_WRITEBACK_INODES, "writeback_inodes_sb" }, \
+ { WB_START_WRITEBACK, "bdi_start_writeback" }, \
+ { WB_FLUSHER_THREADS, "wakeup_flusher_threads" }, \
+ { WB_EMERGENCY, "emergency flush" }, \
+ { WB_BALANCE, "balance_dirty_pages" }, \
+ { WB_BACKGROUND, "wb_check_background_flush" }, \
+ { WB_OLD, "wb_check_old_data_flush" }
+
struct wb_writeback_work;

DECLARE_EVENT_CLASS(writeback_work_class,
@@ -34,6 +44,7 @@ DECLARE_EVENT_CLASS(writeback_work_class
__field(int, for_kupdate)
__field(int, range_cyclic)
__field(int, for_background)
+ __field(int, why)
),
TP_fast_assign(
strncpy(__entry->name, dev_name(bdi->dev), 32);
@@ -43,16 +54,18 @@ DECLARE_EVENT_CLASS(writeback_work_class
__entry->for_kupdate = work->for_kupdate;
__entry->range_cyclic = work->range_cyclic;
__entry->for_background = work->for_background;
+ __entry->why = work->why;
),
TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
- "kupdate=%d range_cyclic=%d background=%d",
+ "kupdate=%d range_cyclic=%d background=%d reason=%s",
__entry->name,
MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
__entry->nr_pages,
__entry->sync_mode,
__entry->for_kupdate,
__entry->range_cyclic,
- __entry->for_background
+ __entry->for_background,
+ __print_symbolic(__entry->why, WB_TYPES)
)
);
#define DEFINE_WRITEBACK_WORK_EVENT(name) \
@@ -65,6 +78,7 @@ DEFINE_WRITEBACK_WORK_EVENT(writeback_ex
DEFINE_WRITEBACK_WORK_EVENT(writeback_start);
DEFINE_WRITEBACK_WORK_EVENT(writeback_written);
DEFINE_WRITEBACK_WORK_EVENT(writeback_wait);
+DEFINE_WRITEBACK_WORK_EVENT(writeback_grab_super_failed);

TRACE_EVENT(writeback_pages_written,
TP_PROTO(long pages_written),
Index: xfs/include/linux/writeback.h
===================================================================
--- xfs.orig/include/linux/writeback.h 2011-08-01 18:24:26.754136700 +0200
+++ xfs/include/linux/writeback.h 2011-08-01 18:32:37.158146620 +0200
@@ -79,13 +79,26 @@ struct writeback_control {
* fs/fs-writeback.c
*/
struct bdi_writeback;
+
+enum writeback_reason {
+ WB_SYNC_INODES,
+ WB_WRITEBACK_INODES,
+ WB_START_WRITEBACK,
+ WB_FLUSHER_THREADS,
+ WB_EMERGENCY,
+ WB_BALANCE,
+ WB_BACKGROUND,
+ WB_OLD,
+};
+
int inode_wait(void *);
void writeback_inodes_sb(struct super_block *);
void writeback_inodes_sb_nr(struct super_block *, unsigned long nr);
int writeback_inodes_sb_if_idle(struct super_block *);
int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr);
void sync_inodes_sb(struct super_block *);
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages);
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+ enum writeback_reason why);
long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
void wakeup_flusher_threads(long nr_pages);

Index: xfs/mm/backing-dev.c
===================================================================
--- xfs.orig/mm/backing-dev.c 2011-08-01 18:24:26.774136593 +0200
+++ xfs/mm/backing-dev.c 2011-08-01 18:25:09.910569569 +0200
@@ -456,7 +456,7 @@ static int bdi_forker_thread(void *ptr)
* the bdi from the thread. Hopefully 1024 is
* large enough for efficient IO.
*/
- writeback_inodes_wb(&bdi->wb, 1024);
+ writeback_inodes_wb(&bdi->wb, 1024, WB_EMERGENCY);
} else {
/*
* The spinlock makes sure we do not lose
Index: xfs/mm/page-writeback.c
===================================================================
--- xfs.orig/mm/page-writeback.c 2011-08-01 18:24:26.794136483 +0200
+++ xfs/mm/page-writeback.c 2011-08-01 18:27:43.019740106 +0200
@@ -738,7 +738,8 @@ static void balance_dirty_pages(struct a
trace_balance_dirty_start(bdi);
if (bdi_nr_reclaimable > task_bdi_thresh) {
pages_written += writeback_inodes_wb(&bdi->wb,
- write_chunk);
+ write_chunk,
+ WB_BALANCE);
trace_balance_dirty_written(bdi, pages_written);
if (pages_written >= write_chunk)
break; /* We've done our duty */