[patch 45/46] fs: sync_sb_inodes fix

From: Greg KH
Date: Thu Jan 22 2009 - 20:29:27 EST


2.6.28-stable review patch. If anyone has any objections, please let us know.

------------------

From: Nick Piggin <npiggin@xxxxxxx>

commit 38f21977663126fef53f5585e7f1653d8ebe55c4 upstream.

Fix data integrity semantics required by sys_sync, by iterating over all
inodes and waiting for any writeback pages after the initial writeout.
Comments explain the exact problem.

Signed-off-by: Nick Piggin <npiggin@xxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxx>

---
fs/fs-writeback.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 53 insertions(+), 7 deletions(-)

--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -440,6 +440,7 @@ void generic_sync_sb_inodes(struct super
struct writeback_control *wbc)
{
const unsigned long start = jiffies; /* livelock avoidance */
+ int sync = wbc->sync_mode == WB_SYNC_ALL;

spin_lock(&inode_lock);
if (!wbc->for_kupdate || list_empty(&sb->s_io))
@@ -516,7 +517,49 @@ void generic_sync_sb_inodes(struct super
if (!list_empty(&sb->s_more_io))
wbc->more_io = 1;
}
- spin_unlock(&inode_lock);
+
+ if (sync) {
+ struct inode *inode, *old_inode = NULL;
+
+ /*
+ * Data integrity sync. Must wait for all pages under writeback,
+ * because there may have been pages dirtied before our sync
+ * call, but which had writeout started before we write it out.
+ * In which case, the inode may not be on the dirty list, but
+ * we still have to wait for that writeout.
+ */
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ struct address_space *mapping;
+
+ if (inode->i_state & (I_FREEING|I_WILL_FREE))
+ continue;
+ mapping = inode->i_mapping;
+ if (mapping->nrpages == 0)
+ continue;
+ __iget(inode);
+ spin_unlock(&inode_lock);
+ /*
+ * We hold a reference to 'inode' so it couldn't have
+ * been removed from s_inodes list while we dropped the
+ * inode_lock. We cannot iput the inode now as we can
+ * be holding the last reference and we cannot iput it
+ * under inode_lock. So we keep the reference and iput
+ * it later.
+ */
+ iput(old_inode);
+ old_inode = inode;
+
+ filemap_fdatawait(mapping);
+
+ cond_resched();
+
+ spin_lock(&inode_lock);
+ }
+ spin_unlock(&inode_lock);
+ iput(old_inode);
+ } else
+ spin_unlock(&inode_lock);
+
return; /* Leave any unwritten inodes on s_io */
}
EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
@@ -596,13 +639,16 @@ void sync_inodes_sb(struct super_block *
.range_start = 0,
.range_end = LLONG_MAX,
};
- unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
- unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);

- wbc.nr_to_write = nr_dirty + nr_unstable +
- (inodes_stat.nr_inodes - inodes_stat.nr_unused) +
- nr_dirty + nr_unstable;
- wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */
+ if (!wait) {
+ unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
+ unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+
+ wbc.nr_to_write = nr_dirty + nr_unstable +
+ (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+ } else
+ wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */
+
sync_sb_inodes(sb, &wbc);
}


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/