[PATCH v1 7/9] ext4: stop using VFS for dirty superblock management

From: Artem Bityutskiy
Date: Tue Mar 20 2012 - 10:42:44 EST


From: Artem Bityutskiy <artem.bityutskiy@xxxxxxxxxxxxxxx>

Do not use VFS for managing dirty superblock but do it inside ext4. Remove the
'ext4_write_super()' VFS callback and instead, schedule a delayed work when the
superblock is marked as dirty. Use the 'dio_unwritten_wq' which we already have
for these purposes.

We add memory barriers to make sure the 's_dirt' flag changes are visible to
other CPUs as soon as possible to avoid queuing unnecessary works.

The big changes comparing to the previous behavior:
1. We use 'dio_unwritten' queue, so the superblock will be written by
per-filesystem 'ext4-dio-unwrit' thread, instead of 'sync_supers' thread.
2. We allocate memory in 'ext4_mark_super_dirty()'.

Note: the final goal is to get rid of the 'sync_supers()' kernel thread which
wakes up every 5 seconds and even if there is nothing to do. Thus, we are
pushing superblock management from VFS down to file-systems.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@xxxxxxxxxxxxxxx>
---
fs/ext4/super.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++-------
1 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d9543f3..7d453f7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -73,7 +73,6 @@ static const char *ext4_decode_error(struct super_block *sb, int errno,
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
-static void ext4_write_super(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data);
@@ -1294,7 +1293,6 @@ static const struct super_operations ext4_nojournal_sops = {
.dirty_inode = ext4_dirty_inode,
.drop_inode = ext4_drop_inode,
.evict_inode = ext4_evict_inode,
- .write_super = ext4_write_super,
.put_super = ext4_put_super,
.statfs = ext4_statfs,
.remount_fs = ext4_remount,
@@ -4140,6 +4138,14 @@ static int ext4_commit_super(struct super_block *sb, int sync)

if (!sbh || block_device_ejected(sb))
return error;
+
+ sb->s_dirt = 0;
+ /*
+ * Make sure we first mark the superblock as clean and then start
+ * writing it out.
+ */
+ smp_wmb();
+
if (buffer_write_io_error(sbh)) {
/*
* Oh, dear. A previous attempt to write the
@@ -4180,7 +4186,6 @@ static int ext4_commit_super(struct super_block *sb, int sync)
es->s_free_inodes_count =
cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
- sb->s_dirt = 0;
BUFFER_TRACE(sbh, "marking dirty");
mark_buffer_dirty(sbh);
if (sync) {
@@ -4199,9 +4204,64 @@ static int ext4_commit_super(struct super_block *sb, int sync)
return error;
}

+struct sb_delayed_work {
+ struct delayed_work dwork;
+ struct super_block *sb;
+};
+
+static struct sb_delayed_work *work_to_sbwork(struct work_struct *work)
+{
+ struct delayed_work *dwork;
+
+ dwork = container_of(work, struct delayed_work, work);
+ return container_of(dwork, struct sb_delayed_work, dwork);
+}
+
+static void write_super(struct work_struct *work)
+{
+ struct sb_delayed_work *sbwork = work_to_sbwork(work);
+ struct super_block *sb = sbwork->sb;
+
+ kfree(sbwork);
+
+ smp_rmb();
+ if (!sb->s_dirt)
+ return;
+
+ lock_super(sb);
+ ext4_commit_super(sb, 1);
+ unlock_super(sb);
+}
+
void __ext4_mark_super_dirty(struct super_block *sb)
{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct sb_delayed_work *sbwork;
+ unsigned long delay;
+
+ /* Make sure we see 's_dirt' changes ASAP */
+ smp_rmb();
+ if (sb->s_dirt == 1)
+ return;
sb->s_dirt = 1;
+ /* Make other CPUs see the 's_dirt' change as soon as possible */
+ smp_wmb();
+
+ sbwork = kmalloc(sizeof(struct sb_delayed_work), GFP_NOFS);
+ if (!sbwork) {
+ /*
+ * Well, should not be a big deal - the system must be in
+ * trouble anyway, and the SB will be written out on unmount or
+ * we may be luckier next time it is marked as dirty.
+ */
+ sb->s_dirt = 2;
+ return;
+ }
+
+ INIT_DELAYED_WORK(&sbwork->dwork, write_super);
+ sbwork->sb = sb;
+ delay = msecs_to_jiffies(dirty_writeback_interval * 10);
+ queue_delayed_work(sbi->dio_unwritten_wq, &sbwork->dwork, delay);
}

/*
@@ -4291,13 +4351,6 @@ int ext4_force_commit(struct super_block *sb)
return ret;
}

-static void ext4_write_super(struct super_block *sb)
-{
- lock_super(sb);
- ext4_commit_super(sb, 1);
- unlock_super(sb);
-}
-
static int ext4_sync_fs(struct super_block *sb, int wait)
{
int ret = 0;
--
1.7.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/