[RFC] ext3/jbd, kernel 2.6.13, make ext3 mountable as ext2 when journal is empty.
From: Jan Willem van den Brand
Date: Wed Jul 09 2008 - 11:24:39 EST
This patch makes ext3 mountable as ext2 even in case of a power
failure while mounted as ext3. We have tested it for kernel 2.6.13 but
it should be fairly easy to get it to work for other versions.
When mounting an ext3 file system as ext2 (without journalling) an
incompatibility flag is checked to ensure that the journal can be
safely ignored. This INCOMPAT_RECOVER flag is cleared when ext3 is
cleanly unmounted. The idea is that, at this point, all checkpointing
data has been transferred to disk.
In case of a power failure, the INCOMPAT flag is not reset. Systems
that suffer from frequent power failures (e.g. SD-cards that are
unsafely removed) will often not be mountable as ext2.
I think that ext3 can be mounted as ext2 when there is no
checkpointing data in the journal (no data being written from journal
to disk). The journal is then skipped by both e2fsck and ext3
mounting. To make aforementioned systems more frequently mountable as
ext2 we reset the INCOMPAT flag when we are sure that there is no
checkpointing data in the journal. We set it again as soon as there
is.
Furthermore, we observed that there is almost always checkpointing
data in the journal. Therefore, we flush the journal on every file
sync (journal flushing flushes checkpointing data) and we perform a
file sync after every file close.
Obviously, this solution will result in poor performance when many
small files are frequently closed after write but that is not the case
in our system (TomTom navigation device).
I'd like to hear opinions about this solution.
Best regards,
Jan Willem van den Brand
==== linux-s3c24xx.org/fs/ext3/fsync.c#1 - linux-s3c24xx/fs/ext3/fsync.c ====
--- linux-s3c24xx.org/fs/ext3/fsync.c 2008-06-30 11:28:51.000000000 +0200
+++ linux-s3c24xx/fs/ext3/fsync.c 2008-06-30 11:28:27.000000000 +0200
@@ -29,6 +29,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
/*
* akpm: A new design for ext3_sync_file().
@@ -45,6 +46,7 @@
int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) {
struct inode *inode = dentry->d_inode;
+ struct super_block *sb = inode->i_sb;
int ret = 0;
J_ASSERT(ext3_journal_current_handle() == 0);
@@ -84,5 +86,10 @@
ret = sync_inode(inode, &wbc);
}
out:
+ /* After an fsync, we empty the journal (checkpoint all
+ * T_FINISHED transactions to disk). */
+ journal_lock_updates(EXT3_SB(sb)->s_journal);
+ journal_flush(EXT3_SB(sb)->s_journal);
+ journal_unlock_updates(EXT3_SB(sb)->s_journal);
return ret;
}
==== linux-s3c24xx.org/fs/jbd/journal.c#1 - /linux-s3c24xx/fs/jbd/journal.c ====
--- linux-s3c24xx.org/fs/jbd/journal.c 2008-06-30 11:28:51.000000000 +0200
+++ linux-s3c24xx/fs/jbd/journal.c 2008-06-30 11:26:16.000000000 +0200
@@ -22,10 +22,13 @@
* journaling (ext2 can use a reserved inode for storing the log).
*/
+
+
#include <linux/module.h>
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
@@ -37,6 +40,23 @@
#include <asm/page.h>
#include <linux/proc_fs.h>
+static void ext3_commit_super (struct super_block * sb,
+ struct ext3_super_block * es,
+ int sync) /* sync != 0: also call sync_dirty_buffer() on the buffer */
+{
+ struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
+ /* Refresh write time and free block/inode counts in es, then dirty sbh. */
+ if (!sbh)
+ return; /* no s_sbh buffer attached: nothing to mark or write */
+ es->s_wtime = cpu_to_le32(get_seconds());
+ es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
+ es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
+ BUFFER_TRACE(sbh, "marking dirty");
+ mark_buffer_dirty(sbh);
+ if (sync)
+ sync_dirty_buffer(sbh); /* NOTE(review): presumably waits for I/O; both callers in this patch invoke it before spin_unlock(&journal->j_state_lock) — confirm this is safe */
+}
+
EXPORT_SYMBOL(journal_start);
EXPORT_SYMBOL(journal_restart);
EXPORT_SYMBOL(journal_extend);
@@ -938,6 +958,8 @@
{
journal_superblock_t *sb = journal->j_superblock;
struct buffer_head *bh = journal->j_sb_buffer;
+ struct super_block * sb_p;
+ int s_start_was_zero = 0;
/*
* As a special case, if the on-disk copy is already marked as needing
@@ -959,8 +981,23 @@
jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
journal->j_tail, journal->j_tail_sequence, journal->j_errno);
+ if (sb->s_start==0)
+ s_start_was_zero = 1;
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
sb->s_start = cpu_to_be32(journal->j_tail);
+ /* If s_start gets a non-zero value here, we set the
+ * INCOMPAT_RECOVER incompatibility flag */
+ if (s_start_was_zero && sb->s_start!=0)
+ {
+ sb_p = (struct super_block *)(journal->j_private);
+ EXT3_SET_INCOMPAT_FEATURE(sb_p, EXT3_FEATURE_INCOMPAT_RECOVER);
+ /* flush super_block to disk. */
+ ext3_commit_super(sb_p, EXT3_SB(sb_p)->s_es, 1);
+ }
sb->s_errno = cpu_to_be32(journal->j_errno);
spin_unlock(&journal->j_state_lock);
@@ -1342,6 +1379,7 @@
int err = 0;
transaction_t *transaction = NULL;
unsigned long old_tail;
+ struct super_block * sb_p;
spin_lock(&journal->j_state_lock);
@@ -1390,6 +1428,17 @@
J_ASSERT(!journal->j_checkpoint_transactions);
J_ASSERT(journal->j_head == journal->j_tail);
J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
+
+ /* The journal is empty, we can clear the INCOMPAT_RECOVER flag
+ * in ext3's superblock and flush it to disk */
+ sb_p = (struct super_block *)(journal->j_private);
+ EXT3_CLEAR_INCOMPAT_FEATURE(sb_p, EXT3_FEATURE_INCOMPAT_RECOVER);
+ ext3_commit_super(sb_p, EXT3_SB(sb_p)->s_es, 1);
spin_unlock(&journal->j_state_lock);
return err;
}