[PATCH 2/2] jbd: Refine commit writeout logic

From: Jan Kara
Date: Mon Feb 21 2011 - 11:25:37 EST


Currently we write out all journal buffers in WRITE_SYNC mode. This improves
performance for fsync heavy workloads but hinders performance when writes
are mostly asynchronous. So add possibility for callers starting a transaction
commit to specify whether they are going to wait for the commit and submit
journal writes in WRITE_SYNC mode only in that case.

Signed-off-by: Jan Kara <jack@xxxxxxx>
---
fs/ext3/fsync.c | 2 +-
fs/ext3/super.c | 2 +-
fs/jbd/checkpoint.c | 2 +-
fs/jbd/commit.c | 4 ++--
fs/jbd/journal.c | 19 ++++++++++---------
fs/jbd/transaction.c | 9 ++++-----
include/linux/jbd.h | 21 ++++++++++++---------
7 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 09b13bb..5396dd6 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -81,7 +81,7 @@ int ext3_sync_file(struct file *file, int datasync)
if (test_opt(inode->i_sb, BARRIER) &&
!journal_trans_will_send_data_barrier(journal, commit_tid))
needs_barrier = 1;
- log_start_commit(journal, commit_tid);
+ log_start_commit(journal, commit_tid, true);
ret = log_wait_commit(journal, commit_tid);

/*
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 85c8cc8..58a4424 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2497,7 +2497,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
{
tid_t target;

- if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
+ if (journal_start_commit(EXT3_SB(sb)->s_journal, &target, true)) {
if (wait)
log_wait_commit(EXT3_SB(sb)->s_journal, target);
}
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index e4b87bc..26ca2c3 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -297,7 +297,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,

spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
- log_start_commit(journal, tid);
+ log_start_commit(journal, tid, true);
log_wait_commit(journal, tid);
ret = 1;
} else if (!buffer_dirty(bh)) {
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 34a4861..ac188cb 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -294,7 +294,7 @@ void journal_commit_transaction(journal_t *journal)
int first_tag = 0;
int tag_flag;
int i;
- int write_op = WRITE_SYNC;
+ int write_op = WRITE;

/*
* First job: lock down the current transaction and wait for
@@ -332,7 +332,7 @@ void journal_commit_transaction(journal_t *journal)
* we unplug the device. We don't do explicit unplugging in here,
* instead we rely on sync_buffer() doing the unplug for us.
*/
- if (commit_transaction->t_synchronous_commit)
+ if (tid_geq(journal->j_commit_waited, commit_transaction->t_tid))
write_op = WRITE_SYNC_PLUG;
spin_lock(&commit_transaction->t_handle_lock);
while (commit_transaction->t_updates) {
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index da1b5e4..5743b4c 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -434,7 +434,7 @@ int __log_space_left(journal_t *journal)
/*
* Called under j_state_lock. Returns true if a transaction commit was started.
*/
-int __log_start_commit(journal_t *journal, tid_t target)
+int __log_start_commit(journal_t *journal, tid_t target, bool will_wait)
{
/*
* Are we already doing a recent enough commit?
@@ -444,7 +444,8 @@ int __log_start_commit(journal_t *journal, tid_t target)
* We want a new commit: OK, mark the request and wakeup the
* commit thread. We do _not_ do the commit ourselves.
*/
-
+ if (will_wait && !tid_geq(journal->j_commit_waited, target))
+ journal->j_commit_waited = target;
journal->j_commit_request = target;
jbd_debug(1, "JBD: requesting commit %d/%d\n",
journal->j_commit_request,
@@ -455,12 +456,12 @@ int __log_start_commit(journal_t *journal, tid_t target)
return 0;
}

-int log_start_commit(journal_t *journal, tid_t tid)
+int log_start_commit(journal_t *journal, tid_t tid, bool will_wait)
{
int ret;

spin_lock(&journal->j_state_lock);
- ret = __log_start_commit(journal, tid);
+ ret = __log_start_commit(journal, tid, will_wait);
spin_unlock(&journal->j_state_lock);
return ret;
}
@@ -483,7 +484,7 @@ int journal_force_commit_nested(journal_t *journal)
spin_lock(&journal->j_state_lock);
if (journal->j_running_transaction && !current->journal_info) {
transaction = journal->j_running_transaction;
- __log_start_commit(journal, transaction->t_tid);
+ __log_start_commit(journal, transaction->t_tid, true);
} else if (journal->j_committing_transaction)
transaction = journal->j_committing_transaction;

@@ -503,7 +504,7 @@ int journal_force_commit_nested(journal_t *journal)
* if a transaction is going to be committed (or is currently already
* committing), and fills its tid in at *ptid
*/
-int journal_start_commit(journal_t *journal, tid_t *ptid)
+int journal_start_commit(journal_t *journal, tid_t *ptid, bool will_wait)
{
int ret = 0;

@@ -511,7 +512,7 @@ int journal_start_commit(journal_t *journal, tid_t *ptid)
if (journal->j_running_transaction) {
tid_t tid = journal->j_running_transaction->t_tid;

- __log_start_commit(journal, tid);
+ __log_start_commit(journal, tid, will_wait);
/* There's a running transaction and we've just made sure
* it's commit has been scheduled. */
if (ptid)
@@ -1439,7 +1440,7 @@ int journal_flush(journal_t *journal)
/* Force everything buffered to the log... */
if (journal->j_running_transaction) {
transaction = journal->j_running_transaction;
- __log_start_commit(journal, transaction->t_tid);
+ __log_start_commit(journal, transaction->t_tid, true);
} else if (journal->j_committing_transaction)
transaction = journal->j_committing_transaction;

@@ -1573,7 +1574,7 @@ static void __journal_abort_hard(journal_t *journal)
journal->j_flags |= JFS_ABORT;
transaction = journal->j_running_transaction;
if (transaction)
- __log_start_commit(journal, transaction->t_tid);
+ __log_start_commit(journal, transaction->t_tid, false);
spin_unlock(&journal->j_state_lock);
}

diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 5b2e4c3..a12c0a3 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -178,7 +178,7 @@ repeat_locked:
spin_unlock(&transaction->t_handle_lock);
prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE);
- __log_start_commit(journal, transaction->t_tid);
+ __log_start_commit(journal, transaction->t_tid, false);
spin_unlock(&journal->j_state_lock);
schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait);
@@ -411,7 +411,7 @@ int journal_restart(handle_t *handle, int nblocks)
spin_unlock(&transaction->t_handle_lock);

jbd_debug(2, "restarting handle %p\n", handle);
- __log_start_commit(journal, transaction->t_tid);
+ __log_start_commit(journal, transaction->t_tid, false);
spin_unlock(&journal->j_state_lock);

lock_map_release(&handle->h_lockdep_map);
@@ -1431,8 +1431,6 @@ int journal_stop(handle_t *handle)
}
}

- if (handle->h_sync)
- transaction->t_synchronous_commit = 1;
current->journal_info = NULL;
spin_lock(&journal->j_state_lock);
spin_lock(&transaction->t_handle_lock);
@@ -1463,7 +1461,8 @@ int journal_stop(handle_t *handle)
jbd_debug(2, "transaction too old, requesting commit for "
"handle %p\n", handle);
/* This is non-blocking */
- __log_start_commit(journal, transaction->t_tid);
+ __log_start_commit(journal, transaction->t_tid,
+ handle->h_sync && !(current->flags & PF_MEMALLOC));
spin_unlock(&journal->j_state_lock);

/*
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index e069650..c38f73e 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -541,12 +541,6 @@ struct transaction_s
* How many handles used this transaction? [t_handle_lock]
*/
int t_handle_count;
-
- /*
- * This transaction is being forced and some process is
- * waiting for it to finish.
- */
- unsigned int t_synchronous_commit:1;
};

/**
@@ -594,6 +588,8 @@ struct transaction_s
* transaction
* @j_commit_request: Sequence number of the most recent transaction wanting
* commit
+ * @j_commit_waited: Sequence number of the most recent transaction someone
+ * is waiting for to commit.
* @j_uuid: Uuid of client object.
* @j_task: Pointer to the current commit thread for this journal
* @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a
@@ -762,6 +758,13 @@ struct journal_s
tid_t j_commit_request;

/*
+ * Sequence number of the most recent transaction someone is waiting
+ * for to commit.
+ * [j_state_lock]
+ */
+ tid_t j_commit_waited;
+
+ /*
* Journal uuid: identifies the object (filesystem, LVM volume etc)
* backed by this journal. This will eventually be replaced by an array
* of uuids, allowing us to index multiple devices within a single
@@ -985,9 +988,9 @@ extern void journal_switch_revoke_table(journal_t *journal);
*/

int __log_space_left(journal_t *); /* Called with journal locked */
-int log_start_commit(journal_t *journal, tid_t tid);
-int __log_start_commit(journal_t *journal, tid_t tid);
-int journal_start_commit(journal_t *journal, tid_t *tid);
+int log_start_commit(journal_t *journal, tid_t tid, bool will_wait);
+int __log_start_commit(journal_t *journal, tid_t tid, bool will_wait);
+int journal_start_commit(journal_t *journal, tid_t *tid, bool will_wait);
int journal_force_commit_nested(journal_t *journal);
int log_wait_commit(journal_t *journal, tid_t tid);
int log_do_checkpoint(journal_t *journal);
--
1.7.1


--DocE+STaALJfprDB--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/