Re: [PATCH v4 2/2] jbd2: gracefully abort on transaction state corruptions

From: Jan Kara

Date: Wed Mar 04 2026 - 06:16:48 EST


On Tue 03-03-26 10:01:57, Milos Nikic wrote:
> Auditing the jbd2 codebase reveals several legacy J_ASSERT calls
> that enforce internal state machine invariants (e.g., verifying
> jh->b_transaction or jh->b_next_transaction pointers).
>
> When these invariants are broken, the journal is in a corrupted
> state. However, triggering a fatal panic brings down the entire
> system for a localized filesystem error.
>
> This patch targets a specific class of these asserts: those
> residing inside functions that natively return integer error codes,
> booleans, or error pointers. It replaces the hard J_ASSERTs with
> WARN_ON_ONCE to capture the offending stack trace, safely drops
> any held locks, gracefully aborts the journal, and returns -EINVAL.
>
> This prevents a catastrophic kernel panic while ensuring the
> corrupted journal state is safely contained and upstream callers
> (like ext4 or ocfs2) can gracefully handle the aborted handle.
>
> Functions modified in fs/jbd2/transaction.c:
> - jbd2__journal_start()
> - do_get_write_access()
> - jbd2_journal_dirty_metadata()
> - jbd2_journal_forget()
> - jbd2_journal_try_to_free_buffers()
> - jbd2_journal_file_inode()
>
> Signed-off-by: Milos Nikic <nikic.milos@xxxxxxxxx>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@xxxxxxx>

Honza

> ---
> fs/jbd2/transaction.c | 112 ++++++++++++++++++++++++++++++++----------
> 1 file changed, 86 insertions(+), 26 deletions(-)
>
> diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
> index 04d17a5f2a82..bae6c99d635c 100644
> --- a/fs/jbd2/transaction.c
> +++ b/fs/jbd2/transaction.c
> @@ -474,7 +474,8 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
> return ERR_PTR(-EROFS);
>
> if (handle) {
> - J_ASSERT(handle->h_transaction->t_journal == journal);
> + if (WARN_ON_ONCE(handle->h_transaction->t_journal != journal))
> + return ERR_PTR(-EINVAL);
> handle->h_ref++;
> return handle;
> }
> @@ -1036,7 +1037,13 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
> */
> if (!jh->b_transaction) {
> JBUFFER_TRACE(jh, "no transaction");
> - J_ASSERT_JH(jh, !jh->b_next_transaction);
> + if (WARN_ON_ONCE(jh->b_next_transaction)) {
> + spin_unlock(&jh->b_state_lock);
> + unlock_buffer(bh);
> + error = -EINVAL;
> + jbd2_journal_abort(journal, error);
> + goto out;
> + }
> JBUFFER_TRACE(jh, "file as BJ_Reserved");
> /*
> * Make sure all stores to jh (b_modified, b_frozen_data) are
> @@ -1069,13 +1076,27 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
> */
> if (jh->b_frozen_data) {
> JBUFFER_TRACE(jh, "has frozen data");
> - J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
> + if (WARN_ON_ONCE(jh->b_next_transaction)) {
> + spin_unlock(&jh->b_state_lock);
> + error = -EINVAL;
> + jbd2_journal_abort(journal, error);
> + goto out;
> + }
> goto attach_next;
> }
>
> JBUFFER_TRACE(jh, "owned by older transaction");
> - J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
> - J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
> + if (WARN_ON_ONCE(jh->b_next_transaction ||
> + jh->b_transaction !=
> + journal->j_committing_transaction)) {
> + pr_err("JBD2: %s: assertion failure: b_next_transaction=%p b_transaction=%p j_committing_transaction=%p\n",
> + journal->j_devname, jh->b_next_transaction,
> + jh->b_transaction, journal->j_committing_transaction);
> + spin_unlock(&jh->b_state_lock);
> + error = -EINVAL;
> + jbd2_journal_abort(journal, error);
> + goto out;
> + }
>
> /*
> * There is one case we have to be very careful about. If the
> @@ -1496,7 +1517,7 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
> int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
> {
> transaction_t *transaction = handle->h_transaction;
> - journal_t *journal;
> + journal_t *journal = transaction->t_journal;
> struct journal_head *jh;
> int ret = 0;
>
> @@ -1520,8 +1541,14 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
> if (data_race(jh->b_transaction != transaction &&
> jh->b_next_transaction != transaction)) {
> spin_lock(&jh->b_state_lock);
> - J_ASSERT_JH(jh, jh->b_transaction == transaction ||
> - jh->b_next_transaction == transaction);
> + if (WARN_ON_ONCE(jh->b_transaction != transaction &&
> + jh->b_next_transaction != transaction)) {
> + pr_err("JBD2: %s: assertion failure: b_transaction=%p transaction=%p b_next_transaction=%p\n",
> + journal->j_devname, jh->b_transaction,
> + transaction, jh->b_next_transaction);
> + ret = -EINVAL;
> + goto out_unlock_bh;
> + }
> spin_unlock(&jh->b_state_lock);
> }
> if (data_race(jh->b_modified == 1)) {
> @@ -1531,13 +1558,15 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
> spin_lock(&jh->b_state_lock);
> if (jh->b_transaction == transaction &&
> jh->b_jlist != BJ_Metadata)
> - pr_err("JBD2: assertion failure: h_type=%u "
> - "h_line_no=%u block_no=%llu jlist=%u\n",
> + pr_err("JBD2: assertion failure: h_type=%u h_line_no=%u block_no=%llu jlist=%u\n",
> handle->h_type, handle->h_line_no,
> (unsigned long long) bh->b_blocknr,
> jh->b_jlist);
> - J_ASSERT_JH(jh, jh->b_transaction != transaction ||
> - jh->b_jlist == BJ_Metadata);
> + if (WARN_ON_ONCE(jh->b_transaction == transaction &&
> + jh->b_jlist != BJ_Metadata)) {
> + ret = -EINVAL;
> + goto out_unlock_bh;
> + }
> spin_unlock(&jh->b_state_lock);
> }
> goto out;
> @@ -1557,8 +1586,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
> goto out_unlock_bh;
> }
>
> - journal = transaction->t_journal;
> -
> if (jh->b_modified == 0) {
> /*
> * This buffer's got modified and becoming part
> @@ -1636,7 +1663,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
> }
>
> /* That test should have eliminated the following case: */
> - J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
> + if (WARN_ON_ONCE(jh->b_frozen_data)) {
> + ret = -EINVAL;
> + goto out_unlock_bh;
> + }
>
> JBUFFER_TRACE(jh, "file as BJ_Metadata");
> spin_lock(&journal->j_list_lock);
> @@ -1675,6 +1705,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
> int err = 0;
> int was_modified = 0;
> int wait_for_writeback = 0;
> + int abort_journal = 0;
>
> if (is_handle_aborted(handle))
> return -EROFS;
> @@ -1708,7 +1739,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
> jh->b_modified = 0;
>
> if (jh->b_transaction == transaction) {
> - J_ASSERT_JH(jh, !jh->b_frozen_data);
> + if (WARN_ON_ONCE(jh->b_frozen_data)) {
> + err = -EINVAL;
> + abort_journal = 1;
> + goto drop;
> + }
>
> /* If we are forgetting a buffer which is already part
> * of this transaction, then we can just drop it from
> @@ -1747,8 +1782,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
> }
> spin_unlock(&journal->j_list_lock);
> } else if (jh->b_transaction) {
> - J_ASSERT_JH(jh, (jh->b_transaction ==
> - journal->j_committing_transaction));
> + if (WARN_ON_ONCE(jh->b_transaction != journal->j_committing_transaction)) {
> + err = -EINVAL;
> + abort_journal = 1;
> + goto drop;
> + }
> /* However, if the buffer is still owned by a prior
> * (committing) transaction, we can't drop it yet... */
> JBUFFER_TRACE(jh, "belongs to older transaction");
> @@ -1766,7 +1804,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
> jh->b_next_transaction = transaction;
> spin_unlock(&journal->j_list_lock);
> } else {
> - J_ASSERT(jh->b_next_transaction == transaction);
> + if (WARN_ON_ONCE(jh->b_next_transaction != transaction)) {
> + err = -EINVAL;
> + abort_journal = 1;
> + goto drop;
> + }
>
> /*
> * only drop a reference if this transaction modified
> @@ -1812,6 +1854,8 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
> drop:
> __brelse(bh);
> spin_unlock(&jh->b_state_lock);
> + if (abort_journal)
> + jbd2_journal_abort(journal, err);
> if (wait_for_writeback)
> wait_on_buffer(bh);
> jbd2_journal_put_journal_head(jh);
> @@ -2136,7 +2180,8 @@ bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
> struct buffer_head *bh;
> bool ret = false;
>
> - J_ASSERT(folio_test_locked(folio));
> + if (WARN_ON_ONCE(!folio_test_locked(folio)))
> + return false;
>
> head = folio_buffers(folio);
> bh = head;
> @@ -2651,6 +2696,8 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
> {
> transaction_t *transaction = handle->h_transaction;
> journal_t *journal;
> + int err = 0;
> + int abort_transaction = 0;
>
> if (is_handle_aborted(handle))
> return -EROFS;
> @@ -2685,20 +2732,33 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
> /* On some different transaction's list - should be
> * the committing one */
> if (jinode->i_transaction) {
> - J_ASSERT(jinode->i_next_transaction == NULL);
> - J_ASSERT(jinode->i_transaction ==
> - journal->j_committing_transaction);
> + if (WARN_ON_ONCE(jinode->i_next_transaction ||
> + jinode->i_transaction !=
> + journal->j_committing_transaction)) {
> + pr_err("JBD2: %s: assertion failure: i_next_transaction=%p i_transaction=%p j_committing_transaction=%p\n",
> + journal->j_devname, jinode->i_next_transaction,
> + jinode->i_transaction,
> + journal->j_committing_transaction);
> + err = -EINVAL;
> + abort_transaction = 1;
> + goto done;
> + }
> jinode->i_next_transaction = transaction;
> goto done;
> }
> /* Not on any transaction list... */
> - J_ASSERT(!jinode->i_next_transaction);
> + if (WARN_ON_ONCE(jinode->i_next_transaction)) {
> + err = -EINVAL;
> + abort_transaction = 1;
> + goto done;
> + }
> jinode->i_transaction = transaction;
> list_add(&jinode->i_list, &transaction->t_inode_list);
> done:
> spin_unlock(&journal->j_list_lock);
> -
> - return 0;
> + if (abort_transaction)
> + jbd2_journal_abort(journal, err);
> + return err;
> }
>
> int jbd2_journal_inode_ranged_write(handle_t *handle,
> --
> 2.53.0
>
--
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR