[PATCH 26/32] ext4: cleanup transaction restarts during inode deletion

From: Tahsin Erdogan
Date: Wed Jun 21 2017 - 17:22:46 EST


During inode deletion, journal credits that will be needed are hard to
determine, that is why we have journal extend/restart calls in several
places. Whenever a transaction is restarted, filesystem must be in a
consistent state because there is no atomicity guarantee beyond a
restart call.

Add ext4_xattr_ensure_credits() helper function which takes care of
journal extend/restart logic. It also handles getting jbd2 write access
and dirty metadata calls. This function is called at every iteration of
handling an ea_inode reference.

Signed-off-by: Tahsin Erdogan <tahsin@xxxxxxxxxx>
---
v3: fixed checkpatch.pl warnings about long lines and indented label

v2: made ext4_xattr_ensure_credits() static

fs/ext4/inode.c | 66 ++++-----------
fs/ext4/xattr.c | 258 ++++++++++++++++++++++++++++++++++++--------------------
fs/ext4/xattr.h | 3 +-
3 files changed, 184 insertions(+), 143 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cf91532765a4..cd007f9757d1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -239,7 +239,11 @@ void ext4_evict_inode(struct inode *inode)
*/
sb_start_intwrite(inode->i_sb);

- handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, extra_credits);
+ if (!IS_NOQUOTA(inode))
+ extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
+
+ handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
+ ext4_blocks_for_truncate(inode)+extra_credits);
if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle));
/*
@@ -251,36 +255,9 @@ void ext4_evict_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb);
goto no_delete;
}
+
if (IS_SYNC(inode))
ext4_handle_sync(handle);
-
- /*
- * Delete xattr inode before deleting the main inode.
- */
- err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array);
- if (err) {
- ext4_warning(inode->i_sb,
- "couldn't delete inode's xattr (err %d)", err);
- goto stop_handle;
- }
-
- if (!IS_NOQUOTA(inode))
- extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
-
- if (!ext4_handle_has_enough_credits(handle,
- ext4_blocks_for_truncate(inode) + extra_credits)) {
- err = ext4_journal_extend(handle,
- ext4_blocks_for_truncate(inode) + extra_credits);
- if (err > 0)
- err = ext4_journal_restart(handle,
- ext4_blocks_for_truncate(inode) + extra_credits);
- if (err != 0) {
- ext4_warning(inode->i_sb,
- "couldn't extend journal (err %d)", err);
- goto stop_handle;
- }
- }
-
inode->i_size = 0;
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
@@ -298,25 +275,17 @@ void ext4_evict_inode(struct inode *inode)
}
}

- /*
- * ext4_ext_truncate() doesn't reserve any slop when it
- * restarts journal transactions; therefore there may not be
- * enough credits left in the handle to remove the inode from
- * the orphan list and set the dtime field.
- */
- if (!ext4_handle_has_enough_credits(handle, extra_credits)) {
- err = ext4_journal_extend(handle, extra_credits);
- if (err > 0)
- err = ext4_journal_restart(handle, extra_credits);
- if (err != 0) {
- ext4_warning(inode->i_sb,
- "couldn't extend journal (err %d)", err);
- stop_handle:
- ext4_journal_stop(handle);
- ext4_orphan_del(NULL, inode);
- sb_end_intwrite(inode->i_sb);
- goto no_delete;
- }
+ /* Remove xattr references. */
+ err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array,
+ extra_credits);
+ if (err) {
+ ext4_warning(inode->i_sb, "xattr delete (err %d)", err);
+stop_handle:
+ ext4_journal_stop(handle);
+ ext4_orphan_del(NULL, inode);
+ sb_end_intwrite(inode->i_sb);
+ ext4_xattr_inode_array_free(ea_inode_array);
+ goto no_delete;
}

/*
@@ -342,7 +311,6 @@ void ext4_evict_inode(struct inode *inode)
ext4_clear_inode(inode);
else
ext4_free_inode(handle, inode);
-
ext4_journal_stop(handle);
sb_end_intwrite(inode->i_sb);
ext4_xattr_inode_array_free(ea_inode_array);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 5f36121c5a00..353a323f30b1 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -108,6 +108,10 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
#define MB_BLOCK_CACHE(inode) (((struct ext4_sb_info *) \
inode->i_sb->s_fs_info)->s_mb_block_cache)

+static int
+ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
+ struct inode *inode);
+
#ifdef CONFIG_LOCKDEP
void ext4_xattr_inode_set_class(struct inode *ea_inode)
{
@@ -652,6 +656,128 @@ static void ext4_xattr_update_super_block(handle_t *handle,
}
}

+static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
+ int credits, struct buffer_head *bh,
+ bool dirty, bool block_csum)
+{
+ int error;
+
+ if (!ext4_handle_valid(handle))
+ return 0;
+
+ if (handle->h_buffer_credits >= credits)
+ return 0;
+
+ error = ext4_journal_extend(handle, credits - handle->h_buffer_credits);
+ if (!error)
+ return 0;
+ if (error < 0) {
+ ext4_warning(inode->i_sb, "Extend journal (error %d)", error);
+ return error;
+ }
+
+ if (bh && dirty) {
+ if (block_csum)
+ ext4_xattr_block_csum_set(inode, bh);
+ error = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (error) {
+ ext4_warning(inode->i_sb, "Handle metadata (error %d)",
+ error);
+ return error;
+ }
+ }
+
+ error = ext4_journal_restart(handle, credits);
+ if (error) {
+ ext4_warning(inode->i_sb, "Restart journal (error %d)", error);
+ return error;
+ }
+
+ if (bh) {
+ error = ext4_journal_get_write_access(handle, bh);
+ if (error) {
+ ext4_warning(inode->i_sb,
+ "Get write access failed (error %d)",
+ error);
+ return error;
+ }
+ }
+ return 0;
+}
+
+static void
+ext4_xattr_inode_remove_all(handle_t *handle, struct inode *parent,
+ struct buffer_head *bh,
+ struct ext4_xattr_entry *first, bool block_csum,
+ struct ext4_xattr_inode_array **ea_inode_array,
+ int extra_credits)
+{
+ struct inode *ea_inode;
+ struct ext4_xattr_entry *entry;
+ bool dirty = false;
+ unsigned int ea_ino;
+ int err;
+ int credits;
+
+ /* One credit for dec ref on ea_inode, one for orphan list addition, */
+ credits = 2 + extra_credits;
+
+ for (entry = first; !IS_LAST_ENTRY(entry);
+ entry = EXT4_XATTR_NEXT(entry)) {
+ if (!entry->e_value_inum)
+ continue;
+ ea_ino = le32_to_cpu(entry->e_value_inum);
+ err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
+ if (err)
+ continue;
+
+ err = ext4_expand_inode_array(ea_inode_array, ea_inode);
+ if (err) {
+ ext4_warning_inode(ea_inode,
+ "Expand inode array err=%d", err);
+ iput(ea_inode);
+ continue;
+ }
+
+ err = ext4_xattr_ensure_credits(handle, parent, credits, bh,
+ dirty, block_csum);
+ if (err) {
+ ext4_warning_inode(ea_inode, "Ensure credits err=%d",
+ err);
+ continue;
+ }
+
+ inode_lock(ea_inode);
+ clear_nlink(ea_inode);
+ ext4_orphan_add(handle, ea_inode);
+ inode_unlock(ea_inode);
+
+ /*
+ * Forget about ea_inode within the same transaction that
+ * decrements the ref count. This avoids duplicate decrements in
+ * case the rest of the work spills over to subsequent
+ * transactions.
+ */
+ entry->e_value_inum = 0;
+ entry->e_value_size = 0;
+
+ dirty = true;
+ }
+
+ if (dirty) {
+ /*
+ * Note that we are deliberately skipping csum calculation for
+ * the final update because we do not expect any journal
+ * restarts until xattr block is freed.
+ */
+
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ ext4_warning_inode(parent,
+ "handle dirty metadata err=%d", err);
+ }
+}
+
/*
* Release the xattr block BH: If the reference count is > 1, decrement it;
* otherwise free the block.
@@ -1984,42 +2110,6 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
return 0;
}

-/**
- * Add xattr inode to orphan list
- */
-static int
-ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode, int credits,
- struct ext4_xattr_inode_array *ea_inode_array)
-{
- int idx = 0, error = 0;
- struct inode *ea_inode;
-
- if (ea_inode_array == NULL)
- return 0;
-
- for (; idx < ea_inode_array->count; ++idx) {
- if (!ext4_handle_has_enough_credits(handle, credits)) {
- error = ext4_journal_extend(handle, credits);
- if (error > 0)
- error = ext4_journal_restart(handle, credits);
-
- if (error != 0) {
- ext4_warning(inode->i_sb,
- "couldn't extend journal "
- "(err %d)", error);
- return error;
- }
- }
- ea_inode = ea_inode_array->inodes[idx];
- inode_lock(ea_inode);
- ext4_orphan_add(handle, ea_inode);
- inode_unlock(ea_inode);
- /* the inode's i_count will be released by caller */
- }
-
- return 0;
-}
-
/*
* ext4_xattr_delete_inode()
*
@@ -2032,16 +2122,23 @@ ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode, int credits,
*/
int
ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
- struct ext4_xattr_inode_array **ea_inode_array)
+ struct ext4_xattr_inode_array **ea_inode_array,
+ int extra_credits)
{
struct buffer_head *bh = NULL;
struct ext4_xattr_ibody_header *header;
struct ext4_inode *raw_inode;
- struct ext4_iloc iloc;
- struct ext4_xattr_entry *entry;
- struct inode *ea_inode;
- unsigned int ea_ino;
- int credits = 3, error = 0;
+ struct ext4_iloc iloc = { .bh = NULL };
+ int error;
+
+ error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
+ NULL /* bh */,
+ false /* dirty */,
+ false /* block_csum */);
+ if (error) {
+ EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error);
+ goto cleanup;
+ }

if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
goto delete_external_ea;
@@ -2049,31 +2146,20 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
error = ext4_get_inode_loc(inode, &iloc);
if (error)
goto cleanup;
+
+ error = ext4_journal_get_write_access(handle, iloc.bh);
+ if (error)
+ goto cleanup;
+
raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode);
- for (entry = IFIRST(header); !IS_LAST_ENTRY(entry);
- entry = EXT4_XATTR_NEXT(entry)) {
- if (!entry->e_value_inum)
- continue;
- ea_ino = le32_to_cpu(entry->e_value_inum);
- error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
- if (error)
- continue;
- error = ext4_expand_inode_array(ea_inode_array, ea_inode);
- if (error) {
- iput(ea_inode);
- brelse(iloc.bh);
- goto cleanup;
- }
- entry->e_value_inum = 0;
- }
- brelse(iloc.bh);
+ ext4_xattr_inode_remove_all(handle, inode, iloc.bh, IFIRST(header),
+ false /* block_csum */, ea_inode_array,
+ extra_credits);

delete_external_ea:
if (!EXT4_I(inode)->i_file_acl) {
- /* add xattr inode to orphan list */
- error = ext4_xattr_inode_orphan_add(handle, inode, credits,
- *ea_inode_array);
+ error = 0;
goto cleanup;
}
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
@@ -2091,46 +2177,32 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
goto cleanup;
}

- for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
- entry = EXT4_XATTR_NEXT(entry)) {
- if (!entry->e_value_inum)
- continue;
- ea_ino = le32_to_cpu(entry->e_value_inum);
- error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
- if (error)
- continue;
- error = ext4_expand_inode_array(ea_inode_array, ea_inode);
- if (error)
- goto cleanup;
- entry->e_value_inum = 0;
- }
-
- /* add xattr inode to orphan list */
- error = ext4_xattr_inode_orphan_add(handle, inode, credits,
- *ea_inode_array);
- if (error)
- goto cleanup;
-
- if (!IS_NOQUOTA(inode))
- credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
-
- if (!ext4_handle_has_enough_credits(handle, credits)) {
- error = ext4_journal_extend(handle, credits);
- if (error > 0)
- error = ext4_journal_restart(handle, credits);
+ if (ext4_has_feature_ea_inode(inode->i_sb)) {
+ error = ext4_journal_get_write_access(handle, bh);
if (error) {
- ext4_warning(inode->i_sb,
- "couldn't extend journal (err %d)", error);
+ EXT4_ERROR_INODE(inode, "write access %llu",
+ EXT4_I(inode)->i_file_acl);
goto cleanup;
}
+ ext4_xattr_inode_remove_all(handle, inode, bh,
+ BFIRST(bh),
+ true /* block_csum */,
+ ea_inode_array,
+ extra_credits);
}

ext4_xattr_release_block(handle, inode, bh);
+ /* Update i_file_acl within the same transaction that releases block. */
EXT4_I(inode)->i_file_acl = 0;
-
+ error = ext4_mark_inode_dirty(handle, inode);
+ if (error) {
+ EXT4_ERROR_INODE(inode, "mark inode dirty (error %d)",
+ error);
+ goto cleanup;
+ }
cleanup:
+ brelse(iloc.bh);
brelse(bh);
-
return error;
}

diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index adf761518a73..b2005a2716d9 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -169,7 +169,8 @@ extern int ext4_xattr_set_credits(struct inode *inode, size_t value_len);

extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino);
extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
- struct ext4_xattr_inode_array **array);
+ struct ext4_xattr_inode_array **array,
+ int extra_credits);
extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);

extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
--
2.13.1.611.g7e3b11ae1-goog