[PATCH 3.16 028/212] ext4: fix deadlock between inline_data and ext4_expand_extra_isize_ea()

From: Ben Hutchings
Date: Thu Jun 01 2017 - 12:34:07 EST


3.16.44-rc1 review patch. If anyone has any objections, please let me know.

------------------

From: Theodore Ts'o <tytso@xxxxxxx>

commit c755e251357a0cee0679081f08c3f4ba797a8009 upstream.

The xattr_sem deadlock problems fixed in commit 2e81a4eeedca: "ext4:
avoid deadlock when expanding inode size" didn't include the use of
xattr_sem in fs/ext4/inline.c. With the addition of project quota
which added a new extra inode field, this exposed deadlocks in the
inline_data code similar to the ones fixed by 2e81a4eeedca.

The deadlock can be reproduced via:

dmesg -n 7
mke2fs -t ext4 -O inline_data -Fq -I 256 /dev/vdc 32768
mount -t ext4 -o debug_want_extra_isize=24 /dev/vdc /vdc
mkdir /vdc/a
umount /vdc
mount -t ext4 /dev/vdc /vdc
echo foo > /vdc/a/foo

and looks like this:

[ 11.158815]
[ 11.160276] =============================================
[ 11.161960] [ INFO: possible recursive locking detected ]
[ 11.161960] 4.10.0-rc3-00015-g011b30a8a3cf #160 Tainted: G W
[ 11.161960] ---------------------------------------------
[ 11.161960] bash/2519 is trying to acquire lock:
[ 11.161960] (&ei->xattr_sem){++++..}, at: [<c1225a4b>] ext4_expand_extra_isize_ea+0x3d/0x4cd
[ 11.161960]
[ 11.161960] but task is already holding lock:
[ 11.161960] (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152
[ 11.161960]
[ 11.161960] other info that might help us debug this:
[ 11.161960] Possible unsafe locking scenario:
[ 11.161960]
[ 11.161960] CPU0
[ 11.161960] ----
[ 11.161960] lock(&ei->xattr_sem);
[ 11.161960] lock(&ei->xattr_sem);
[ 11.161960]
[ 11.161960] *** DEADLOCK ***
[ 11.161960]
[ 11.161960] May be due to missing lock nesting notation
[ 11.161960]
[ 11.161960] 4 locks held by bash/2519:
[ 11.161960] #0: (sb_writers#3){.+.+.+}, at: [<c11a2414>] mnt_want_write+0x1e/0x3e
[ 11.161960] #1: (&type->i_mutex_dir_key){++++++}, at: [<c119508b>] path_openat+0x338/0x67a
[ 11.161960] #2: (jbd2_handle){++++..}, at: [<c123314a>] start_this_handle+0x582/0x622
[ 11.161960] #3: (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152
[ 11.161960]
[ 11.161960] stack backtrace:
[ 11.161960] CPU: 0 PID: 2519 Comm: bash Tainted: G W 4.10.0-rc3-00015-g011b30a8a3cf #160
[ 11.161960] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1 04/01/2014
[ 11.161960] Call Trace:
[ 11.161960] dump_stack+0x72/0xa3
[ 11.161960] __lock_acquire+0xb7c/0xcb9
[ 11.161960] ? kvm_clock_read+0x1f/0x29
[ 11.161960] ? __lock_is_held+0x36/0x66
[ 11.161960] ? __lock_is_held+0x36/0x66
[ 11.161960] lock_acquire+0x106/0x18a
[ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd
[ 11.161960] down_write+0x39/0x72
[ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd
[ 11.161960] ext4_expand_extra_isize_ea+0x3d/0x4cd
[ 11.161960] ? _raw_read_unlock+0x22/0x2c
[ 11.161960] ? jbd2_journal_extend+0x1e2/0x262
[ 11.161960] ? __ext4_journal_get_write_access+0x3d/0x60
[ 11.161960] ext4_mark_inode_dirty+0x17d/0x26d
[ 11.161960] ? ext4_add_dirent_to_inline.isra.12+0xa5/0xb2
[ 11.161960] ext4_add_dirent_to_inline.isra.12+0xa5/0xb2
[ 11.161960] ext4_try_add_inline_entry+0x69/0x152
[ 11.161960] ext4_add_entry+0xa3/0x848
[ 11.161960] ? __brelse+0x14/0x2f
[ 11.161960] ? _raw_spin_unlock_irqrestore+0x44/0x4f
[ 11.161960] ext4_add_nondir+0x17/0x5b
[ 11.161960] ext4_create+0xcf/0x133
[ 11.161960] ? ext4_mknod+0x12f/0x12f
[ 11.161960] lookup_open+0x39e/0x3fb
[ 11.161960] ? __wake_up+0x1a/0x40
[ 11.161960] ? lock_acquire+0x11e/0x18a
[ 11.161960] path_openat+0x35c/0x67a
[ 11.161960] ? sched_clock_cpu+0xd7/0xf2
[ 11.161960] do_filp_open+0x36/0x7c
[ 11.161960] ? _raw_spin_unlock+0x22/0x2c
[ 11.161960] ? __alloc_fd+0x169/0x173
[ 11.161960] do_sys_open+0x59/0xcc
[ 11.161960] SyS_open+0x1d/0x1f
[ 11.161960] do_int80_syscall_32+0x4f/0x61
[ 11.161960] entry_INT80_32+0x2f/0x2f
[ 11.161960] EIP: 0xb76ad469
[ 11.161960] EFLAGS: 00000286 CPU: 0
[ 11.161960] EAX: ffffffda EBX: 08168ac8 ECX: 00008241 EDX: 000001b6
[ 11.161960] ESI: b75e46bc EDI: b7755000 EBP: bfbdb108 ESP: bfbdafc0
[ 11.161960] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b

Reported-by: George Spelvin <linux@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Theodore Ts'o <tytso@xxxxxxx>
[bwh: Backported to 3.16: adjust context]
Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
---
fs/ext4/inline.c | 66 ++++++++++++++++++++++++++------------------------------
fs/ext4/xattr.c | 30 +++++++++++---------------
fs/ext4/xattr.h | 32 +++++++++++++++++++++++++++
3 files changed, 74 insertions(+), 54 deletions(-)

--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -380,7 +380,7 @@ out:
static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
unsigned int len)
{
- int ret, size;
+ int ret, size, no_expand;
struct ext4_inode_info *ei = EXT4_I(inode);

if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
@@ -390,15 +390,14 @@ static int ext4_prepare_inline_data(hand
if (size < len)
return -ENOSPC;

- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);

if (ei->i_inline_off)
ret = ext4_update_inline_data(handle, inode, len);
else
ret = ext4_create_inline_data(handle, inode, len);

- up_write(&EXT4_I(inode)->xattr_sem);
-
+ ext4_write_unlock_xattr(inode, &no_expand);
return ret;
}

@@ -528,7 +527,7 @@ static int ext4_convert_inline_data_to_e
struct inode *inode,
unsigned flags)
{
- int ret, needed_blocks;
+ int ret, needed_blocks, no_expand;
handle_t *handle = NULL;
int retries = 0, sem_held = 0;
struct page *page = NULL;
@@ -568,7 +567,7 @@ retry:
goto out;
}

- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
sem_held = 1;
/* If some one has already done this for us, just exit. */
if (!ext4_has_inline_data(inode)) {
@@ -603,7 +602,7 @@ retry:
unlock_page(page);
page_cache_release(page);
ext4_orphan_add(handle, inode);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
sem_held = 0;
ext4_journal_stop(handle);
handle = NULL;
@@ -628,7 +627,7 @@ out:
page_cache_release(page);
}
if (sem_held)
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
if (handle)
ext4_journal_stop(handle);
brelse(iloc.bh);
@@ -721,7 +720,7 @@ convert:
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
unsigned copied, struct page *page)
{
- int ret;
+ int ret, no_expand;
void *kaddr;
struct ext4_iloc iloc;

@@ -739,7 +738,7 @@ int ext4_write_inline_data_end(struct in
goto out;
}

- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
BUG_ON(!ext4_has_inline_data(inode));

kaddr = kmap_atomic(page);
@@ -749,7 +748,7 @@ int ext4_write_inline_data_end(struct in
/* clear page dirty so that writepages wouldn't work for us. */
ClearPageDirty(page);

- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
brelse(iloc.bh);
out:
return copied;
@@ -760,7 +759,7 @@ ext4_journalled_write_inline_data(struct
unsigned len,
struct page *page)
{
- int ret;
+ int ret, no_expand;
void *kaddr;
struct ext4_iloc iloc;

@@ -770,11 +769,11 @@ ext4_journalled_write_inline_data(struct
return NULL;
}

- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
kaddr = kmap_atomic(page);
ext4_write_inline_data(inode, &iloc, kaddr, 0, len);
kunmap_atomic(kaddr);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);

return iloc.bh;
}
@@ -1238,7 +1237,7 @@ out:
int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode)
{
- int ret, inline_size;
+ int ret, inline_size, no_expand;
void *inline_start;
struct ext4_iloc iloc;
struct inode *dir = dentry->d_parent->d_inode;
@@ -1247,7 +1246,7 @@ int ext4_try_add_inline_entry(handle_t *
if (ret)
return ret;

- down_write(&EXT4_I(dir)->xattr_sem);
+ ext4_write_lock_xattr(dir, &no_expand);
if (!ext4_has_inline_data(dir))
goto out;

@@ -1292,7 +1291,7 @@ int ext4_try_add_inline_entry(handle_t *

out:
ext4_mark_inode_dirty(handle, dir);
- up_write(&EXT4_I(dir)->xattr_sem);
+ ext4_write_unlock_xattr(dir, &no_expand);
brelse(iloc.bh);
return ret;
}
@@ -1648,7 +1647,7 @@ int ext4_delete_inline_entry(handle_t *h
struct buffer_head *bh,
int *has_inline_data)
{
- int err, inline_size;
+ int err, inline_size, no_expand;
struct ext4_iloc iloc;
void *inline_start;

@@ -1656,7 +1655,7 @@ int ext4_delete_inline_entry(handle_t *h
if (err)
return err;

- down_write(&EXT4_I(dir)->xattr_sem);
+ ext4_write_lock_xattr(dir, &no_expand);
if (!ext4_has_inline_data(dir)) {
*has_inline_data = 0;
goto out;
@@ -1691,7 +1690,7 @@ int ext4_delete_inline_entry(handle_t *h

ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size);
out:
- up_write(&EXT4_I(dir)->xattr_sem);
+ ext4_write_unlock_xattr(dir, &no_expand);
brelse(iloc.bh);
if (err != -ENOENT)
ext4_std_error(dir->i_sb, err);
@@ -1790,11 +1789,11 @@ out:

int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
{
- int ret;
+ int ret, no_expand;

- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
ret = ext4_destroy_inline_data_nolock(handle, inode);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);

return ret;
}
@@ -1872,7 +1871,7 @@ out:
void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
{
handle_t *handle;
- int inline_size, value_len, needed_blocks;
+ int inline_size, value_len, needed_blocks, no_expand;
size_t i_size;
void *value = NULL;
struct ext4_xattr_ibody_find is = {
@@ -1889,7 +1888,7 @@ void ext4_inline_data_truncate(struct in
if (IS_ERR(handle))
return;

- down_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_lock_xattr(inode, &no_expand);
if (!ext4_has_inline_data(inode)) {
*has_inline = 0;
ext4_journal_stop(handle);
@@ -1947,7 +1946,7 @@ out_error:
up_write(&EXT4_I(inode)->i_data_sem);
out:
brelse(is.iloc.bh);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
kfree(value);
if (inode->i_nlink)
ext4_orphan_del(handle, inode);
@@ -1963,7 +1962,7 @@ out:

int ext4_convert_inline_data(struct inode *inode)
{
- int error, needed_blocks;
+ int error, needed_blocks, no_expand;
handle_t *handle;
struct ext4_iloc iloc;

@@ -1985,15 +1984,10 @@ int ext4_convert_inline_data(struct inod
goto out_free;
}

- down_write(&EXT4_I(inode)->xattr_sem);
- if (!ext4_has_inline_data(inode)) {
- up_write(&EXT4_I(inode)->xattr_sem);
- goto out;
- }
-
- error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
- up_write(&EXT4_I(inode)->xattr_sem);
-out:
+ ext4_write_lock_xattr(inode, &no_expand);
+ if (ext4_has_inline_data(inode))
+ error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
+ ext4_write_unlock_xattr(inode, &no_expand);
ext4_journal_stop(handle);
out_free:
brelse(iloc.bh);
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1106,16 +1106,14 @@ ext4_xattr_set_handle(handle_t *handle,
struct ext4_xattr_block_find bs = {
.s = { .not_found = -ENODATA, },
};
- unsigned long no_expand;
+ int no_expand;
int error;

if (!name)
return -EINVAL;
if (strlen(name) > 255)
return -ERANGE;
- down_write(&EXT4_I(inode)->xattr_sem);
- no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
- ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ ext4_write_lock_xattr(inode, &no_expand);

error = ext4_reserve_inode_write(handle, inode, &is.iloc);
if (error)
@@ -1176,7 +1174,7 @@ ext4_xattr_set_handle(handle_t *handle,
ext4_xattr_update_super_block(handle, inode->i_sb);
inode->i_ctime = ext4_current_time(inode);
if (!value)
- ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ no_expand = 0;
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
/*
* The bh is consumed by ext4_mark_iloc_dirty, even with
@@ -1190,9 +1188,7 @@ ext4_xattr_set_handle(handle_t *handle,
cleanup:
brelse(is.iloc.bh);
brelse(bs.bh);
- if (no_expand == 0)
- ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
return error;
}

@@ -1275,12 +1271,11 @@ int ext4_expand_extra_isize_ea(struct in
void *base, *start, *end;
int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
+ int no_expand;
+
+ if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
+ return 0;

- down_write(&EXT4_I(inode)->xattr_sem);
- /*
- * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
- */
- ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
retry:
if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
goto out;
@@ -1469,8 +1464,7 @@ retry:
}
brelse(bh);
out:
- ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
- up_write(&EXT4_I(inode)->xattr_sem);
+ ext4_write_unlock_xattr(inode, &no_expand);
return 0;

cleanup:
@@ -1482,10 +1476,10 @@ cleanup:
kfree(bs);
brelse(bh);
/*
- * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
- * size expansion failed.
+ * Inode size expansion failed; don't try again
*/
- up_write(&EXT4_I(inode)->xattr_sem);
+ no_expand = 1;
+ ext4_write_unlock_xattr(inode, &no_expand);
return error;
}

--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -98,6 +98,38 @@ extern const struct xattr_handler ext4_x
extern const struct xattr_handler ext4_xattr_trusted_handler;
extern const struct xattr_handler ext4_xattr_security_handler;

+/*
+ * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes.
+ * The first is to signal that there the inline xattrs and data are
+ * taking up so much space that we might as well not keep trying to
+ * expand it. The second is that xattr_sem is taken for writing, so
+ * we shouldn't try to recurse into the inode expansion. For this
+ * second case, we need to make sure that we take save and restore the
+ * NO_EXPAND state flag appropriately.
+ */
+static inline void ext4_write_lock_xattr(struct inode *inode, int *save)
+{
+ down_write(&EXT4_I(inode)->xattr_sem);
+ *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+}
+
+static inline int ext4_write_trylock_xattr(struct inode *inode, int *save)
+{
+ if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0)
+ return 0;
+ *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ return 1;
+}
+
+static inline void ext4_write_unlock_xattr(struct inode *inode, int *save)
+{
+ if (*save == 0)
+ ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ up_write(&EXT4_I(inode)->xattr_sem);
+}
+
extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);

extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);