[RFC-v2 1/2] ext4: Add ext4_ilock & ext4_iunlock API

From: Ritesh Harjani
Date: Mon Sep 23 2019 - 01:44:33 EST


This adds ext4_ilock/iunlock types of APIs.
This is the preparation APIs to make shared
locking/unlocking & restarting with exclusive
locking/unlocking easier in next patch.

Along with above this also addresses the AIM7
regression problem which was only fixed for XFS
in,
commit 942491c9e6d6 ("xfs: fix AIM7 regression")

Signed-off-by: Ritesh Harjani <riteshh@xxxxxxxxxxxxx>
---
fs/ext4/ext4.h | 33 +++++++++++++++++++++++++
fs/ext4/extents.c | 16 ++++++------
fs/ext4/file.c | 63 +++++++++++++++++++++++++----------------------
fs/ext4/inode.c | 4 +--
fs/ext4/ioctl.c | 16 ++++++------
fs/ext4/super.c | 12 ++++-----
fs/ext4/xattr.c | 16 ++++++------
7 files changed, 98 insertions(+), 62 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2ab91815f52d..9ffafbe6bc3f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2945,6 +2945,39 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
return changed;
}

+#define EXT4_IOLOCK_EXCL (1 << 0)
+#define EXT4_IOLOCK_SHARED (1 << 1)
+
+static inline void ext4_ilock(struct inode *inode, unsigned int iolock)
+{
+ if (iolock == EXT4_IOLOCK_EXCL)
+ inode_lock(inode);
+ else
+ inode_lock_shared(inode);
+}
+
+static inline void ext4_iunlock(struct inode *inode, unsigned int iolock)
+{
+ if (iolock == EXT4_IOLOCK_EXCL)
+ inode_unlock(inode);
+ else
+ inode_unlock_shared(inode);
+}
+
+static inline int ext4_ilock_nowait(struct inode *inode, unsigned int iolock)
+{
+ if (iolock == EXT4_IOLOCK_EXCL)
+ return inode_trylock(inode);
+ else
+ return inode_trylock_shared(inode);
+}
+
+static inline void ext4_ilock_demote(struct inode *inode, unsigned int iolock)
+{
+ BUG_ON(iolock != EXT4_IOLOCK_EXCL);
+ downgrade_write(&inode->i_rwsem);
+}
+
int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
loff_t len);

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a869e206bd81..ef37f4d4ee7e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4680,7 +4680,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
else
max_blocks -= lblk;

- inode_lock(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);

/*
* Indirect files do not support unwritten extnets
@@ -4790,7 +4790,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,

ext4_journal_stop(handle);
out_mutex:
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
return ret;
}

@@ -4856,7 +4856,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;

- inode_lock(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);

/*
* We only support preallocation for extent-based files only
@@ -4887,7 +4887,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
EXT4_I(inode)->i_sync_tid);
}
out:
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
return ret;
}
@@ -5387,7 +5387,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
return ret;
}

- inode_lock(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);
/*
* There is no need to overlap collapse range with EOF, in which case
* it is effectively a truncate operation
@@ -5486,7 +5486,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
out_mmap:
up_write(&EXT4_I(inode)->i_mmap_sem);
out_mutex:
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
return ret;
}

@@ -5537,7 +5537,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
return ret;
}

- inode_lock(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);
/* Currently just for extent based files */
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
ret = -EOPNOTSUPP;
@@ -5664,7 +5664,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
out_mmap:
up_write(&EXT4_I(inode)->i_mmap_sem);
out_mutex:
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
return ret;
}

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d2ff383a8b9f..ce1cecbae932 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -57,14 +57,15 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
/*
* Get exclusion from truncate and other inode operations.
*/
- if (!inode_trylock_shared(inode)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!ext4_ilock_nowait(inode, EXT4_IOLOCK_SHARED))
return -EAGAIN;
- inode_lock_shared(inode);
+ } else {
+ ext4_ilock(inode, EXT4_IOLOCK_SHARED);
}

if (!ext4_dio_checks(inode)) {
- inode_unlock_shared(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_SHARED);
/*
* Fallback to buffered IO if the operation being
* performed on the inode is not supported by direct
@@ -77,7 +78,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
}

ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL);
- inode_unlock_shared(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_SHARED);

file_accessed(iocb->ki_filp);
return ret;
@@ -89,22 +90,23 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;

- if (!inode_trylock_shared(inode)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!ext4_ilock_nowait(inode, EXT4_IOLOCK_SHARED))
return -EAGAIN;
- inode_lock_shared(inode);
+ } else {
+ ext4_ilock(inode, EXT4_IOLOCK_SHARED);
}
/*
* Recheck under inode lock - at this point we are sure it cannot
* change anymore
*/
if (!IS_DAX(inode)) {
- inode_unlock_shared(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_SHARED);
/* Fallback to buffered IO in case we cannot support DAX */
return generic_file_read_iter(iocb, to);
}
ret = dax_iomap_rw(iocb, to, &ext4_iomap_ops);
- inode_unlock_shared(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_SHARED);

file_accessed(iocb->ki_filp);
return ret;
@@ -241,7 +243,7 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
if (iocb->ki_flags & IOCB_NOWAIT)
return -EOPNOTSUPP;

- inode_lock(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -250,7 +252,7 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
current->backing_dev_info = NULL;
out:
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
if (likely(ret > 0)) {
iocb->ki_pos += ret;
ret = generic_write_sync(iocb, ret);
@@ -374,15 +376,17 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t offset = iocb->ki_pos;
struct inode *inode = file_inode(iocb->ki_filp);
bool extend = false, overwrite = false, unaligned_aio = false;
+ unsigned int iolock = EXT4_IOLOCK_EXCL;

- if (!inode_trylock(inode)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!ext4_ilock_nowait(inode, iolock))
return -EAGAIN;
- inode_lock(inode);
+ } else {
+ ext4_ilock(inode, iolock);
}

if (!ext4_dio_checks(inode)) {
- inode_unlock(inode);
+ ext4_iunlock(inode, iolock);
/*
* Fallback to buffered IO if the operation on the
* inode is not supported by direct IO.
@@ -392,7 +396,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)

ret = ext4_write_checks(iocb, from);
if (ret <= 0) {
- inode_unlock(inode);
+ ext4_iunlock(inode, iolock);
return ret;
}

@@ -416,7 +420,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (!unaligned_aio && ext4_overwrite_io(inode, offset, count) &&
ext4_should_dioread_nolock(inode)) {
overwrite = true;
- downgrade_write(&inode->i_rwsem);
+ ext4_ilock_demote(inode, iolock);
+ iolock = EXT4_IOLOCK_SHARED;
}

if (offset + count > i_size_read(inode) ||
@@ -438,10 +443,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret == -EIOCBQUEUED && (unaligned_aio || extend))
inode_dio_wait(inode);

- if (overwrite)
- inode_unlock_shared(inode);
- else
- inode_unlock(inode);
+ ext4_iunlock(inode, iolock);

if (ret >= 0 && iov_iter_count(from))
return ext4_buffered_write_iter(iocb, from);
@@ -457,10 +459,11 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t offset;
struct inode *inode = file_inode(iocb->ki_filp);

- if (!inode_trylock(inode)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!ext4_ilock_nowait(inode, EXT4_IOLOCK_EXCL))
return -EAGAIN;
- inode_lock(inode);
+ } else {
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);
}

ret = ext4_write_checks(iocb, from);
@@ -480,7 +483,7 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (error)
ret = error;
out:
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
return ret;
@@ -707,14 +710,14 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
return generic_file_llseek_size(file, offset, whence,
maxbytes, i_size_read(inode));
case SEEK_HOLE:
- inode_lock_shared(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_SHARED);
offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
- inode_unlock_shared(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_SHARED);
break;
case SEEK_DATA:
- inode_lock_shared(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_SHARED);
offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
- inode_unlock_shared(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_SHARED);
break;
}

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a4f0749527c7..2870699ee504 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3914,7 +3914,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
return ret;
}

- inode_lock(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);

/* No need to punch hole beyond i_size */
if (offset >= inode->i_size)
@@ -4021,7 +4021,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
out_dio:
up_write(&EXT4_I(inode)->i_mmap_sem);
out_mutex:
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
return ret;
}

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 442f7ef873fc..c6ae48567207 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -787,13 +787,13 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (err)
return err;

- inode_lock(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);
err = ext4_ioctl_check_immutable(inode,
from_kprojid(&init_user_ns, ei->i_projid),
flags);
if (!err)
err = ext4_ioctl_setflags(inode, flags);
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
mnt_drop_write_file(filp);
return err;
}
@@ -824,7 +824,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto setversion_out;
}

- inode_lock(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
@@ -839,7 +839,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
ext4_journal_stop(handle);

unlock_out:
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
setversion_out:
mnt_drop_write_file(filp);
return err;
@@ -958,9 +958,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
* ext4_ext_swap_inode_data before we switch the
* inode format to prevent read.
*/
- inode_lock((inode));
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);
err = ext4_ext_migrate(inode);
- inode_unlock((inode));
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
mnt_drop_write_file(filp);
return err;
}
@@ -1150,7 +1150,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (err)
return err;

- inode_lock(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);
ext4_fill_fsxattr(inode, &old_fa);
err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
if (err)
@@ -1165,7 +1165,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto out;
err = ext4_ioctl_setproject(filp, fa.fsx_projid);
out:
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
mnt_drop_write_file(filp);
return err;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4079605d437a..45519036de83 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2681,12 +2681,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
__func__, inode->i_ino, inode->i_size);
jbd_debug(2, "truncating inode %lu to %lld bytes\n",
inode->i_ino, inode->i_size);
- inode_lock(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);
truncate_inode_pages(inode->i_mapping, inode->i_size);
ret = ext4_truncate(inode);
if (ret)
ext4_std_error(inode->i_sb, ret);
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
nr_truncates++;
} else {
if (test_opt(sb, DEBUG))
@@ -5763,7 +5763,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
* files. If this fails, we return success anyway since quotas
* are already enabled and this is not a hard failure.
*/
- inode_lock(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);
handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
if (IS_ERR(handle))
goto unlock_inode;
@@ -5773,7 +5773,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
unlock_inode:
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
}
return err;
}
@@ -5865,7 +5865,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
if (err || ext4_has_feature_quota(sb))
goto out_put;

- inode_lock(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);
/*
* Update modification times of quota files when userspace can
* start looking at them. If we fail, we return success anyway since
@@ -5880,7 +5880,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
out_unlock:
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
out_put:
lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
iput(inode);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 491f9ee4040e..dbe3e2900c24 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -422,9 +422,9 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE);
ext4_xattr_inode_set_ref(inode, 1);
} else {
- inode_lock(inode);
+ ext4_ilock(inode, EXT4_IOLOCK_EXCL);
inode->i_flags |= S_NOQUOTA;
- inode_unlock(inode);
+ ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
}

*ea_inode = inode;
@@ -1025,7 +1025,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
u32 hash;
int ret;

- inode_lock(ea_inode);
+ ext4_ilock(ea_inode, EXT4_IOLOCK_EXCL);

ret = ext4_reserve_inode_write(handle, ea_inode, &iloc);
if (ret)
@@ -1079,7 +1079,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
ext4_warning_inode(ea_inode,
"ext4_mark_iloc_dirty() failed ret=%d", ret);
out:
- inode_unlock(ea_inode);
+ ext4_iunlock(ea_inode, EXT4_IOLOCK_EXCL);
return ret;
}

@@ -1400,10 +1400,10 @@ static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
block += 1;
}

- inode_lock(ea_inode);
+ ext4_ilock(ea_inode, EXT4_IOLOCK_EXCL);
i_size_write(ea_inode, wsize);
ext4_update_i_disksize(ea_inode, wsize);
- inode_unlock(ea_inode);
+ ext4_iunlock(ea_inode, EXT4_IOLOCK_EXCL);

ext4_mark_inode_dirty(handle, ea_inode);

@@ -1452,9 +1452,9 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
*/
dquot_free_inode(ea_inode);
dquot_drop(ea_inode);
- inode_lock(ea_inode);
+ ext4_ilock(ea_inode, EXT4_IOLOCK_EXCL);
ea_inode->i_flags |= S_NOQUOTA;
- inode_unlock(ea_inode);
+ ext4_iunlock(ea_inode, EXT4_IOLOCK_EXCL);
}

return ea_inode;
--
2.21.0