[PATCH v2] ext4: make xattr inode reads faster

From: Tahsin Erdogan
Date: Mon Jul 17 2017 - 02:20:27 EST


ext4_xattr_inode_read() currently reads each block sequentially while
waiting for io operation to complete before moving on to the next
block. This prevents request merging in block layer.

Add a ext4_bread_batch() function that starts reads for all blocks
then optionally waits for them to complete. A similar logic is used
in ext4_find_entry(), so update that code to use the new function.

Signed-off-by: Tahsin Erdogan <tahsin@xxxxxxxxxx>
---
v2:
- updated ext4_find_entry() to also use ext4_bread_batch()
- added wait parameter to ext4_bread_batch()

fs/ext4/ext4.h | 2 ++
fs/ext4/inode.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
fs/ext4/namei.c | 43 ++++++++++++++-----------------------------
fs/ext4/xattr.c | 51 ++++++++++++++++++++++++++++++++-------------------
4 files changed, 92 insertions(+), 48 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9ebde0cd632e..e9440ed605c0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2462,6 +2462,8 @@ extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
int ext4_inode_is_fast_symlink(struct inode *inode);
struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
+int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
+ bool wait, struct buffer_head **bhs);
int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3c600f02673f..70699940e20d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1015,6 +1015,50 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
return ERR_PTR(-EIO);
}

+/* Read a contiguous batch of blocks. */
+int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
+ bool wait, struct buffer_head **bhs)
+{
+ int i, err;
+
+ for (i = 0; i < bh_count; i++) {
+ bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */);
+ if (IS_ERR(bhs[i])) {
+ err = PTR_ERR(bhs[i]);
+ bh_count = i;
+ goto out_brelse;
+ }
+ }
+
+ for (i = 0; i < bh_count; i++)
+ /* Note that NULL bhs[i] is valid because of holes. */
+ if (bhs[i] && !buffer_uptodate(bhs[i]))
+ ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1,
+ &bhs[i]);
+
+ if (!wait)
+ return 0;
+
+ for (i = 0; i < bh_count; i++)
+ if (bhs[i])
+ wait_on_buffer(bhs[i]);
+
+ for (i = 0; i < bh_count; i++) {
+ if (bhs[i] && !buffer_uptodate(bhs[i])) {
+ err = -EIO;
+ goto out_brelse;
+ }
+ }
+ return 0;
+
+out_brelse:
+ for (i = 0; i < bh_count; i++) {
+ brelse(bhs[i]);
+ bhs[i] = NULL;
+ }
+ return err;
+}
+
int ext4_walk_page_buffers(handle_t *handle,
struct buffer_head *head,
unsigned from,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 13f0cadb1238..2758ff1b940f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1342,13 +1342,12 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
struct super_block *sb;
struct buffer_head *bh_use[NAMEI_RA_SIZE];
struct buffer_head *bh, *ret = NULL;
- ext4_lblk_t start, block, b;
+ ext4_lblk_t start, block;
const u8 *name = d_name->name;
- int ra_max = 0; /* Number of bh's in the readahead
+ size_t ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
- int ra_ptr = 0; /* Current index into readahead
+ size_t ra_ptr = 0; /* Current index into readahead
buffer */
- int num = 0;
ext4_lblk_t nblocks;
int i, namelen, retval;
struct ext4_filename fname;
@@ -1411,31 +1410,17 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
if (ra_ptr >= ra_max) {
/* Refill the readahead buffer */
ra_ptr = 0;
- b = block;
- for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
- /*
- * Terminate if we reach the end of the
- * directory and must wrap, or if our
- * search has finished at this block.
- */
- if (b >= nblocks || (num && block == start)) {
- bh_use[ra_max] = NULL;
- break;
- }
- num++;
- bh = ext4_getblk(NULL, dir, b++, 0);
- if (IS_ERR(bh)) {
- if (ra_max == 0) {
- ret = bh;
- goto cleanup_and_exit;
- }
- break;
- }
- bh_use[ra_max] = bh;
- if (bh)
- ll_rw_block(REQ_OP_READ,
- REQ_META | REQ_PRIO,
- 1, &bh);
+ if (block < start)
+ ra_max = start - block;
+ else
+ ra_max = nblocks - block;
+ ra_max = min(ra_max, ARRAY_SIZE(bh_use));
+ retval = ext4_bread_batch(dir, block, ra_max,
+ false /* wait */, bh_use);
+ if (retval) {
+ ret = ERR_PTR(retval);
+ ra_max = 0;
+ goto cleanup_and_exit;
}
}
if ((bh = bh_use[ra_ptr++]) == NULL)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index cff4f41ced61..68b5b4f9fbcb 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -317,28 +317,41 @@ static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash)
*/
static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
{
- unsigned long block = 0;
- struct buffer_head *bh;
- int blocksize = ea_inode->i_sb->s_blocksize;
- size_t csize, copied = 0;
- void *copy_pos = buf;
-
- while (copied < size) {
- csize = (size - copied) > blocksize ? blocksize : size - copied;
- bh = ext4_bread(NULL, ea_inode, block, 0);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
- if (!bh)
- return -EFSCORRUPTED;
+ int blocksize = 1 << ea_inode->i_blkbits;
+ int bh_count = (size + blocksize - 1) >> ea_inode->i_blkbits;
+ int tail_size = (size % blocksize) ?: blocksize;
+ struct buffer_head *bhs_inline[8];
+ struct buffer_head **bhs = bhs_inline;
+ int i, ret;
+
+ if (bh_count > ARRAY_SIZE(bhs_inline)) {
+ bhs = kmalloc_array(bh_count, sizeof(*bhs), GFP_NOFS);
+ if (!bhs)
+ return -ENOMEM;
+ }

- memcpy(copy_pos, bh->b_data, csize);
- brelse(bh);
+ ret = ext4_bread_batch(ea_inode, 0 /* block */, bh_count,
+ true /* wait */, bhs);
+ if (ret)
+ goto free_bhs;

- copy_pos += csize;
- block += 1;
- copied += csize;
+ for (i = 0; i < bh_count; i++) {
+ /* There shouldn't be any holes in ea_inode. */
+ if (!bhs[i]) {
+ ret = -EFSCORRUPTED;
+ goto put_bhs;
+ }
+ memcpy((char *)buf + blocksize * i, bhs[i]->b_data,
+ i < bh_count - 1 ? blocksize : tail_size);
}
- return 0;
+ ret = 0;
+put_bhs:
+ for (i = 0; i < bh_count; i++)
+ brelse(bhs[i]);
+free_bhs:
+ if (bhs != bhs_inline)
+ kfree(bhs);
+ return ret;
}

static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
--
2.13.2.932.g7449e964c-goog