[PATCH] exfat: call bh_read in get_block only when necessary

From: Sungjong Seo
Date: Wed Mar 26 2025 - 11:08:22 EST


With commit 11a347fb6cef ("exfat: change to get file size from DataLength"),
exfat_get_block() can now handle valid_size. However, most partially
unwritten blocks that could be mapped together with other blocks are
being processed inefficiently, one block at a time.

Let's handle blocks simply, as before, except for the partially unwritten
blocks that really require independent processing.

Signed-off-by: Sungjong Seo <sj1557.seo@xxxxxxxxxxx>
---
fs/exfat/inode.c | 159 +++++++++++++++++++++++------------------------
1 file changed, 77 insertions(+), 82 deletions(-)

diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index a23677de4544..b22c02d6000f 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -274,9 +274,11 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
sector_t last_block;
sector_t phys = 0;
sector_t valid_blks;
+ loff_t i_size;

mutex_lock(&sbi->s_lock);
- last_block = EXFAT_B_TO_BLK_ROUND_UP(i_size_read(inode), sb);
+ i_size = i_size_read(inode);
+ last_block = EXFAT_B_TO_BLK_ROUND_UP(i_size, sb);
if (iblock >= last_block && !create)
goto done;

@@ -305,102 +307,95 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
if (buffer_delay(bh_result))
clear_buffer_delay(bh_result);

- if (create) {
+ /*
+ * In most cases, we just need to set bh_result to mapped, unmapped
+ * or new status as follows:
+ * 1. i_size == valid_size
+ * 2. write case (create == 1)
+ * 3. direct_read (!bh_result->b_folio)
+ * -> the unwritten part will be zeroed in exfat_direct_IO()
+ *
+ * Otherwise, in the case of buffered read, it is necessary to take
+ * care of the last nested block if valid_size is not equal to i_size.
+ */
+ if (i_size == ei->valid_size || create || !bh_result->b_folio)
valid_blks = EXFAT_B_TO_BLK_ROUND_UP(ei->valid_size, sb);
+ else
+ valid_blks = EXFAT_B_TO_BLK(ei->valid_size, sb);

- if (iblock + max_blocks < valid_blks) {
- /* The range has been written, map it */
- goto done;
- } else if (iblock < valid_blks) {
- /*
- * The range has been partially written,
- * map the written part.
- */
- max_blocks = valid_blks - iblock;
- goto done;
- }
+ /* The range has been fully written, map it */
+ if (iblock + max_blocks < valid_blks)
+ goto done;

- /* The area has not been written, map and mark as new. */
- set_buffer_new(bh_result);
+ /* The range has been partially written, map the written part */
+ if (iblock < valid_blks) {
+ max_blocks = valid_blks - iblock;
+ goto done;
+ }

+ /* The area has not been written, map and mark as new for create case */
+ if (create) {
+ set_buffer_new(bh_result);
ei->valid_size = EXFAT_BLK_TO_B(iblock + max_blocks, sb);
mark_inode_dirty(inode);
- } else {
- valid_blks = EXFAT_B_TO_BLK(ei->valid_size, sb);
+ goto done;
+ }

- if (iblock + max_blocks < valid_blks) {
- /* The range has been written, map it */
- goto done;
- } else if (iblock < valid_blks) {
- /*
- * The area has been partially written,
- * map the written part.
- */
- max_blocks = valid_blks - iblock;
+ /*
+ * The area has just one block partially written.
+ * In that case, we should read and fill the unwritten part of
+ * a block with zero.
+ */
+ if (bh_result->b_folio && iblock == valid_blks &&
+ (ei->valid_size & (sb->s_blocksize - 1))) {
+ loff_t size, pos;
+ void *addr;
+
+ max_blocks = 1;
+
+ /*
+ * No buffer_head is allocated.
+ * (1) bmap: It's enough to set blocknr without I/O.
+ * (2) read: The unwritten part should be filled with zero.
+ * If a folio does not have any buffers,
+ * let's return -EAGAIN to fall back to
+ * block_read_full_folio() for per-bh IO.
+ */
+ if (!folio_buffers(bh_result->b_folio)) {
+ err = -EAGAIN;
goto done;
- } else if (iblock == valid_blks &&
- (ei->valid_size & (sb->s_blocksize - 1))) {
- /*
- * The block has been partially written,
- * zero the unwritten part and map the block.
- */
- loff_t size, pos;
- void *addr;
-
- max_blocks = 1;
-
- /*
- * For direct read, the unwritten part will be zeroed in
- * exfat_direct_IO()
- */
- if (!bh_result->b_folio)
- goto done;
-
- /*
- * No buffer_head is allocated.
- * (1) bmap: It's enough to fill bh_result without I/O.
- * (2) read: The unwritten part should be filled with 0
- * If a folio does not have any buffers,
- * let's returns -EAGAIN to fallback to
- * per-bh IO like block_read_full_folio().
- */
- if (!folio_buffers(bh_result->b_folio)) {
- err = -EAGAIN;
- goto done;
- }
+ }

- pos = EXFAT_BLK_TO_B(iblock, sb);
- size = ei->valid_size - pos;
- addr = folio_address(bh_result->b_folio) +
- offset_in_folio(bh_result->b_folio, pos);
+ pos = EXFAT_BLK_TO_B(iblock, sb);
+ size = ei->valid_size - pos;
+ addr = folio_address(bh_result->b_folio) +
+ offset_in_folio(bh_result->b_folio, pos);

- /* Check if bh->b_data points to proper addr in folio */
- if (bh_result->b_data != addr) {
- exfat_fs_error_ratelimit(sb,
+ /* Check if bh->b_data points to proper addr in folio */
+ if (bh_result->b_data != addr) {
+ exfat_fs_error_ratelimit(sb,
"b_data(%p) != folio_addr(%p)",
bh_result->b_data, addr);
- err = -EINVAL;
- goto done;
- }
-
- /* Read a block */
- err = bh_read(bh_result, 0);
- if (err < 0)
- goto done;
+ err = -EINVAL;
+ goto done;
+ }

- /* Zero unwritten part of a block */
- memset(bh_result->b_data + size, 0,
- bh_result->b_size - size);
+ /* Read a block */
+ err = bh_read(bh_result, 0);
+ if (err < 0)
+ goto done;

- err = 0;
- } else {
- /*
- * The range has not been written, clear the mapped flag
- * to only zero the cache and do not read from disk.
- */
- clear_buffer_mapped(bh_result);
- }
+ /* Zero unwritten part of a block */
+ memset(bh_result->b_data + size, 0, bh_result->b_size - size);
+ err = 0;
+ goto done;
}
+
+ /*
+ * The area has not been written, clear mapped for read/bmap cases.
+ * If so, it will be filled with zero without reading from disk.
+ */
+ clear_buffer_mapped(bh_result);
done:
bh_result->b_size = EXFAT_BLK_TO_B(max_blocks, sb);
if (err < 0)
--
2.25.1