Re: [PATCH 05/27] ext4: refactor ext4_zero_range()

From: Jan Kara
Date: Wed Dec 04 2024 - 07:31:54 EST


On Tue 22-10-24 19:10:36, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@xxxxxxxxxx>
>
> The current implementation of ext4_zero_range() contains complex
> position calculations and stale error tags. To improve the code's
> clarity and maintainability, it is essential to clean up the code and
> improve its readability. This can be achieved by: a) simplifying and
> renaming variables, making the style the same as ext4_punch_hole(); b)
> eliminating unnecessary position calculations, writing back all data in
> data=journal mode, and dropping the page cache from the original offset
> to the end, rather than using aligned blocks; c) renaming the stale
> out_mutex tags.
>
> Signed-off-by: Zhang Yi <yi.zhang@xxxxxxxxxx>

...

> - goto out_mutex;
> -
> - /* Preallocate the range including the unaligned edges */
> - if (partial_begin || partial_end) {
> - ret = ext4_alloc_file_blocks(file,
> - round_down(offset, 1 << blkbits) >> blkbits,
> - (round_up((offset + len), 1 << blkbits) -
> - round_down(offset, 1 << blkbits)) >> blkbits,
> - new_size, flags);
> - if (ret)
> - goto out_mutex;
> -
> - }

So I think we should keep this first ext4_alloc_file_blocks() call before
we truncate the page cache. Otherwise if ext4_alloc_file_blocks() fails due
to ENOSPC, we have already lost the dirty data originally in the zeroed
range. All the other failure modes are kind of catastrophic anyway, so they
are fine after dropping the page cache. But this can be quite common and
should be handled more gracefully.

Honza

> -
> - /* Zero range excluding the unaligned edges */
> - if (max_blocks > 0) {
> - flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
> - EXT4_EX_NOCACHE);
> + goto out;
>
> - /*
> - * Prevent page faults from reinstantiating pages we have
> - * released from page cache.
> - */
> - filemap_invalidate_lock(mapping);
> + /*
> + * Prevent page faults from reinstantiating pages we have released
> + * from page cache.
> + */
> + filemap_invalidate_lock(mapping);
>
> - ret = ext4_break_layouts(inode);
> - if (ret) {
> - filemap_invalidate_unlock(mapping);
> - goto out_mutex;
> - }
> + ret = ext4_break_layouts(inode);
> + if (ret)
> + goto out_invalidate_lock;
>
> + /*
> + * For journalled data we need to write (and checkpoint) pages before
> + * discarding page cache to avoid inconsitent data on disk in case of
> + * crash before zeroing trans is committed.
> + */
> + if (ext4_should_journal_data(inode)) {
> + ret = filemap_write_and_wait_range(mapping, offset, end - 1);
> + } else {
> ret = ext4_update_disksize_before_punch(inode, offset, len);
> - if (ret) {
> - filemap_invalidate_unlock(mapping);
> - goto out_mutex;
> - }
> + ext4_truncate_folios_range(inode, offset, end);
> + }
> + if (ret)
> + goto out_invalidate_lock;
>
> - /*
> - * For journalled data we need to write (and checkpoint) pages
> - * before discarding page cache to avoid inconsitent data on
> - * disk in case of crash before zeroing trans is committed.
> - */
> - if (ext4_should_journal_data(inode)) {
> - ret = filemap_write_and_wait_range(mapping, start,
> - end - 1);
> - if (ret) {
> - filemap_invalidate_unlock(mapping);
> - goto out_mutex;
> - }
> - }
> + /* Now release the pages and zero block aligned part of pages */
> + truncate_pagecache_range(inode, offset, end - 1);
>
> - /* Now release the pages and zero block aligned part of pages */
> - ext4_truncate_folios_range(inode, start, end);
> - truncate_pagecache_range(inode, start, end - 1);
> + flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
> + /* Preallocate the range including the unaligned edges */
> + if (offset & (blocksize - 1) || end & (blocksize - 1)) {
> + ext4_lblk_t alloc_lblk = offset >> blkbits;
> + ext4_lblk_t len_lblk = EXT4_MAX_BLOCKS(len, offset, blkbits);
>
> - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
> - flags);
> - filemap_invalidate_unlock(mapping);
> + ret = ext4_alloc_file_blocks(file, alloc_lblk, len_lblk,
> + new_size, flags);
> if (ret)
> - goto out_mutex;
> + goto out_invalidate_lock;
> }
> - if (!partial_begin && !partial_end)
> - goto out_mutex;
> +
> + /* Zero range excluding the unaligned edges */
> + start_lblk = round_up(offset, blocksize) >> blkbits;
> + end_lblk = end >> blkbits;
> + if (end_lblk > start_lblk) {
> + ext4_lblk_t zero_blks = end_lblk - start_lblk;
> +
> + flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE);
> + ret = ext4_alloc_file_blocks(file, start_lblk, zero_blks,
> + new_size, flags);
> + if (ret)
> + goto out_invalidate_lock;
> + }
> + /* Finish zeroing out if it doesn't contain partial block */
> + if (!(offset & (blocksize - 1)) && !(end & (blocksize - 1)))
> + goto out_invalidate_lock;
>
> /*
> * In worst case we have to writeout two nonadjacent unwritten
> @@ -4700,25 +4665,29 @@ static long ext4_zero_range(struct file *file, loff_t offset,
> if (IS_ERR(handle)) {
> ret = PTR_ERR(handle);
> ext4_std_error(inode->i_sb, ret);
> - goto out_mutex;
> + goto out_invalidate_lock;
> }
>
> + /* Zero out partial block at the edges of the range */
> + ret = ext4_zero_partial_blocks(handle, inode, offset, len);
> + if (ret)
> + goto out_handle;
> +
> if (new_size)
> ext4_update_inode_size(inode, new_size);
> ret = ext4_mark_inode_dirty(handle, inode);
> if (unlikely(ret))
> goto out_handle;
> - /* Zero out partial block at the edges of the range */
> - ret = ext4_zero_partial_blocks(handle, inode, offset, len);
> - if (ret >= 0)
> - ext4_update_inode_fsync_trans(handle, inode, 1);
>
> + ext4_update_inode_fsync_trans(handle, inode, 1);
> if (file->f_flags & O_SYNC)
> ext4_handle_sync(handle);
>
> out_handle:
> ext4_journal_stop(handle);
> -out_mutex:
> +out_invalidate_lock:
> + filemap_invalidate_unlock(mapping);
> +out:
> inode_unlock(inode);
> return ret;
> }
> --
> 2.46.1
>
--
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR