Re: [PATCH RFC v3 2/2] block: enable RWF_DONTCACHE for block devices

From: Jan Kara

Date: Mon Mar 02 2026 - 04:17:12 EST


On Fri 27-02-26 11:41:08, Tal Zussman wrote:
> Block device buffered reads and writes already pass through
> filemap_read() and iomap_file_buffered_write(), respectively, both of
> which handle IOCB_DONTCACHE. Enable RWF_DONTCACHE for block device files
> by setting FOP_DONTCACHE in def_blk_fops.
>
> For CONFIG_BUFFER_HEAD paths, add block_write_begin_iocb(), which threads
> the kiocb through so that buffer_head-based I/O can use DONTCACHE
> behavior. The existing block_write_begin() is preserved as a wrapper
> that passes a NULL iocb.
>
> This support is useful for databases that operate on raw block devices,
> among other userspace applications.
>
> Signed-off-by: Tal Zussman <tz2294@xxxxxxxxxxxx>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@xxxxxxx>

Honza

> ---
> block/fops.c | 5 +++--
> fs/buffer.c | 19 ++++++++++++++++---
> include/linux/buffer_head.h | 3 +++
> 3 files changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/block/fops.c b/block/fops.c
> index 4d32785b31d9..d8165f6ba71c 100644
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -505,7 +505,8 @@ static int blkdev_write_begin(const struct kiocb *iocb,
> unsigned len, struct folio **foliop,
> void **fsdata)
> {
> - return block_write_begin(mapping, pos, len, foliop, blkdev_get_block);
> + return block_write_begin_iocb(iocb, mapping, pos, len, foliop,
> + blkdev_get_block);
> }
>
> static int blkdev_write_end(const struct kiocb *iocb,
> @@ -967,7 +968,7 @@ const struct file_operations def_blk_fops = {
> .splice_write = iter_file_splice_write,
> .fallocate = blkdev_fallocate,
> .uring_cmd = blkdev_uring_cmd,
> - .fop_flags = FOP_BUFFER_RASYNC,
> + .fop_flags = FOP_BUFFER_RASYNC | FOP_DONTCACHE,
> };
>
> static __init int blkdev_init(void)
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 838c0c571022..18f1d128bb19 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -2241,14 +2241,19 @@ EXPORT_SYMBOL(block_commit_write);
> *
> * The filesystem needs to handle block truncation upon failure.
> */
> -int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
> +int block_write_begin_iocb(const struct kiocb *iocb,
> + struct address_space *mapping, loff_t pos, unsigned len,
> struct folio **foliop, get_block_t *get_block)
> {
> pgoff_t index = pos >> PAGE_SHIFT;
> + fgf_t fgp_flags = FGP_WRITEBEGIN;
> struct folio *folio;
> int status;
>
> - folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
> + if (iocb && iocb->ki_flags & IOCB_DONTCACHE)
> + fgp_flags |= FGP_DONTCACHE;
> +
> + folio = __filemap_get_folio(mapping, index, fgp_flags,
> mapping_gfp_mask(mapping));
> if (IS_ERR(folio))
> return PTR_ERR(folio);
> @@ -2263,6 +2268,13 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
> *foliop = folio;
> return status;
> }
> +
> +int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
> + struct folio **foliop, get_block_t *get_block)
> +{
> + return block_write_begin_iocb(NULL, mapping, pos, len, foliop,
> + get_block);
> +}
> EXPORT_SYMBOL(block_write_begin);
>
> int block_write_end(loff_t pos, unsigned len, unsigned copied,
> @@ -2591,7 +2603,8 @@ int cont_write_begin(const struct kiocb *iocb, struct address_space *mapping,
> (*bytes)++;
> }
>
> - return block_write_begin(mapping, pos, len, foliop, get_block);
> + return block_write_begin_iocb(iocb, mapping, pos, len, foliop,
> + get_block);
> }
> EXPORT_SYMBOL(cont_write_begin);
>
> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
> index b16b88bfbc3e..ddf88ce290f2 100644
> --- a/include/linux/buffer_head.h
> +++ b/include/linux/buffer_head.h
> @@ -260,6 +260,9 @@ int block_read_full_folio(struct folio *, get_block_t *);
> bool block_is_partially_uptodate(struct folio *, size_t from, size_t count);
> int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
> struct folio **foliop, get_block_t *get_block);
> +int block_write_begin_iocb(const struct kiocb *iocb,
> + struct address_space *mapping, loff_t pos, unsigned len,
> + struct folio **foliop, get_block_t *get_block);
> int __block_write_begin(struct folio *folio, loff_t pos, unsigned len,
> get_block_t *get_block);
> int block_write_end(loff_t pos, unsigned len, unsigned copied, struct folio *);
>
> --
> 2.39.5
>
--
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR