Re: [PATCH RFC v5 10/10] iomap: Rename ATOMIC flags again
From: Christoph Hellwig
Date: Thu Mar 13 2025 - 04:18:39 EST
Something like this (untested):
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 5299f70428ef..3fa21445906a 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -312,27 +312,20 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
 }
 
 /*
- * Figure out the bio's operation flags from the dio request, the
- * mapping, and whether or not we want FUA. Note that we can end up
- * clearing the WRITE_THROUGH flag in the dio request.
+ * Use a FUA write if we need datasync semantics and this is a pure data I/O
+ * that doesn't require any metadata updates (including after I/O completion
+ * such as unwritten extent conversion) and the underlying device either
+ * doesn't have a volatile write cache or supports FUA.
+ * This allows us to avoid cache flushes on I/O completion.
  */
-static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
-		const struct iomap *iomap, bool use_fua, bool atomic_hw)
+static inline bool iomap_dio_can_use_fua(const struct iomap *iomap,
+		struct iomap_dio *dio)
 {
-	blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
-
-	if (!(dio->flags & IOMAP_DIO_WRITE))
-		return REQ_OP_READ;
-
-	opflags |= REQ_OP_WRITE;
-	if (use_fua)
-		opflags |= REQ_FUA;
-	else
-		dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
-	if (atomic_hw)
-		opflags |= REQ_ATOMIC;
-
-	return opflags;
+	if (iomap->flags & (IOMAP_F_SHARED | IOMAP_F_DIRTY))
+		return false;
+	if (!(dio->flags & IOMAP_DIO_WRITE_THROUGH))
+		return false;
+	return !bdev_write_cache(iomap->bdev) || bdev_fua(iomap->bdev);
 }
 
 static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
@@ -343,49 +336,53 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
 	bool atomic_hw = iter->flags & IOMAP_ATOMIC_HW;
 	const loff_t length = iomap_length(iter);
 	loff_t pos = iter->pos;
-	blk_opf_t bio_opf;
+	blk_opf_t bio_opf = REQ_SYNC | REQ_IDLE;
 	struct bio *bio;
 	bool need_zeroout = false;
-	bool use_fua = false;
 	int nr_pages, ret = 0;
 	u64 copied = 0;
 	size_t orig_count;
 
-	if (atomic_hw && length != iter->len)
-		return -EINVAL;
-
 	if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) ||
 	    !bdev_iter_is_aligned(iomap->bdev, dio->submit.iter))
 		return -EINVAL;
 
-	if (iomap->type == IOMAP_UNWRITTEN) {
-		dio->flags |= IOMAP_DIO_UNWRITTEN;
-		need_zeroout = true;
-	}
+	if (dio->flags & IOMAP_DIO_WRITE) {
+		bio_opf |= REQ_OP_WRITE;
+
+		if (atomic_hw) {
+			if (length != iter->len)
+				return -EINVAL;
+			bio_opf |= REQ_ATOMIC;
+		}
 
-	if (iomap->flags & IOMAP_F_SHARED)
-		dio->flags |= IOMAP_DIO_COW;
+		if (iomap->type == IOMAP_UNWRITTEN) {
+			dio->flags |= IOMAP_DIO_UNWRITTEN;
+			need_zeroout = true;
+		}
 
-	if (iomap->flags & IOMAP_F_NEW) {
-		need_zeroout = true;
-	} else if (iomap->type == IOMAP_MAPPED) {
-		/*
-		 * Use a FUA write if we need datasync semantics, this is a pure
-		 * data IO that doesn't require any metadata updates (including
-		 * after IO completion such as unwritten extent conversion) and
-		 * the underlying device either supports FUA or doesn't have
-		 * a volatile write cache. This allows us to avoid cache flushes
-		 * on IO completion. If we can't use writethrough and need to
-		 * sync, disable in-task completions as dio completion will
-		 * need to call generic_write_sync() which will do a blocking
-		 * fsync / cache flush call.
-		 */
-		if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
-		    (dio->flags & IOMAP_DIO_WRITE_THROUGH) &&
-		    (bdev_fua(iomap->bdev) || !bdev_write_cache(iomap->bdev)))
-			use_fua = true;
-		else if (dio->flags & IOMAP_DIO_NEED_SYNC)
-			dio->flags &= ~IOMAP_DIO_CALLER_COMP;
+		if (iomap->flags & IOMAP_F_SHARED)
+			dio->flags |= IOMAP_DIO_COW;
+
+		if (iomap->flags & IOMAP_F_NEW) {
+			need_zeroout = true;
+		} else if (iomap->type == IOMAP_MAPPED) {
+			if (iomap_dio_can_use_fua(iomap, dio)) {
+				bio_opf |= REQ_FUA;
+			} else {
+				dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
+				/*
+				 * Disable in-task completions if we can't use
+				 * writethrough and need to sync as the I/O
+				 * completion handler has to force a (blocking)
+				 * cache flush.
+				 */
+				if (dio->flags & IOMAP_DIO_NEED_SYNC)
+					dio->flags &= ~IOMAP_DIO_CALLER_COMP;
+			}
+		}
+	} else {
+		bio_opf |= REQ_OP_READ;
 	}
 
 	/*
@@ -407,7 +404,7 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
 	 * during completion processing.
 	 */
 	if (need_zeroout ||
-	    ((dio->flags & IOMAP_DIO_NEED_SYNC) && !use_fua) ||
+	    ((dio->flags & IOMAP_DIO_NEED_SYNC) && !(bio_opf & REQ_FUA)) ||
 	    ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode)))
 		dio->flags &= ~IOMAP_DIO_CALLER_COMP;
 
@@ -428,8 +425,6 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
 		goto out;
 	}
 
-	bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic_hw);
-
 	nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
 	do {
 		size_t n;
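
For anyone following along, here is a stand-alone sketch of the flag
composition this ends up with.  It is a userspace model, not kernel code:
the REQ_* values are stand-ins rather than the real blk_opf_t encoding,
compose_bio_opf() is a made-up helper for illustration, and the FUA
decision is passed in as a bool instead of coming from
iomap_dio_can_use_fua():

/*
 * Userspace model of the bio_opf composition above.  Flag values are
 * stand-ins, not the kernel's REQ_* definitions.
 */
#include <stdbool.h>
#include <stdio.h>

#define REQ_OP_READ	0x01
#define REQ_OP_WRITE	0x02
#define REQ_SYNC	0x04
#define REQ_IDLE	0x08
#define REQ_FUA		0x10
#define REQ_ATOMIC	0x20

static unsigned int compose_bio_opf(bool is_write, bool atomic_hw,
		bool can_use_fua)
{
	/* every direct I/O bio starts out sync + idle */
	unsigned int bio_opf = REQ_SYNC | REQ_IDLE;

	if (is_write) {
		bio_opf |= REQ_OP_WRITE;
		if (atomic_hw)
			bio_opf |= REQ_ATOMIC;	/* hardware atomic write */
		if (can_use_fua)
			bio_opf |= REQ_FUA;	/* avoids the completion flush */
	} else {
		bio_opf |= REQ_OP_READ;
	}
	return bio_opf;
}

int main(void)
{
	printf("read:         %#x\n", compose_bio_opf(false, false, false));
	printf("write:        %#x\n", compose_bio_opf(true, false, false));
	printf("atomic write: %#x\n", compose_bio_opf(true, true, true));
	return 0;
}

In the patch itself the !FUA path additionally clears
IOMAP_DIO_WRITE_THROUGH, and IOMAP_DIO_CALLER_COMP is dropped whenever a
sync completion would have to issue a blocking cache flush.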