unsigned int fs_block_size = i_blocksize(inode), pad;
+ u64 io_block_size = iomap->io_block_size;
I wonder, should iomap be nice and not require filesystems to set
io_block_size themselves unless they really need it? Anyone working on
an iomap port while this patchset is in progress may or may not remember
to add this bit if they get their port merged after atomicwrites is
merged; and you might not remember to prevent the bitrot if the reverse
order happens.
u64 io_block_size = iomap->io_block_size ?: i_blocksize(inode);
loff_t length = iomap_length(iter);
Please don't opencode this twice.
loff_t pos = iter->pos;
blk_opf_t bio_opf;
@@ -287,6 +287,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
int nr_pages, ret = 0;
size_t copied = 0;
size_t orig_count;
+ unsigned int pad;
if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) ||
!bdev_iter_is_aligned(iomap->bdev, dio->submit.iter))
@@ -355,7 +356,14 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
if (need_zeroout) {
/* zero out from the start of the block to the write offset */
- pad = pos & (fs_block_size - 1);
+ if (is_power_of_2(io_block_size)) {
+ pad = pos & (io_block_size - 1);
+ } else {
+ loff_t _pos = pos;
+
+ pad = do_div(_pos, io_block_size);
+ }
/*
 * offset_in_block - byte offset of @pos within its enclosing block
 * @pos: file position in bytes
 * @blocksize: block size in bytes; need not be a power of two
 *
 * Returns @pos modulo @blocksize.
 */
static unsigned int offset_in_block(loff_t pos, u64 blocksize)
{
	u64 dividend = pos;

	/* Power-of-two sizes reduce to a mask, avoiding a 64-bit division. */
	if (likely(is_power_of_2(blocksize)))
		return pos & (blocksize - 1);

	/*
	 * do_div() expects an unsigned 64-bit lvalue as its dividend and
	 * overwrites it with the quotient, so divide an unsigned copy rather
	 * than the signed @pos parameter.
	 *
	 * NOTE(review): do_div() takes a 32-bit divisor, so a @blocksize
	 * above U32_MAX would be truncated here -- confirm callers never pass
	 * one, or switch to a 64-bit remainder helper.
	 */
	return do_div(dividend, blocksize);
}
pad = offset_in_block(pos, io_block_size);
if (pad)
...
Also, what happens if pos-pad points to a byte before the mapping?
+What if pos + io_block_size - pad points to a byte after the end of the
if (pad)
iomap_dio_zero(iter, dio, pos - pad, pad);
}
@@ -429,9 +437,16 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
if (need_zeroout ||
((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) {
/* zero out from the end of the write to the end of the block */
- pad = pos & (fs_block_size - 1);
+ if (is_power_of_2(io_block_size)) {
+ pad = pos & (io_block_size - 1);
+ } else {
+ loff_t _pos = pos;
+
+ pad = do_div(_pos, io_block_size);
+ }
+
if (pad)
- iomap_dio_zero(iter, dio, pos, fs_block_size - pad);
+ iomap_dio_zero(iter, dio, pos, io_block_size - pad);
mapping?
}size in bytes?
out:
/* Undo iter limitation to current extent */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 378342673925..ecb4cae88248 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -127,6 +127,7 @@ xfs_bmbt_to_iomap(
}
iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
+ iomap->io_block_size = i_blocksize(VFS_I(ip));
if (mapping_flags & IOMAP_DAX)
iomap->dax_dev = target->bt_daxdev;
else
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 3b103715acc9..bf2cc4bee309 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -50,6 +50,7 @@ static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset,
iomap->addr = (z->z_sector << SECTOR_SHIFT) + iomap->offset;
iomap->length = isize - iomap->offset;
}
+ iomap->io_block_size = i_blocksize(inode);
mutex_unlock(&zi->i_truncate_mutex);
trace_zonefs_iomap_begin(inode, iomap);
@@ -99,6 +100,7 @@ static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset,
iomap->type = IOMAP_MAPPED;
iomap->length = isize - iomap->offset;
}
+ iomap->io_block_size = i_blocksize(inode);
mutex_unlock(&zi->i_truncate_mutex);
trace_zonefs_iomap_begin(inode, iomap);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 6fc1c858013d..d63a35b77907 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -103,6 +103,8 @@ struct iomap {
void *private; /* filesystem private */
const struct iomap_folio_ops *folio_ops;
u64 validity_cookie; /* used with .iomap_valid() */
+ /* io block zeroing size, not necessarily a power-of-2 */
I'm not sure what "io block zeroing" means. What are you trying to
accomplish here? Let's say the fsblock size is 4k and the allocation
unit (aka the atomic write size) is 16k. Userspace wants a direct write
to file offset 8192-12287, and that space is unwritten:
uuuu
^
Currently we'd just write the 4k and run the io completion handler, so
the final state is:
uuWu
Instead, if the fs sets io_block_size to 16384, does this direct write
now amplify into a full 16k write?
With the end result being:
ZZWZ
only.... I don't see the unwritten areas being converted to written?
I guess for an atomic write you'd require the user to write 0-16383?
<still confused about why we need to do this, maybe I'll figure it out
as I go along>