Re: [PATCH v4 2/2] erofs: set block size to the on-disk block size

From: Jingbo Xu
Date: Mon Mar 06 2023 - 04:53:08 EST




On 3/6/23 3:31 PM, Yue Hu wrote:
> On Thu, 2 Mar 2023 22:39:15 +0800
> Jingbo Xu <jefflexu@xxxxxxxxxxxxxxxxx> wrote:
>
>> Set the block size to that specified in on-disk superblock.
>>
>> Also remove the hard constraint of PAGE_SIZE block size for the
>> uncompressed device backend. This constraint temporarily remains
>> for the compressed device and fscache backends, as more work is needed
>> to handle the case where the block size is not equal to PAGE_SIZE.
>>
>> It is worth noting that the on-disk block size is read prior to
>> erofs_superblock_csum_verify(), as the read block size is needed in the
>> latter.
>>
>> Besides, later we are going to make erofs refer to tar data blobs (which
>> are 512-byte aligned) for OCI containers, where the block size is 512
>> bytes. In this case, the 512-byte block size may not be adequate for a
>> directory to contain enough dirents. To fix this, we are also going to
>> introduce a directory block size independent of the block size.
>>
>> Since block sizes smaller than PAGE_SIZE are now supported,
>> reject all images with such a separate directory block size until
>> this feature is supported later.
>>
>> Signed-off-by: Jingbo Xu <jefflexu@xxxxxxxxxxxxxxxxx>
>> ---
>> fs/erofs/erofs_fs.h | 5 +++--
>> fs/erofs/inode.c | 3 ++-
>> fs/erofs/internal.h | 11 ++---------
>> fs/erofs/super.c | 46 +++++++++++++++++++++++++++++----------------
>> 4 files changed, 37 insertions(+), 28 deletions(-)
>>
>> diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
>> index dbcd24371002..ac9c2ff3aa66 100644
>> --- a/fs/erofs/erofs_fs.h
>> +++ b/fs/erofs/erofs_fs.h
>> @@ -53,7 +53,7 @@ struct erofs_super_block {
>> __le32 magic; /* file system magic number */
>> __le32 checksum; /* crc32c(super_block) */
>> __le32 feature_compat;
>> - __u8 blkszbits; /* support block_size == PAGE_SIZE only */
>> + __u8 blkszbits; /* filesystem block size */
>
> /* filesystem block size in bit shift */
>
>> __u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */
>>
>> __le16 root_nid; /* nid of root directory */
>> @@ -75,7 +75,8 @@ struct erofs_super_block {
>> } __packed u1;
>> __le16 extra_devices; /* # of devices besides the primary device */
>> __le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */
>> - __u8 reserved[6];
>> + __u8 dirblkbits; /* directory block size */
>
> Ditto.
>
>> + __u8 reserved[5];
>> __le64 packed_nid; /* nid of the special packed inode */
>> __u8 reserved2[24];
>> };
>> diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
>> index de26dac4e07e..0e6ff8a98c68 100644
>> --- a/fs/erofs/inode.c
>> +++ b/fs/erofs/inode.c
>> @@ -291,7 +291,8 @@ static int erofs_fill_inode(struct inode *inode)
>> }
>>
>> if (erofs_inode_is_data_compressed(vi->datalayout)) {
>> - if (!erofs_is_fscache_mode(inode->i_sb))
>> + if (!erofs_is_fscache_mode(inode->i_sb) &&
>> + inode->i_sb->s_blocksize_bits == PAGE_SHIFT)
>> err = z_erofs_fill_inode(inode);
>> else
>> err = -EOPNOTSUPP;
>> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
>> index d8019d835405..f99ba4061a3b 100644
>> --- a/fs/erofs/internal.h
>> +++ b/fs/erofs/internal.h
>> @@ -148,7 +148,8 @@ struct erofs_sb_info {
>> u16 device_id_mask; /* valid bits of device id to be used */
>>
>> unsigned char islotbits; /* inode slot unit size in bit shift */
>> - unsigned char blkszbits;
>> + unsigned char blkszbits; /* filesystem block size */
>
> Ditto.
>
>> + unsigned char dirblkbits; /* directory block size */
>
> Ditto.
>
>>
>> u32 sb_size; /* total superblock size */
>> u32 build_time_nsec;
>> @@ -240,14 +241,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
>> VAL != EROFS_LOCKED_MAGIC);
>> }
>>
>> -/* we strictly follow PAGE_SIZE and no buffer head yet */
>> -#define LOG_BLOCK_SIZE PAGE_SHIFT
>> -#define EROFS_BLKSIZ (1 << LOG_BLOCK_SIZE)
>> -
>> -#if (EROFS_BLKSIZ % 4096 || !EROFS_BLKSIZ)
>> -#error erofs cannot be used in this platform
>> -#endif
>> -
>> enum erofs_kmap_type {
>> EROFS_NO_KMAP, /* don't map the buffer */
>> EROFS_KMAP, /* use kmap_local_page() to map the buffer */
>> diff --git a/fs/erofs/super.c b/fs/erofs/super.c
>> index c97615c96ef8..d5fa0f582cbe 100644
>> --- a/fs/erofs/super.c
>> +++ b/fs/erofs/super.c
>> @@ -330,7 +330,6 @@ static int erofs_read_superblock(struct super_block *sb)
>> struct erofs_sb_info *sbi;
>> struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
>> struct erofs_super_block *dsb;
>> - unsigned int blkszbits;
>> void *data;
>> int ret;
>>
>> @@ -349,6 +348,17 @@ static int erofs_read_superblock(struct super_block *sb)
>> goto out;
>> }
>>
>> + sbi->blkszbits = dsb->blkszbits;
>> + sbi->dirblkbits = dsb->dirblkbits;
>> + if (sbi->blkszbits < 9 || sbi->blkszbits > PAGE_SHIFT) {
>> + erofs_err(sb, "blkszbits %u isn't supported", sbi->blkszbits);
>> + goto out;
>> + }
>> + if (sbi->dirblkbits) {
>> + erofs_err(sb, "dirblkbits %u isn't supported", sbi->dirblkbits);
>> + goto out;
>> + }
>> +
>> sbi->feature_compat = le32_to_cpu(dsb->feature_compat);
>> if (erofs_sb_has_sb_chksum(sbi)) {
>> ret = erofs_superblock_csum_verify(sb, data);
>> @@ -357,19 +367,11 @@ static int erofs_read_superblock(struct super_block *sb)
>> }
>>
>> ret = -EINVAL;
>> - blkszbits = dsb->blkszbits;
>> - /* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
>> - if (blkszbits != LOG_BLOCK_SIZE) {
>> - erofs_err(sb, "blkszbits %u isn't supported on this platform",
>> - blkszbits);
>> - goto out;
>> - }
>> -
>> if (!check_layout_compatibility(sb, dsb))
>> goto out;
>>
>> sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE;
>> - if (sbi->sb_size > EROFS_BLKSIZ) {
>> + if (sbi->sb_size > PAGE_SIZE - EROFS_SUPER_OFFSET) {
>> erofs_err(sb, "invalid sb_extslots %u (more than a fs block)",
>> sbi->sb_size);
>> goto out;
>> @@ -736,8 +738,8 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
>>
>> sbi->blkszbits = PAGE_SHIFT;
>> if (erofs_is_fscache_mode(sb)) {
>> - sb->s_blocksize = EROFS_BLKSIZ;
>> - sb->s_blocksize_bits = LOG_BLOCK_SIZE;
>> + sb->s_blocksize = PAGE_SIZE;
>> + sb->s_blocksize_bits = PAGE_SHIFT;
>>
>> err = erofs_fscache_register_fs(sb);
>> if (err)
>> @@ -747,8 +749,8 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
>> if (err)
>> return err;
>> } else {
>> - if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) {
>> - erofs_err(sb, "failed to set erofs blksize");
>> + if (!sb_set_blocksize(sb, PAGE_SIZE)) {
>> + errorfc(fc, "failed to set initial blksize");
>> return -EINVAL;
>> }
>>
>> @@ -761,12 +763,24 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
>> if (err)
>> return err;
>>
>> - if (test_opt(&sbi->opt, DAX_ALWAYS)) {
>> - BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE);
>> + if (sb->s_blocksize_bits != sbi->blkszbits) {
>> + if (erofs_is_fscache_mode(sb)) {
>> + errorfc(fc, "unsupported blksize for fscache mode");
>> + return -EINVAL;
>> + }
>> + if (!sb_set_blocksize(sb, 1 << sbi->blkszbits)) {
>> + errorfc(fc, "failed to set erofs blksize");
>> + return -EINVAL;
>> + }
>> + }
>>
>> + if (test_opt(&sbi->opt, DAX_ALWAYS)) {
>> if (!sbi->dax_dev) {
>> errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
>> clear_opt(&sbi->opt, DAX_ALWAYS);
>> + } else if (sbi->blkszbits != PAGE_SHIFT) {
>> + errorfc(fc, "unsupported blocksize for DAX");
>> + clear_opt(&sbi->opt, DAX_ALWAYS);
>> }
>> }
>>

Thanks for the review. I will address all of these in the next version.

--
Thanks,
Jingbo