Re: [PATCH 1/2] erofs: introduce chunk-based file on-disk format

From: Liu Bo
Date: Wed Aug 18 2021 - 18:28:14 EST


On Wed, Aug 18, 2021 at 03:07:12PM +0800, Gao Xiang wrote:
> Currently, uncompressed data except for tail-packing inline is
> consecutive on disk.
>
> In order to support chunk-based data deduplication, add a new
> corresponding inode data layout.
>
> In the future, the data source of chunks can be either compressed or
> uncompressed.
>
> Signed-off-by: Gao Xiang <hsiangkao@xxxxxxxxxxxxxxxxx>
> ---
> Documentation/filesystems/erofs.rst | 16 ++++++++++--
> fs/erofs/erofs_fs.h | 40 +++++++++++++++++++++++++++--
> 2 files changed, 52 insertions(+), 4 deletions(-)
>
> diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
> index 868e3972227f..b46d0fc46eb6 100644
> --- a/Documentation/filesystems/erofs.rst
> +++ b/Documentation/filesystems/erofs.rst
> @@ -156,13 +156,14 @@ may not. All metadatas can be now observed in two different spaces (views):
>
> Xattrs, extents, data inline are followed by the corresponding inode with
> proper alignment, and they could be optional for different data mappings.
> - _currently_ total 4 valid data mappings are supported:
> + _currently_ total 5 data layouts are supported:
>
> == ====================================================================
> 0 flat file data without data inline (no extent);
> 1 fixed-sized output data compression (with non-compacted indexes);
> 2 flat file data with tail packing data inline (no extent);
> - 3 fixed-sized output data compression (with compacted indexes, v5.3+).
> + 3 fixed-sized output data compression (with compacted indexes, v5.3+);
> + 4 chunk-based file (v5.15+).
> == ====================================================================
>
> The size of the optional xattrs is indicated by i_xattr_count in inode
> @@ -213,6 +214,17 @@ Note that apart from the offset of the first filename, nameoff0 also indicates
> the total number of directory entries in this block since it is no need to
> introduce another on-disk field at all.
>
> +Chunk-based file
> +----------------
> +In order to support chunk-based file deduplication, a new inode data layout has
> +been supported since Linux v5.15: Files are split into equal-sized data chunks
> +with the ``extents`` area of the inode metadata indicating how to get the chunk
> +data: these can be either a simple 4-byte block address array or in the 8-byte
> +chunk index form (see struct erofs_inode_chunk_index in erofs_fs.h for more
> +details).
> +
> +By the way, chunk-based files are all uncompressed for now.
> +
> Data compression
> ----------------
> EROFS implements LZ4 fixed-sized output compression which generates fixed-sized
> diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
> index 0f8da74570b4..6210fe434930 100644
> --- a/fs/erofs/erofs_fs.h
> +++ b/fs/erofs/erofs_fs.h
> @@ -4,6 +4,7 @@
> *
> * Copyright (C) 2017-2018 HUAWEI, Inc.
> * https://www.huawei.com/
> + * Copyright (C) 2021, Alibaba Cloud
> */
> #ifndef __EROFS_FS_H
> #define __EROFS_FS_H
> @@ -19,10 +20,12 @@
> #define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001
> #define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002
> #define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002
> +#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004
> #define EROFS_ALL_FEATURE_INCOMPAT \
> (EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
> EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
> - EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER)
> + EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
> + EROFS_FEATURE_INCOMPAT_CHUNKED_FILE)
>
> #define EROFS_SB_EXTSLOT_SIZE 16
>
> @@ -64,13 +67,16 @@ struct erofs_super_block {
> * inode, [xattrs], last_inline_data, ... | ... | no-holed data
> * 3 - inode compression D:
> * inode, [xattrs], map_header, extents ... | ...
> - * 4~7 - reserved
> + * 4 - inode chunk-based E:
> + * inode, [xattrs], chunk indexes ... | ...
> + * 5~7 - reserved
> */
> enum {
> EROFS_INODE_FLAT_PLAIN = 0,
> EROFS_INODE_FLAT_COMPRESSION_LEGACY = 1,
> EROFS_INODE_FLAT_INLINE = 2,
> EROFS_INODE_FLAT_COMPRESSION = 3,
> + EROFS_INODE_CHUNK_BASED = 4,
> EROFS_INODE_DATALAYOUT_MAX
> };
>
> @@ -90,6 +96,19 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode)
> #define EROFS_I_ALL \
> ((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1)
>
> +/* indicate chunk blkbits, thus `chunksize = blocksize << chunk blkbits' */

There is a typo in the quotation marks: the opening backtick in (`chunksize = ...) should be an apostrophe, i.e. ('chunksize = ...).

Otherwise it looks good.

Reviewed-by: Liu Bo <bo.liu@xxxxxxxxxxxxxxxxx>

thanks,
liubo

> +#define EROFS_CHUNK_FORMAT_BLKBITS_MASK 0x001F
> +/* with chunk indexes or just a 4-byte blkaddr array */
> +#define EROFS_CHUNK_FORMAT_INDEXES 0x0020
> +
> +#define EROFS_CHUNK_FORMAT_ALL \
> + (EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES)
> +
> +struct erofs_inode_chunk_info {
> + __le16 format; /* chunk blkbits */
> + __le16 reserved;
> +};
> +
> /* 32-byte reduced form of an ondisk inode */
> struct erofs_inode_compact {
> __le16 i_format; /* inode format hints */
> @@ -107,6 +126,9 @@ struct erofs_inode_compact {
>
> /* for device files, used to indicate old/new device # */
> __le32 rdev;
> +
> + /* for chunk-based files, it contains the summary info */
> + struct erofs_inode_chunk_info c;
> } i_u;
> __le32 i_ino; /* only used for 32-bit stat compatibility */
> __le16 i_uid;
> @@ -135,6 +157,9 @@ struct erofs_inode_extended {
>
> /* for device files, used to indicate old/new device # */
> __le32 rdev;
> +
> + /* for chunk-based files, it contains the summary info */
> + struct erofs_inode_chunk_info c;
> } i_u;
>
> /* only used for 32-bit stat compatibility */
> @@ -204,6 +229,15 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e)
> e->e_name_len + le16_to_cpu(e->e_value_size));
> }
>
> +/* represent a zeroed chunk (hole) */
> +#define EROFS_NULL_ADDR -1
> +
> +struct erofs_inode_chunk_index {
> + __le32 blkaddr;
> + __le16 device_id; /* back-end storage id, always 0 for now */
> + __le16 reserved; /* reserved, don't care */
> +};
> +
> /* maximum supported size of a physical compression cluster */
> #define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024)
>
> @@ -338,6 +372,8 @@ static inline void erofs_check_ondisk_layout_definitions(void)
> BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
> BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
> BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4);
> + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4);
> + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8);
> BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8);
> BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
> BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
> --
> 2.24.4