Re: [RFC PATCH v2 30/30] WIP: fs: ext2: add rust ro ext2 implementation

From: Darrick J. Wong
Date: Mon May 20 2024 - 16:01:26 EST


On Tue, May 14, 2024 at 10:17:11AM -0300, Wedson Almeida Filho wrote:
> From: Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx>
>
> Signed-off-by: Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx>
> ---
> fs/Kconfig | 1 +
> fs/Makefile | 1 +
> fs/rust-ext2/Kconfig | 13 +
> fs/rust-ext2/Makefile | 8 +
> fs/rust-ext2/defs.rs | 173 +++++++++++++
> fs/rust-ext2/ext2.rs | 551 ++++++++++++++++++++++++++++++++++++++++++
> rust/kernel/lib.rs | 3 +
> 7 files changed, 750 insertions(+)
> create mode 100644 fs/rust-ext2/Kconfig
> create mode 100644 fs/rust-ext2/Makefile
> create mode 100644 fs/rust-ext2/defs.rs
> create mode 100644 fs/rust-ext2/ext2.rs
>
> diff --git a/fs/Kconfig b/fs/Kconfig
> index 2cbd99d6784c..cf0cac5c5b1e 100644
> --- a/fs/Kconfig
> +++ b/fs/Kconfig
> @@ -338,6 +338,7 @@ source "fs/ufs/Kconfig"
> source "fs/erofs/Kconfig"
> source "fs/vboxsf/Kconfig"
> source "fs/tarfs/Kconfig"
> +source "fs/rust-ext2/Kconfig"
>
> endif # MISC_FILESYSTEMS
>
> diff --git a/fs/Makefile b/fs/Makefile
> index d8bbda73e3a9..c1a3007efc7d 100644
> --- a/fs/Makefile
> +++ b/fs/Makefile
> @@ -130,3 +130,4 @@ obj-$(CONFIG_EROFS_FS) += erofs/
> obj-$(CONFIG_VBOXSF_FS) += vboxsf/
> obj-$(CONFIG_ZONEFS_FS) += zonefs/
> obj-$(CONFIG_TARFS_FS) += tarfs/
> +obj-$(CONFIG_RUST_EXT2_FS) += rust-ext2/
> diff --git a/fs/rust-ext2/Kconfig b/fs/rust-ext2/Kconfig
> new file mode 100644
> index 000000000000..976371655ca6
> --- /dev/null
> +++ b/fs/rust-ext2/Kconfig
> @@ -0,0 +1,13 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +#
> +
> +config RUST_EXT2_FS
> + tristate "Rust second extended fs support"
> + depends on RUST && BLOCK
> + help
> + Ext2 is a standard Linux file system for hard disks.
> +
> + To compile this file system support as a module, choose M here: the
> + module will be called rust_ext2.
> +
> + If unsure, say Y.
> diff --git a/fs/rust-ext2/Makefile b/fs/rust-ext2/Makefile
> new file mode 100644
> index 000000000000..ac960b5f89d7
> --- /dev/null
> +++ b/fs/rust-ext2/Makefile
> @@ -0,0 +1,8 @@
> +# SPDX-License-Identifier: GPL-2.0
> +#
> +# Makefile for the linux tarfs filesystem routines.
> +#
> +
> +obj-$(CONFIG_RUST_EXT2_FS) += rust_ext2.o
> +
> +rust_ext2-y := ext2.o
> diff --git a/fs/rust-ext2/defs.rs b/fs/rust-ext2/defs.rs
> new file mode 100644
> index 000000000000..5f84852b4961
> --- /dev/null
> +++ b/fs/rust-ext2/defs.rs
> @@ -0,0 +1,173 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +//! Definitions of tarfs structures.
> +
> +use kernel::types::LE;
> +
> +pub(crate) const EXT2_SUPER_MAGIC: u16 = 0xEF53;
> +
> +pub(crate) const EXT2_MAX_BLOCK_LOG_SIZE: u32 = 16;
> +
> +pub(crate) const EXT2_GOOD_OLD_REV: u32 = 0; /* The good old (original) format */
> +pub(crate) const EXT2_DYNAMIC_REV: u32 = 1; /* V2 format w/ dynamic inode sizes */
> +
> +pub(crate) const EXT2_GOOD_OLD_INODE_SIZE: u16 = 128;
> +
> +pub(crate) const EXT2_ROOT_INO: u32 = 2; /* Root inode */
> +
> +/* First non-reserved inode for old ext2 filesystems. */
> +pub(crate) const EXT2_GOOD_OLD_FIRST_INO: u32 = 11;
> +
> +pub(crate) const EXT2_FEATURE_INCOMPAT_FILETYPE: u32 = 0x0002;
> +
> +/*
> + * Constants relative to the data blocks
> + */
> +pub(crate) const EXT2_NDIR_BLOCKS: usize = 12;
> +pub(crate) const EXT2_IND_BLOCK: usize = EXT2_NDIR_BLOCKS;
> +pub(crate) const EXT2_DIND_BLOCK: usize = EXT2_IND_BLOCK + 1;
> +pub(crate) const EXT2_TIND_BLOCK: usize = EXT2_DIND_BLOCK + 1;
> +pub(crate) const EXT2_N_BLOCKS: usize = EXT2_TIND_BLOCK + 1;
> +
> +kernel::derive_readable_from_bytes! {
> + #[repr(C)]
> + pub(crate) struct Super {
> + pub(crate) inodes_count: LE<u32>,
> + pub(crate) blocks_count: LE<u32>,
> + pub(crate) r_blocks_count: LE<u32>,
> + pub(crate) free_blocks_count: LE<u32>, /* Free blocks count */
> + pub(crate) free_inodes_count: LE<u32>, /* Free inodes count */
> + pub(crate) first_data_block: LE<u32>, /* First Data Block */
> + pub(crate) log_block_size: LE<u32>, /* Block size */
> + pub(crate) log_frag_size: LE<u32>, /* Fragment size */
> + pub(crate) blocks_per_group: LE<u32>, /* # Blocks per group */
> + pub(crate) frags_per_group: LE<u32>, /* # Fragments per group */
> + pub(crate) inodes_per_group: LE<u32>, /* # Inodes per group */
> + pub(crate) mtime: LE<u32>, /* Mount time */
> + pub(crate) wtime: LE<u32>, /* Write time */
> + pub(crate) mnt_count: LE<u16>, /* Mount count */
> + pub(crate) max_mnt_count: LE<u16>, /* Maximal mount count */
> + pub(crate) magic: LE<u16>, /* Magic signature */
> + pub(crate) state: LE<u16>, /* File system state */
> + pub(crate) errors: LE<u16>, /* Behaviour when detecting errors */
> + pub(crate) minor_rev_level: LE<u16>, /* minor revision level */
> + pub(crate) lastcheck: LE<u32>, /* time of last check */
> + pub(crate) checkinterval: LE<u32>, /* max. time between checks */
> + pub(crate) creator_os: LE<u32>, /* OS */
> + pub(crate) rev_level: LE<u32>, /* Revision level */
> + pub(crate) def_resuid: LE<u16>, /* Default uid for reserved blocks */
> + pub(crate) def_resgid: LE<u16>, /* Default gid for reserved blocks */
> + /*
> + * These fields are for EXT2_DYNAMIC_REV superblocks only.
> + *
> + * Note: the difference between the compatible feature set and
> + * the incompatible feature set is that if there is a bit set
> + * in the incompatible feature set that the kernel doesn't
> + * know about, it should refuse to mount the filesystem.
> + *
> + * e2fsck's requirements are more strict; if it doesn't know
> + * about a feature in either the compatible or incompatible
> + * feature set, it must abort and not try to meddle with
> + * things it doesn't understand...
> + */
> + pub(crate) first_ino: LE<u32>, /* First non-reserved inode */
> + pub(crate) inode_size: LE<u16>, /* size of inode structure */
> + pub(crate) block_group_nr: LE<u16>, /* block group # of this superblock */
> + pub(crate) feature_compat: LE<u32>, /* compatible feature set */
> + pub(crate) feature_incompat: LE<u32>, /* incompatible feature set */
> + pub(crate) feature_ro_compat: LE<u32>, /* readonly-compatible feature set */
> + pub(crate) uuid: [u8; 16], /* 128-bit uuid for volume */
> + pub(crate) volume_name: [u8; 16], /* volume name */
> + pub(crate) last_mounted: [u8; 64], /* directory where last mounted */
> + pub(crate) algorithm_usage_bitmap: LE<u32>, /* For compression */
> + /*
> + * Performance hints. Directory preallocation should only
> + * happen if the EXT2_COMPAT_PREALLOC flag is on.
> + */
> + pub(crate) prealloc_blocks: u8, /* Nr of blocks to try to preallocate*/
> + pub(crate) prealloc_dir_blocks: u8, /* Nr to preallocate for dirs */
> + padding1: u16,
> + /*
> + * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
> + */
> + pub(crate) journal_uuid: [u8; 16], /* uuid of journal superblock */
> + pub(crate) journal_inum: u32, /* inode number of journal file */
> + pub(crate) journal_dev: u32, /* device number of journal file */
> + pub(crate) last_orphan: u32, /* start of list of inodes to delete */
> + pub(crate) hash_seed: [u32; 4], /* HTREE hash seed */
> + pub(crate) def_hash_version: u8, /* Default hash version to use */
> + pub(crate) reserved_char_pad: u8,
> + pub(crate) reserved_word_pad: u16,
> + pub(crate) default_mount_opts: LE<u32>,
> + pub(crate) first_meta_bg: LE<u32>, /* First metablock block group */
> + reserved: [u32; 190], /* Padding to the end of the block */
> + }
> +
> + #[repr(C)]
> + #[derive(Clone, Copy)]
> + pub(crate) struct Group {

Might want to call these GroupDescriptor to match(ish) the ext2
structure? I dunno, it's going to be hard to remember to change
"struct ext2_group_desc" in my head to "struct ext2::GroupDescriptor" or
even "struct ext2::group_desc".

> + /// Blocks bitmap block.
> + pub block_bitmap: LE<u32>,
> +
> + /// Inodes bitmap block.
> + pub inode_bitmap: LE<u32>,
> +
> + /// Inodes table block.
> + pub inode_table: LE<u32>,
> +
> + /// Number of free blocks.
> + pub free_blocks_count: LE<u16>,
> +
> + /// Number of free inodes.
> + pub free_inodes_count: LE<u16>,
> +
> + /// Number of directories.
> + pub used_dirs_count: LE<u16>,
> +
> + pad: LE<u16>,
> + reserved: [u32; 3],
> + }
> +
> + #[repr(C)]
> + pub(crate) struct INode {
> + pub mode: LE<u16>, /* File mode */
> + pub uid: LE<u16>, /* Low 16 bits of Owner Uid */
> + pub size: LE<u32>, /* Size in bytes */
> + pub atime: LE<u32>, /* Access time */
> + pub ctime: LE<u32>, /* Creation time */
> + pub mtime: LE<u32>, /* Modification time */
> + pub dtime: LE<u32>, /* Deletion Time */
> + pub gid: LE<u16>, /* Low 16 bits of Group Id */
> + pub links_count: LE<u16>, /* Links count */
> + pub blocks: LE<u32>, /* Blocks count */
> + pub flags: LE<u32>, /* File flags */
> + pub reserved1: LE<u32>,
> + pub block: [LE<u32>; EXT2_N_BLOCKS],/* Pointers to blocks */
> + pub generation: LE<u32>, /* File version (for NFS) */
> + pub file_acl: LE<u32>, /* File ACL */
> + pub dir_acl: LE<u32>, /* Directory ACL */
> + pub faddr: LE<u32>, /* Fragment address */
> + pub frag: u8, /* Fragment number */
> + pub fsize: u8, /* Fragment size */
> + pub pad1: LE<u16>,
> + pub uid_high: LE<u16>,
> + pub gid_high: LE<u16>,
> + pub reserved2: LE<u32>,
> + }
> +
> + #[repr(C)]
> + pub(crate) struct DirEntry {
> + pub(crate) inode: LE<u32>, /* Inode number */
> + pub(crate) rec_len: LE<u16>, /* Directory entry length */
> + pub(crate) name_len: u8, /* Name length */
> + pub(crate) file_type: u8, /* Only if the "filetype" feature flag is set. */
> + }
> +}
> +
> +pub(crate) const FT_REG_FILE: u8 = 1;
> +pub(crate) const FT_DIR: u8 = 2;
> +pub(crate) const FT_CHRDEV: u8 = 3;
> +pub(crate) const FT_BLKDEV: u8 = 4;
> +pub(crate) const FT_FIFO: u8 = 5;
> +pub(crate) const FT_SOCK: u8 = 6;
> +pub(crate) const FT_SYMLINK: u8 = 7;
> diff --git a/fs/rust-ext2/ext2.rs b/fs/rust-ext2/ext2.rs
> new file mode 100644
> index 000000000000..2d6b1e7ca156
> --- /dev/null
> +++ b/fs/rust-ext2/ext2.rs
> @@ -0,0 +1,551 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +//! Ext2 file system.
> +
> +use alloc::vec::Vec;
> +use core::mem::size_of;
> +use defs::*;
> +use kernel::fs::{
> + self, address_space, dentry, dentry::DEntry, file, file::File, inode, inode::INode, iomap, sb,
> + sb::SuperBlock, Offset,
> +};
> +use kernel::types::{ARef, Either, FromBytes, Locked, LE};
> +use kernel::{block, c_str, prelude::*, str::CString, time::Timespec, user, PAGE_SIZE};
> +
> +pub mod defs;
> +
> +kernel::module_fs! {
> + type: Ext2Fs,
> + name: "ext2",
> + author: "Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx>",
> + description: "ext2 file system",
> + license: "GPL",
> +}
> +
> +const SB_OFFSET: Offset = 1024;
> +
> +struct INodeData {
> + data_blocks: [u32; defs::EXT2_N_BLOCKS],
> +}
> +
> +struct Ext2Fs {
> + mapper: inode::Mapper,
> + block_size: u32,
> + has_file_type: bool,
> + _block_size_bits: u32,
> + inodes_per_block: u32,
> + inodes_per_group: u32,
> + inode_count: u32,
> + inode_size: u16,
> + first_ino: u32,
> + group: Vec<defs::Group>,
> +}
> +
> +impl Ext2Fs {
> + fn iget(sb: &SuperBlock<Self>, ino: u32) -> Result<ARef<INode<Self>>> {
> + let s = sb.data();
> + if (ino != EXT2_ROOT_INO && ino < s.first_ino) || ino > s.inode_count {
> + return Err(ENOENT);
> + }
> + let group = ((ino - 1) / s.inodes_per_group) as usize;
> + let offset = (ino - 1) % s.inodes_per_group;
> +
> + if group >= s.group.len() {
> + return Err(ENOENT);
> + }
> +
> + // Create an inode or find an existing (cached) one.
> + let mut inode = match sb.get_or_create_inode(ino.into())? {
> + Either::Left(existing) => return Ok(existing),
> + Either::Right(new) => new,
> + };
> +
> + let inodes_block = Offset::from(s.group[group].inode_table.value());
> + let inode_block = inodes_block + Offset::from(offset / s.inodes_per_block);
> + let offset = (offset % s.inodes_per_block) as usize;
> + let b = sb
> + .data()
> + .mapper
> + .mapped_folio(inode_block * Offset::from(s.block_size))?;

It almost feels like you need a buffer cache here for fs metadata... ;)

> + let idata = defs::INode::from_bytes(&b, offset * s.inode_size as usize).ok_or(EIO)?;
> + let mode = idata.mode.value();
> +
> + if idata.links_count.value() == 0 && (mode == 0 || idata.dtime.value() != 0) {
> + return Err(ESTALE);
> + }
> +
> + const DIR_FOPS: file::Ops<Ext2Fs> = file::Ops::new::<Ext2Fs>();
> + const DIR_IOPS: inode::Ops<Ext2Fs> = inode::Ops::new::<Ext2Fs>();
> + const FILE_AOPS: address_space::Ops<Ext2Fs> = iomap::ro_aops::<Ext2Fs>();
> +
> + let mut size = idata.size.value().into();
> + let typ = match mode & fs::mode::S_IFMT {
> + fs::mode::S_IFREG => {
> + size |= Offset::from(idata.dir_acl.value())
> + .checked_shl(32)
> + .ok_or(EUCLEAN)?;

I wonder, is there a clean way to log these kinds of corruption errors?
ext4 (and soon xfs) have the ability to log health problems and pass
those kinds of errors to a monitoring daemon via fanotify.

> + inode
> + .set_aops(FILE_AOPS)
> + .set_fops(file::Ops::generic_ro_file());
> + inode::Type::Reg
> + }
> + fs::mode::S_IFDIR => {
> + inode
> + .set_iops(DIR_IOPS)
> + .set_fops(DIR_FOPS)
> + .set_aops(FILE_AOPS);
> + inode::Type::Dir
> + }
> + fs::mode::S_IFLNK => {
> + if idata.blocks.value() == 0 {
> + const OFFSET: usize = core::mem::offset_of!(defs::INode, block);
> + let name = &b[offset * usize::from(s.inode_size) + OFFSET..];
> + let name_len = size as usize;
> + if name_len > name.len() || name_len == 0 {
> + return Err(EIO);
> + }
> + inode.set_iops(inode::Ops::simple_symlink_inode());
> + inode::Type::Lnk(Some(CString::try_from(&name[..name_len])?))
> + } else {
> + inode
> + .set_aops(FILE_AOPS)
> + .set_iops(inode::Ops::page_symlink_inode());
> + inode::Type::Lnk(None)
> + }
> + }
> + fs::mode::S_IFSOCK => inode::Type::Sock,
> + fs::mode::S_IFIFO => inode::Type::Fifo,
> + fs::mode::S_IFCHR => {
> + let (major, minor) = decode_dev(&idata.block);
> + inode::Type::Chr(major, minor)
> + }
> + fs::mode::S_IFBLK => {
> + let (major, minor) = decode_dev(&idata.block);
> + inode::Type::Blk(major, minor)
> + }
> + _ => return Err(ENOENT),
> + };
> + inode.init(inode::Params {
> + typ,
> + mode: mode & 0o777,
> + size,
> + blocks: idata.blocks.value().into(),
> + nlink: idata.links_count.value().into(),
> + uid: u32::from(idata.uid.value()) | u32::from(idata.uid_high.value()) << 16,
> + gid: u32::from(idata.gid.value()) | u32::from(idata.gid_high.value()) << 16,
> + ctime: Timespec::new(idata.ctime.value().into(), 0)?,
> + mtime: Timespec::new(idata.mtime.value().into(), 0)?,
> + atime: Timespec::new(idata.atime.value().into(), 0)?,
> + value: INodeData {
> + data_blocks: core::array::from_fn(|i| idata.block[i].value()),
> + },
> + })
> + }
> +
> + fn offsets<'a>(&self, mut block: u64, out: &'a mut [u32]) -> Option<&'a [u32]> {
> + let ptrs = u64::from(self.block_size / size_of::<u32>() as u32);
> + let ptr_mask = ptrs - 1;
> + let ptr_bits = ptrs.trailing_zeros();
> +
> + if block < EXT2_NDIR_BLOCKS as u64 {
> + out[0] = block as u32;
> + return Some(&out[..1]);
> + }
> +
> + block -= EXT2_NDIR_BLOCKS as u64;
> + if block < ptrs {
> + out[0] = EXT2_IND_BLOCK as u32;
> + out[1] = block as u32;
> + return Some(&out[..2]);
> + }
> +
> + block -= ptrs;
> + if block < (1 << (2 * ptr_bits)) {
> + out[0] = EXT2_DIND_BLOCK as u32;
> + out[1] = (block >> ptr_bits) as u32;
> + out[2] = (block & ptr_mask) as u32;
> + return Some(&out[..3]);
> + }
> +
> + block -= ptrs * ptrs;
> + if block < ptrs * ptrs * ptrs {
> + out[0] = EXT2_TIND_BLOCK as u32;
> + out[1] = (block >> (2 * ptr_bits)) as u32;
> + out[2] = ((block >> ptr_bits) & ptr_mask) as u32;
> + out[3] = (block & ptr_mask) as u32;
> + return Some(&out[..4]);
> + }
> +
> + None
> + }
> +
> + fn offset_to_block(inode: &INode<Self>, block: Offset) -> Result<u64> {
> + let s = inode.super_block().data();
> + let mut indices = [0u32; 4];
> + let boffsets = s.offsets(block as u64, &mut indices).ok_or(EIO)?;
> + let mut boffset = inode.data().data_blocks[boffsets[0] as usize];
> + let mapper = &s.mapper;
> + for i in &boffsets[1..] {
> + let b = mapper.mapped_folio(Offset::from(boffset) * Offset::from(s.block_size))?;
> + let table = LE::<u32>::from_bytes_to_slice(&b).ok_or(EIO)?;
> + boffset = table[*i as usize].value();
> + }
> + Ok(boffset.into())
> + }
> +
> + fn check_descriptors(s: &Super, groups: &[Group]) -> Result {

It's ... very odd to mix file space mapping functions and group
descriptors into the same structure. Does offset_to_block belong in a
Ext2Inode structure?

I was also wondering, is there a convenient way to make it so that the
compiler can enforce that a directory inode can only be passed an
Operations that actually has all the directory operations initialized?
Or that a regular file can't have a lookup function in its iops?

> + for (i, g) in groups.iter().enumerate() {
> + let first = i as u32 * s.blocks_per_group.value() + s.first_data_block.value();
> + let last = if i == groups.len() - 1 {
> + s.blocks_count.value()
> + } else {
> + first + s.blocks_per_group.value() - 1
> + };
> +
> + if g.block_bitmap.value() < first || g.block_bitmap.value() > last {
> + pr_err!(
> + "Block bitmap for group {i} no in group (block {})\n",
> + g.block_bitmap.value()
> + );
> + return Err(EINVAL);
> + }
> +
> + if g.inode_bitmap.value() < first || g.inode_bitmap.value() > last {
> + pr_err!(
> + "Inode bitmap for group {i} no in group (block {})\n",
> + g.inode_bitmap.value()
> + );
> + return Err(EINVAL);
> + }
> +
> + if g.inode_table.value() < first || g.inode_table.value() > last {
> + pr_err!(
> + "Inode table for group {i} no in group (block {})\n",
> + g.inode_table.value()
> + );
> + return Err(EINVAL);
> + }
> + }
> + Ok(())
> + }
> +}
> +
> +impl fs::FileSystem for Ext2Fs {
> + type Data = Box<Self>;
> + type INodeData = INodeData;
> + const NAME: &'static CStr = c_str!("rust-ext2");
> + const SUPER_TYPE: sb::Type = sb::Type::BlockDev;
> +
> + fn fill_super(
> + sb: &mut SuperBlock<Self, sb::New>,
> + mapper: Option<inode::Mapper>,
> + ) -> Result<Self::Data> {
> + let Some(mapper) = mapper else {
> + return Err(EINVAL);
> + };
> +
> + if sb.min_blocksize(PAGE_SIZE as i32) == 0 {
> + pr_err!("Unable to set block size\n");
> + return Err(EINVAL);
> + }
> +
> + // Map the super block and check the magic number.
> + let mapped = mapper.mapped_folio(SB_OFFSET)?;
> + let s = Super::from_bytes(&mapped, 0).ok_or(EIO)?;
> +
> + if s.magic.value() != EXT2_SUPER_MAGIC {
> + return Err(EINVAL);
> + }
> +
> + // Check for unsupported flags.
> + let mut has_file_type = false;
> + if s.rev_level.value() >= EXT2_DYNAMIC_REV {
> + let features = s.feature_incompat.value();
> + if features & !EXT2_FEATURE_INCOMPAT_FILETYPE != 0 {
> + pr_err!("Unsupported incompatible feature: {:x}\n", features);
> + return Err(EINVAL);
> + }
> +
> + has_file_type = features & EXT2_FEATURE_INCOMPAT_FILETYPE != 0;
> +
> + let features = s.feature_ro_compat.value();
> + if !sb.rdonly() && features != 0 {
> + pr_err!("Unsupported rw incompatible feature: {:x}\n", features);
> + return Err(EINVAL);
> + }
> + }
> +
> + // Set the block size.
> + let block_size_bits = s.log_block_size.value();
> + if block_size_bits > EXT2_MAX_BLOCK_LOG_SIZE - 10 {
> + pr_err!("Invalid log block size: {}\n", block_size_bits);
> + return Err(EINVAL);
> + }
> +
> + let block_size = 1024u32 << block_size_bits;
> + if sb.min_blocksize(block_size as i32) != block_size as i32 {
> + pr_err!("Bad block size: {}\n", block_size);
> + return Err(ENXIO);
> + }
> +
> + // Get the first inode and the inode size.
> + let (inode_size, first_ino) = if s.rev_level.value() == EXT2_GOOD_OLD_REV {
> + (EXT2_GOOD_OLD_INODE_SIZE, EXT2_GOOD_OLD_FIRST_INO)
> + } else {
> + let size = s.inode_size.value();
> + if size < EXT2_GOOD_OLD_INODE_SIZE
> + || !size.is_power_of_two()
> + || u32::from(size) > block_size
> + {
> + pr_err!("Unsupported inode size: {}\n", size);
> + return Err(EINVAL);
> + }
> + (size, s.first_ino.value())
> + };
> +
> + // Get the number of inodes per group and per block.
> + let inode_count = s.inodes_count.value();
> + let inodes_per_group = s.inodes_per_group.value();
> + let inodes_per_block = block_size / u32::from(inode_size);
> + if inodes_per_group == 0 || inodes_per_block == 0 {
> + return Err(EINVAL);
> + }
> +
> + if inodes_per_group > block_size * 8 || inodes_per_group < inodes_per_block {
> + pr_err!("Bad inodes per group: {}\n", inodes_per_group);
> + return Err(EINVAL);
> + }
> +
> + // Check the size of the groups.
> + let itb_per_group = inodes_per_group / inodes_per_block;
> + let blocks_per_group = s.blocks_per_group.value();
> + if blocks_per_group > block_size * 8 || blocks_per_group <= itb_per_group + 3 {
> + pr_err!("Bad blocks per group: {}\n", blocks_per_group);
> + return Err(EINVAL);
> + }
> +
> + let blocks_count = s.blocks_count.value();
> + if block::Sector::from(blocks_count) > sb.sector_count() >> (1 + block_size_bits) {
> + pr_err!(
> + "Block count ({blocks_count}) exceeds size of device ({})\n",
> + sb.sector_count() >> (1 + block_size_bits)
> + );
> + return Err(EINVAL);
> + }
> +
> + let group_count = (blocks_count - s.first_data_block.value() - 1) / blocks_per_group + 1;
> + if group_count * inodes_per_group != inode_count {
> + pr_err!(
> + "Unexpected inode count: {inode_count} vs {}",
> + group_count * inodes_per_group
> + );
> + return Err(EINVAL);
> + }
> +
> + let mut groups = Vec::new();
> + groups.reserve(group_count as usize, GFP_NOFS)?;

Why not GFP_KERNEL here?

Oh wow the C ext2 driver pins a bunch of buffer heads doesn't it...
/me runs

> +
> + let mut remain = group_count;
> + let mut offset = (SB_OFFSET / Offset::from(block_size) + 1) * Offset::from(block_size);
> + while remain > 0 {
> + let b = mapper.mapped_folio(offset)?;
> + for g in Group::from_bytes_to_slice(&b).ok_or(EIO)? {
> + groups.push(*g, GFP_NOFS)?;
> + remain -= 1;
> + if remain == 0 {
> + break;
> + }
> + }
> + offset += Offset::try_from(b.len())?;
> + }
> +
> + Self::check_descriptors(s, &groups)?;
> +
> + sb.set_magic(s.magic.value().into());
> + drop(mapped);
> + Ok(Box::new(
> + Ext2Fs {
> + mapper,
> + block_size,
> + _block_size_bits: block_size_bits,
> + has_file_type,
> + inodes_per_group,
> + inodes_per_block,
> + inode_count,
> + inode_size,
> + first_ino,
> + group: groups,
> + },
> + GFP_KERNEL,
> + )?)
> + }
> +
> + fn init_root(sb: &SuperBlock<Self>) -> Result<dentry::Root<Self>> {
> + let inode = Self::iget(sb, EXT2_ROOT_INO)?;
> + dentry::Root::try_new(inode)
> + }
> +}
> +
> +fn rec_len(d: &DirEntry) -> u32 {
> + let len = d.rec_len.value();
> +
> + if PAGE_SIZE >= 65536 && len == u16::MAX {
> + 1u32 << 16
> + } else {
> + len.into()
> + }
> +}
> +
> +#[vtable]
> +impl file::Operations for Ext2Fs {
> + type FileSystem = Self;
> +
> + fn seek(file: &File<Self>, offset: Offset, whence: file::Whence) -> Result<Offset> {
> + file::generic_seek(file, offset, whence)
> + }
> +
> + fn read(_: &File<Self>, _: &mut user::Writer, _: &mut Offset) -> Result<usize> {
> + Err(EISDIR)
> + }
> +
> + fn read_dir(

Wait, does this imply that regular files also have read_dir that you can
call?

> + _file: &File<Self>,
> + inode: &Locked<&INode<Self>, inode::ReadSem>,
> + emitter: &mut file::DirEmitter,
> + ) -> Result {
> + let has_file_type = inode.super_block().data().has_file_type;
> +
> + inode.for_each_page(emitter.pos(), Offset::MAX, |data| {

Neat that Rust can turn an indirect call to a lambda function into a
direct call. I hear C can do that now too?

> + let mut offset = 0usize;
> + let mut acc: Offset = 0;
> + let limit = data.len().saturating_sub(size_of::<DirEntry>());
> + while offset < limit {
> + let dirent = DirEntry::from_bytes(data, offset).ok_or(EIO)?;
> + offset += size_of::<DirEntry>();
> +
> + let name_len = usize::from(dirent.name_len);
> + if data.len() - offset < name_len {
> + return Err(EIO);
> + }
> +
> + let name = &data[offset..][..name_len];
> + let rec_len = rec_len(dirent);
> + offset = offset - size_of::<DirEntry>() + rec_len as usize;
> + if rec_len == 0 || offset > data.len() {
> + return Err(EIO);
> + }
> +
> + acc += Offset::from(rec_len);
> + let ino = dirent.inode.value();
> + if ino == 0 {
> + continue;
> + }
> +
> + let t = if !has_file_type {
> + file::DirEntryType::Unknown
> + } else {
> + match dirent.file_type {
> + FT_REG_FILE => file::DirEntryType::Reg,
> + FT_DIR => file::DirEntryType::Dir,
> + FT_SYMLINK => file::DirEntryType::Lnk,
> + FT_CHRDEV => file::DirEntryType::Chr,
> + FT_BLKDEV => file::DirEntryType::Blk,
> + FT_FIFO => file::DirEntryType::Fifo,
> + FT_SOCK => file::DirEntryType::Sock,
> + _ => continue,

Isn't this a directory corruption? return Err(EFSCORRUPTED) ?

> + }
> + };
> +
> + if !emitter.emit(acc, name, ino.into(), t) {
> + return Ok(Some(()));
> + }
> + acc = 0;
> + }
> + Ok(None)
> + })?;
> + Ok(())
> + }
> +}
> +
> +#[vtable]
> +impl inode::Operations for Ext2Fs {
> + type FileSystem = Self;
> +
> + fn lookup(
> + parent: &Locked<&INode<Self>, inode::ReadSem>,
> + dentry: dentry::Unhashed<'_, Self>,
> + ) -> Result<Option<ARef<DEntry<Self>>>> {
> + let inode = parent.for_each_page(0, Offset::MAX, |data| {
> + let mut offset = 0usize;
> + while data.len() - offset > size_of::<DirEntry>() {
> + let dirent = DirEntry::from_bytes(data, offset).ok_or(EIO)?;
> + offset += size_of::<DirEntry>();
> +
> + let name_len = usize::from(dirent.name_len);
> + if data.len() - offset < name_len {
> + return Err(EIO);
> + }
> +
> + let name = &data[offset..][..name_len];
> +
> + offset = offset - size_of::<DirEntry>() + usize::from(dirent.rec_len.value());
> + if offset > data.len() {
> + return Err(EIO);
> + }
> +
> + let ino = dirent.inode.value();
> + if ino != 0 && name == dentry.name() {
> + return Ok(Some(Self::iget(parent.super_block(), ino)?));
> + }
> + }
> + Ok(None)
> + })?;
> +
> + dentry.splice_alias(inode)
> + }
> +}
> +
> +impl iomap::Operations for Ext2Fs {
> + type FileSystem = Self;
> +
> + fn begin<'a>(
> + inode: &'a INode<Self>,
> + pos: Offset,
> + length: Offset,
> + _flags: u32,
> + map: &mut iomap::Map<'a>,
> + _srcmap: &mut iomap::Map<'a>,
> + ) -> Result {
> + let size = inode.size();
> + if pos >= size {
> + map.set_offset(pos)
> + .set_length(length.try_into()?)
> + .set_flags(iomap::map_flags::MERGED)
> + .set_type(iomap::Type::Hole);
> + return Ok(());
> + }
> +
> + let block_size = inode.super_block().data().block_size as Offset;
> + let block = pos / block_size;
> +
> + let boffset = Self::offset_to_block(inode, block)?;
> + map.set_offset(block * block_size)
> + .set_length(block_size as u64)
> + .set_flags(iomap::map_flags::MERGED)
> + .set_type(iomap::Type::Mapped)
> + .set_bdev(Some(inode.super_block().bdev()))
> + .set_addr(boffset * block_size as u64);

Neat use of chaining here.

> +
> + Ok(())
> + }
> +}
> +
> +fn decode_dev(block: &[LE<u32>]) -> (u32, u32) {
> + let v = block[0].value();
> + if v != 0 {
> + ((v >> 8) & 255, v & 255)
> + } else {
> + let v = block[1].value();
> + ((v & 0xfff00) >> 8, (v & 0xff) | ((v >> 12) & 0xfff00))

Nice not to have leXX_to_cpu calls everywhere here.

--D

> + }
> +}
> diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
> index 445599d4bff6..732bc9939f7f 100644
> --- a/rust/kernel/lib.rs
> +++ b/rust/kernel/lib.rs
> @@ -165,3 +165,6 @@ macro_rules! container_of {
> ptr.wrapping_sub(offset) as *const $type
> }}
> }
> +
> +/// The size in bytes of a page of memory.
> +pub const PAGE_SIZE: usize = bindings::PAGE_SIZE;
> --
> 2.34.1
>
>