Re: [PATCH RFC] ext4: don't treat fhandle lookup of ea_inode as FS corruption
From: Jan Kara
Date: Thu Mar 27 2025 - 07:05:39 EST
Hi Ted!
was this fix missed?
On Fri 29-11-24 21:20:53, Jann Horn wrote:
> A file handle that userspace provides to open_by_handle_at() can
> legitimately contain an outdated inode number that has since been reused
> for another purpose - that's why the file handle also contains a generation
> number.
>
> But if the inode number has been reused for an ea_inode, check_igot_inode()
> will notice, __ext4_iget() will go through ext4_error_inode(), and if the
> inode was newly created, it will also be marked as bad by iget_failed().
> This all happens before the point where the inode generation is checked.
>
> ext4_error_inode() is supposed to only be used on filesystem corruption; it
> should not be used when userspace just got unlucky with a stale file
> handle. So when this happens, let __ext4_iget() just return an error.
>
> Fixes: b3e6bcb94590 ("ext4: add EA_INODE checking to ext4_iget()")
> Signed-off-by: Jann Horn <jannh@xxxxxxxxxx>
The patch looks good to me. Feel free to add:
Reviewed-by: Jan Kara <jack@xxxxxxx>
Honza
> ---
> I'm sending this as an RFC patch because the patch I came up with is
> pretty ugly; I think it would be ideal if someone else comes up with a
> nicer patch that can be used instead of this one. I'm also not sure
> whether it actually matters if we call iget_failed() when this happens
> with a new inode.
>
> The following testcase demonstrates this issue, and shows that a filesystem
> mounted with errors=remount-ro is remounted to read-only when hitting it.
> Run this as root.
>
> ```
> #define _GNU_SOURCE
> #include <err.h>
> #include <stdarg.h>
> #include <stdio.h>
> #include <sched.h>
> #include <stddef.h>
> #include <unistd.h>
> #include <fcntl.h>
> #include <string.h>
> #include <stdlib.h>
> #include <sys/mount.h>
> #include <sys/mman.h>
> #include <sys/xattr.h>
> #include <sys/stat.h>
>
> #define SYSCHK(x) ({ \
> typeof(x) __res = (x); \
> if (__res == (typeof(x))-1) \
> err(1, "SYSCHK(" #x ")"); \
> __res; \
> })
>
> static int systemf(const char *cmd, ...) {
> char *full_cmd;
> va_list ap;
> va_start(ap, cmd);
> if (vasprintf(&full_cmd, cmd, ap) == -1)
> err(1, "vasprintf");
> int res = system(full_cmd);
> free(full_cmd);
> return res;
> }
>
> int main(void) {
> // avoid messing with the main mount hierarchy
> SYSCHK(unshare(CLONE_NEWNS));
> SYSCHK(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL));
>
> // create and mount new ext4 fs
> int fs_fd = SYSCHK(memfd_create("ext4-image", 0));
> SYSCHK(ftruncate(fs_fd, 1024*1024));
> if (systemf("mkfs.ext4 -O ea_inode /proc/self/fd/%d", fs_fd))
> errx(1, "mkfs failed");
> if (systemf("mount -o errors=remount-ro -t ext4 /proc/self/fd/%d /mnt", fs_fd))
> errx(1, "mount failed");
>
> // create file with inode xattr
> char xattr_body[8192];
> memset(xattr_body, 'A', sizeof(xattr_body));
> int file_fd = SYSCHK(open("/mnt/file", O_RDWR|O_CREAT, 0600));
> SYSCHK(fsetxattr(file_fd, "user.foo", xattr_body, sizeof(xattr_body), XATTR_CREATE));
> struct stat st;
> SYSCHK(fstat(file_fd, &st));
>
> // trigger bug: do fhandle lookup on inode of file plus one (which will be
> // the xattr inode)
> struct handle {
> unsigned int handle_bytes;
> unsigned int handle_type;
> unsigned int ino, gen, parent_ino, parent_gen;
> } handle = {
> .handle_bytes = sizeof(handle) - offsetof(struct handle, ino),
> .handle_type = 1/*FILEID_INO32_GEN*/,
> .ino = st.st_ino+1,
> .gen = 0
> };
> // this should fail
> SYSCHK(open_by_handle_at(file_fd, (void*)&handle, O_RDONLY));
> }
> ```
>
> resulting dmesg:
> ```
> EXT4-fs (loop0): mounted filesystem 13b7e98f-901a-41a4-ba59-4cc58d597798 r/w without journal. Quota mode: none.
> EXT4-fs error (device loop0): ext4_nfs_get_inode:1545: inode #13: comm ext4-ea-inode-f: unexpected EA_INODE flag
> EXT4-fs (loop0): Remounting filesystem read-only
> EXT4-fs (loop0): unmounting filesystem 13b7e98f-901a-41a4-ba59-4cc58d597798.
> ```
> ---
> fs/ext4/inode.c | 68 ++++++++++++++++++++++++++++++++++++++++-----------------
> 1 file changed, 48 insertions(+), 20 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 89aade6f45f62d9fd6300ef84c118c6b919cddc9..8a8cc29b211c875a1d22b943004dc3f10b9c4d79 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4705,22 +4705,43 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val)
> inode_set_iversion_queried(inode, val);
> }
>
> -static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags)
> -
> +static int check_igot_inode(struct inode *inode, ext4_iget_flags flags,
> + const char *function, unsigned int line)
> {
> + const char *err_str;
> +
> if (flags & EXT4_IGET_EA_INODE) {
> - if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
> - return "missing EA_INODE flag";
> + if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
> + err_str = "missing EA_INODE flag";
> + goto error;
> + }
> if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
> - EXT4_I(inode)->i_file_acl)
> - return "ea_inode with extended attributes";
> + EXT4_I(inode)->i_file_acl) {
> + err_str = "ea_inode with extended attributes";
> + goto error;
> + }
> } else {
> - if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
> - return "unexpected EA_INODE flag";
> + if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
> + /*
> + * open_by_handle_at() could provide an old inode number
> + * that has since been reused for an ea_inode; this does
> + * not indicate filesystem corruption
> + */
> + if (flags & EXT4_IGET_HANDLE)
> + return -ESTALE;
> + err_str = "unexpected EA_INODE flag";
> + goto error;
> + }
> + }
> + if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) {
> + err_str = "unexpected bad inode w/o EXT4_IGET_BAD";
> + goto error;
> }
> - if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD))
> - return "unexpected bad inode w/o EXT4_IGET_BAD";
> - return NULL;
> + return 0;
> +
> +error:
> + ext4_error_inode(inode, function, line, 0, err_str);
> + return -EFSCORRUPTED;
> }
>
> struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
> @@ -4732,7 +4753,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
> struct ext4_inode_info *ei;
> struct ext4_super_block *es = EXT4_SB(sb)->s_es;
> struct inode *inode;
> - const char *err_str;
> journal_t *journal = EXT4_SB(sb)->s_journal;
> long ret;
> loff_t size;
> @@ -4761,10 +4781,10 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
> if (!inode)
> return ERR_PTR(-ENOMEM);
> if (!(inode->i_state & I_NEW)) {
> - if ((err_str = check_igot_inode(inode, flags)) != NULL) {
> - ext4_error_inode(inode, function, line, 0, err_str);
> + ret = check_igot_inode(inode, flags, function, line);
> + if (ret) {
> iput(inode);
> - return ERR_PTR(-EFSCORRUPTED);
> + return ERR_PTR(ret);
> }
> return inode;
> }
> @@ -5036,13 +5056,21 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
> ret = -EFSCORRUPTED;
> goto bad_inode;
> }
> - if ((err_str = check_igot_inode(inode, flags)) != NULL) {
> - ext4_error_inode(inode, function, line, 0, err_str);
> - ret = -EFSCORRUPTED;
> - goto bad_inode;
> + ret = check_igot_inode(inode, flags, function, line);
> + /*
> + * -ESTALE here means there is nothing inherently wrong with the inode,
> + * it's just not an inode we can return for an fhandle lookup.
> + */
> + if (ret == -ESTALE) {
> + brelse(iloc.bh);
> + unlock_new_inode(inode);
> + iput(inode);
> + return ERR_PTR(-ESTALE);
> }
> -
> + if (ret)
> + goto bad_inode;
> brelse(iloc.bh);
> +
> unlock_new_inode(inode);
> return inode;
>
>
> ---
> base-commit: b86545e02e8c22fb89218f29d381fa8e8b91d815
> change-id: 20241129-ext4-ignore-ea-fhandle-743d3723c5e9
>
> --
> Jann Horn <jannh@xxxxxxxxxx>
>
>
--
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR