Re: [PATCH v1 1/5] hostfs: Fix ephemeral inodes
From: Roberto Sassu
Date: Tue Jun 06 2023 - 09:14:20 EST
On Thu, 2023-03-09 at 17:54 +0100, Mickaël Salaün wrote:
> hostfs creates a new inode for each opened or created file, which created
> useless inode allocations and forbade identifying a host file with a kernel
> inode.
>
> Fix this uncommon filesystem behavior by tying kernel inodes to host
> file's inode and device IDs. Even if the host filesystem inodes may be
> recycled, this cannot happen while a file referencing it is open, which
> is the case with hostfs. It should be noted that hostfs inode IDs may
> not be unique for the same hostfs superblock because multiple host's
> (backed) superblocks may be used.
I hoped that this patch solved an issue when testing the
inode_setsecurity and inode_getsecurity combination. Unfortunately, it
does not work, since after inode_setsecurity the inode is dropped. At
the time inode_getsecurity is called, the security blob is lost.
Roberto
> Delete inodes when dropping them to force backed host's file descriptors
> closing.
>
> This enables to entirely remove ARCH_EPHEMERAL_INODES, and then makes
> Landlock fully supported by UML. This is very useful for testing
> (ongoing and backported) changes.
>
> These changes also factor out and simplify some helpers thanks to the
> new hostfs_inode_update() and the hostfs_iget() revamp: read_name(),
> hostfs_create(), hostfs_lookup(), hostfs_mknod(), and
> hostfs_fill_sb_common().
>
> A following commit with new Landlock tests check this new hostfs inode
> consistency.
>
> Cc: Anton Ivanov <anton.ivanov@xxxxxxxxxxxxxxxxxx>
> Cc: Johannes Berg <johannes@xxxxxxxxxxxxxxxx>
> Cc: Richard Weinberger <richard@xxxxxx>
> Cc: <stable@xxxxxxxxxxxxxxx> # 5.15.x: ce72750f04d6: hostfs: Fix writeback of dirty pages
> Cc: <stable@xxxxxxxxxxxxxxx> # 5.15+
> Signed-off-by: Mickaël Salaün <mic@xxxxxxxxxxx>
> Link: https://lore.kernel.org/r/20230309165455.175131-2-mic@xxxxxxxxxxx
> ---
> arch/Kconfig | 7 --
> arch/um/Kconfig | 1 -
> fs/hostfs/hostfs.h | 1 +
> fs/hostfs/hostfs_kern.c | 213 +++++++++++++++++++-------------------
> fs/hostfs/hostfs_user.c | 1 +
> security/landlock/Kconfig | 2 +-
> 6 files changed, 109 insertions(+), 116 deletions(-)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index e3511afbb7f2..d5f0841ac3c1 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -1156,13 +1156,6 @@ config COMPAT_32BIT_TIME
> config ARCH_NO_PREEMPT
> bool
>
> -config ARCH_EPHEMERAL_INODES
> - def_bool n
> - help
> - An arch should select this symbol if it doesn't keep track of inode
> - instances on its own, but instead relies on something else (e.g. the
> - host kernel for an UML kernel).
> -
> config ARCH_SUPPORTS_RT
> bool
>
> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
> index 541a9b18e343..4057d5267c6a 100644
> --- a/arch/um/Kconfig
> +++ b/arch/um/Kconfig
> @@ -5,7 +5,6 @@ menu "UML-specific options"
> config UML
> bool
> default y
> - select ARCH_EPHEMERAL_INODES
> select ARCH_HAS_FORTIFY_SOURCE
> select ARCH_HAS_GCOV_PROFILE_ALL
> select ARCH_HAS_KCOV
> diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
> index 69cb796f6270..0239e3af3945 100644
> --- a/fs/hostfs/hostfs.h
> +++ b/fs/hostfs/hostfs.h
> @@ -65,6 +65,7 @@ struct hostfs_stat {
> unsigned long long blocks;
> unsigned int maj;
> unsigned int min;
> + dev_t dev;
> };
>
> extern int stat_file(const char *path, struct hostfs_stat *p, int fd);
> diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
> index 28b4f15c19eb..19496f732016 100644
> --- a/fs/hostfs/hostfs_kern.c
> +++ b/fs/hostfs/hostfs_kern.c
> @@ -26,6 +26,7 @@ struct hostfs_inode_info {
> fmode_t mode;
> struct inode vfs_inode;
> struct mutex open_mutex;
> + dev_t dev;
> };
>
> static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
> @@ -182,14 +183,6 @@ static char *follow_link(char *link)
> return ERR_PTR(n);
> }
>
> -static struct inode *hostfs_iget(struct super_block *sb)
> -{
> - struct inode *inode = new_inode(sb);
> - if (!inode)
> - return ERR_PTR(-ENOMEM);
> - return inode;
> -}
> -
> static int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
> {
> /*
> @@ -228,6 +221,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
> return NULL;
> hi->fd = -1;
> hi->mode = 0;
> + hi->dev = 0;
> inode_init_once(&hi->vfs_inode);
> mutex_init(&hi->open_mutex);
> return &hi->vfs_inode;
> @@ -240,6 +234,7 @@ static void hostfs_evict_inode(struct inode *inode)
> if (HOSTFS_I(inode)->fd != -1) {
> close_file(&HOSTFS_I(inode)->fd);
> HOSTFS_I(inode)->fd = -1;
> + HOSTFS_I(inode)->dev = 0;
> }
> }
>
> @@ -265,6 +260,7 @@ static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
> static const struct super_operations hostfs_sbops = {
> .alloc_inode = hostfs_alloc_inode,
> .free_inode = hostfs_free_inode,
> + .drop_inode = generic_delete_inode,
> .evict_inode = hostfs_evict_inode,
> .statfs = hostfs_statfs,
> .show_options = hostfs_show_options,
> @@ -512,18 +508,31 @@ static const struct address_space_operations hostfs_aops = {
> .write_end = hostfs_write_end,
> };
>
> -static int read_name(struct inode *ino, char *name)
> +static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st)
> +{
> + set_nlink(ino, st->nlink);
> + i_uid_write(ino, st->uid);
> + i_gid_write(ino, st->gid);
> + ino->i_atime =
> + (struct timespec64){ st->atime.tv_sec, st->atime.tv_nsec };
> + ino->i_mtime =
> + (struct timespec64){ st->mtime.tv_sec, st->mtime.tv_nsec };
> + ino->i_ctime =
> + (struct timespec64){ st->ctime.tv_sec, st->ctime.tv_nsec };
> + ino->i_size = st->size;
> + ino->i_blocks = st->blocks;
> + return 0;
> +}
> +
> +static int hostfs_inode_set(struct inode *ino, void *data)
> {
> + struct hostfs_stat *st = data;
> dev_t rdev;
> - struct hostfs_stat st;
> - int err = stat_file(name, &st, -1);
> - if (err)
> - return err;
>
> /* Reencode maj and min with the kernel encoding.*/
> - rdev = MKDEV(st.maj, st.min);
> + rdev = MKDEV(st->maj, st->min);
>
> - switch (st.mode & S_IFMT) {
> + switch (st->mode & S_IFMT) {
> case S_IFLNK:
> ino->i_op = &hostfs_link_iops;
> break;
> @@ -535,7 +544,7 @@ static int read_name(struct inode *ino, char *name)
> case S_IFBLK:
> case S_IFIFO:
> case S_IFSOCK:
> - init_special_inode(ino, st.mode & S_IFMT, rdev);
> + init_special_inode(ino, st->mode & S_IFMT, rdev);
> ino->i_op = &hostfs_iops;
> break;
> case S_IFREG:
> @@ -547,17 +556,42 @@ static int read_name(struct inode *ino, char *name)
> return -EIO;
> }
>
> - ino->i_ino = st.ino;
> - ino->i_mode = st.mode;
> - set_nlink(ino, st.nlink);
> - i_uid_write(ino, st.uid);
> - i_gid_write(ino, st.gid);
> - ino->i_atime = (struct timespec64){ st.atime.tv_sec, st.atime.tv_nsec };
> - ino->i_mtime = (struct timespec64){ st.mtime.tv_sec, st.mtime.tv_nsec };
> - ino->i_ctime = (struct timespec64){ st.ctime.tv_sec, st.ctime.tv_nsec };
> - ino->i_size = st.size;
> - ino->i_blocks = st.blocks;
> - return 0;
> + HOSTFS_I(ino)->dev = st->dev;
> + ino->i_ino = st->ino;
> + ino->i_mode = st->mode;
> + return hostfs_inode_update(ino, st);
> +}
> +
> +static int hostfs_inode_test(struct inode *inode, void *data)
> +{
> + const struct hostfs_stat *st = data;
> +
> + return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == st->dev;
> +}
> +
> +static struct inode *hostfs_iget(struct super_block *sb, char *name)
> +{
> + struct inode *inode;
> + struct hostfs_stat st;
> + int err = stat_file(name, &st, -1);
> +
> + if (err)
> + return ERR_PTR(err);
> +
> + inode = iget5_locked(sb, st.ino, hostfs_inode_test, hostfs_inode_set,
> + &st);
> + if (!inode)
> + return ERR_PTR(-ENOMEM);
> +
> + if (inode->i_state & I_NEW) {
> + unlock_new_inode(inode);
> + } else {
> + spin_lock(&inode->i_lock);
> + hostfs_inode_update(inode, &st);
> + spin_unlock(&inode->i_lock);
> + }
> +
> + return inode;
> }
>
> static int hostfs_create(struct mnt_idmap *idmap, struct inode *dir,
> @@ -565,62 +599,48 @@ static int hostfs_create(struct mnt_idmap *idmap, struct inode *dir,
> {
> struct inode *inode;
> char *name;
> - int error, fd;
> -
> - inode = hostfs_iget(dir->i_sb);
> - if (IS_ERR(inode)) {
> - error = PTR_ERR(inode);
> - goto out;
> - }
> + int fd;
>
> - error = -ENOMEM;
> name = dentry_name(dentry);
> if (name == NULL)
> - goto out_put;
> + return -ENOMEM;
>
> fd = file_create(name, mode & 0777);
> - if (fd < 0)
> - error = fd;
> - else
> - error = read_name(inode, name);
> + if (fd < 0) {
> + __putname(name);
> + return fd;
> + }
>
> + inode = hostfs_iget(dir->i_sb, name);
> __putname(name);
> - if (error)
> - goto out_put;
> + if (IS_ERR(inode))
> + return PTR_ERR(inode);
>
> HOSTFS_I(inode)->fd = fd;
> HOSTFS_I(inode)->mode = FMODE_READ | FMODE_WRITE;
> d_instantiate(dentry, inode);
> return 0;
> -
> - out_put:
> - iput(inode);
> - out:
> - return error;
> }
>
> static struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
> unsigned int flags)
> {
> - struct inode *inode;
> + struct inode *inode = NULL;
> char *name;
> - int err;
> -
> - inode = hostfs_iget(ino->i_sb);
> - if (IS_ERR(inode))
> - goto out;
>
> - err = -ENOMEM;
> name = dentry_name(dentry);
> - if (name) {
> - err = read_name(inode, name);
> - __putname(name);
> - }
> - if (err) {
> - iput(inode);
> - inode = (err == -ENOENT) ? NULL : ERR_PTR(err);
> + if (name == NULL)
> + return ERR_PTR(-ENOMEM);
> +
> + inode = hostfs_iget(ino->i_sb, name);
> + __putname(name);
> + if (IS_ERR(inode)) {
> + if (PTR_ERR(inode) == -ENOENT)
> + inode = NULL;
> + else
> + return ERR_CAST(inode);
> }
> - out:
> +
> return d_splice_alias(inode, dentry);
> }
>
> @@ -704,35 +724,23 @@ static int hostfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
> char *name;
> int err;
>
> - inode = hostfs_iget(dir->i_sb);
> - if (IS_ERR(inode)) {
> - err = PTR_ERR(inode);
> - goto out;
> - }
> -
> - err = -ENOMEM;
> name = dentry_name(dentry);
> if (name == NULL)
> - goto out_put;
> + return -ENOMEM;
>
> err = do_mknod(name, mode, MAJOR(dev), MINOR(dev));
> - if (err)
> - goto out_free;
> + if (err) {
> + __putname(name);
> + return err;
> + }
>
> - err = read_name(inode, name);
> + inode = hostfs_iget(dir->i_sb, name);
> __putname(name);
> - if (err)
> - goto out_put;
> + if (IS_ERR(inode))
> + return PTR_ERR(inode);
>
> d_instantiate(dentry, inode);
> return 0;
> -
> - out_free:
> - __putname(name);
> - out_put:
> - iput(inode);
> - out:
> - return err;
> }
>
> static int hostfs_rename2(struct mnt_idmap *idmap,
> @@ -929,49 +937,40 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
> sb->s_maxbytes = MAX_LFS_FILESIZE;
> err = super_setup_bdi(sb);
> if (err)
> - goto out;
> + return err;
>
> /* NULL is printed as '(null)' by printf(): avoid that. */
> if (req_root == NULL)
> req_root = "";
>
> - err = -ENOMEM;
> sb->s_fs_info = host_root_path =
> kasprintf(GFP_KERNEL, "%s/%s", root_ino, req_root);
> if (host_root_path == NULL)
> - goto out;
> -
> - root_inode = new_inode(sb);
> - if (!root_inode)
> - goto out;
> + return -ENOMEM;
>
> - err = read_name(root_inode, host_root_path);
> - if (err)
> - goto out_put;
> + root_inode = hostfs_iget(sb, host_root_path);
> + if (IS_ERR(root_inode))
> + return PTR_ERR(root_inode);
>
> if (S_ISLNK(root_inode->i_mode)) {
> - char *name = follow_link(host_root_path);
> - if (IS_ERR(name)) {
> - err = PTR_ERR(name);
> - goto out_put;
> - }
> - err = read_name(root_inode, name);
> + char *name;
> +
> + iput(root_inode);
> + name = follow_link(host_root_path);
> + if (IS_ERR(name))
> + return PTR_ERR(name);
> +
> + root_inode = hostfs_iget(sb, name);
> kfree(name);
> - if (err)
> - goto out_put;
> + if (IS_ERR(root_inode))
> + return PTR_ERR(root_inode);
> }
>
> - err = -ENOMEM;
> sb->s_root = d_make_root(root_inode);
> if (sb->s_root == NULL)
> - goto out;
> + return -ENOMEM;
>
> return 0;
> -
> -out_put:
> - iput(root_inode);
> -out:
> - return err;
> }
>
> static struct dentry *hostfs_read_sb(struct file_system_type *type,
> diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
> index 5ecc4706172b..840619e39a1a 100644
> --- a/fs/hostfs/hostfs_user.c
> +++ b/fs/hostfs/hostfs_user.c
> @@ -36,6 +36,7 @@ static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
> p->blocks = buf->st_blocks;
> p->maj = os_major(buf->st_rdev);
> p->min = os_minor(buf->st_rdev);
> + p->dev = buf->st_dev;
> }
>
> int stat_file(const char *path, struct hostfs_stat *p, int fd)
> diff --git a/security/landlock/Kconfig b/security/landlock/Kconfig
> index 8e33c4e8ffb8..c1e862a38410 100644
> --- a/security/landlock/Kconfig
> +++ b/security/landlock/Kconfig
> @@ -2,7 +2,7 @@
>
> config SECURITY_LANDLOCK
> bool "Landlock support"
> - depends on SECURITY && !ARCH_EPHEMERAL_INODES
> + depends on SECURITY
> select SECURITY_PATH
> help
> Landlock is a sandboxing mechanism that enables processes to restrict