Re: [PATCH v5 2/6] kernfs: add a revision to identify directory node changes

From: Eric W. Biederman
Date: Mon Jun 07 2021 - 13:54:14 EST


Ian Kent <raven@xxxxxxxxxx> writes:

> Add a revision counter to kernfs directory nodes so it can be used
> to detect if a directory node has changed.
>
> There's an assumption that sizeof(unsigned long) <= sizeof(pointer)
> on all architectures and as far as I know that assumption holds.
>
> So adding a revision counter to the struct kernfs_elem_dir variant of
> the kernfs_node type union won't increase the size of the kernfs_node
> struct. This is because struct kernfs_elem_dir is at least
> sizeof(pointer) smaller than the largest union variant. It's tempting
> to make the revision counter a u64 but that would increase the size of
> kernfs_node on archs where sizeof(pointer) is smaller than the revision
> counter.
>
> Signed-off-by: Ian Kent <raven@xxxxxxxxxx>
> ---
> fs/kernfs/dir.c | 8 ++++++++
> fs/kernfs/kernfs-internal.h | 24 ++++++++++++++++++++++++
> include/linux/kernfs.h | 5 +++++
> 3 files changed, 37 insertions(+)
>
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index 33166ec90a112..b88432c48851f 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -372,6 +372,7 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
> /* successfully added, account subdir number */
> if (kernfs_type(kn) == KERNFS_DIR)
> kn->parent->dir.subdirs++;
> + kernfs_inc_rev(kn->parent);
>
> return 0;
> }
> @@ -394,6 +395,7 @@ static bool kernfs_unlink_sibling(struct kernfs_node *kn)
>
> if (kernfs_type(kn) == KERNFS_DIR)
> kn->parent->dir.subdirs--;
> + kernfs_inc_rev(kn->parent);
>
> rb_erase(&kn->rb, &kn->parent->dir.children);
> RB_CLEAR_NODE(&kn->rb);
> @@ -1105,6 +1107,12 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
>
> /* instantiate and hash dentry */
> ret = d_splice_alias(inode, dentry);
> + if (!IS_ERR(ret)) {
> + if (unlikely(ret))
> + kernfs_set_rev(parent, ret);
> + else
> + kernfs_set_rev(parent, dentry);

Do we care about d_time on non-NULL dentries?

For d_splice_alias to return a different dentry implies
that the dentry was non-NULL.

I am wondering if having a guarantee that d_time never changes could
help simplify the implementation. For never changing it would seem to
make sense to call kernfs_set_rev before d_splice_alias on dentry, and
simply not worry about it after d_splice_alias.

> + }
> out_unlock:
> mutex_unlock(&kernfs_mutex);
> return ret;
> diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
> index ccc3b44f6306f..1536002584fc4 100644
> --- a/fs/kernfs/kernfs-internal.h
> +++ b/fs/kernfs/kernfs-internal.h
> @@ -81,6 +81,30 @@ static inline struct kernfs_node *kernfs_dentry_node(struct dentry *dentry)
> return d_inode(dentry)->i_private;
> }
>
> +static inline void kernfs_set_rev(struct kernfs_node *kn,
> + struct dentry *dentry)
> +{
> + if (kernfs_type(kn) == KERNFS_DIR)
> + dentry->d_time = kn->dir.rev;
> +}
> +
> +static inline void kernfs_inc_rev(struct kernfs_node *kn)
> +{
> + if (kernfs_type(kn) == KERNFS_DIR)
> + kn->dir.rev++;
> +}
> +
> +static inline bool kernfs_dir_changed(struct kernfs_node *kn,
> + struct dentry *dentry)
> +{
> + if (kernfs_type(kn) == KERNFS_DIR) {
> + /* Not really a time but it does what's needed */
> + if (time_after(kn->dir.rev, dentry->d_time))
> + return true;

Why not simply make this:
if (kn->dir.rev != dentry->d_time)
return true;

I don't see what is gained by not counting as changed something in the
wrong half of the values.

> + }
> + return false;
> +}
> +
> extern const struct super_operations kernfs_sops;
> extern struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache;
>
> diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
> index 9e8ca8743c268..7947acb1163d7 100644
> --- a/include/linux/kernfs.h
> +++ b/include/linux/kernfs.h
> @@ -98,6 +98,11 @@ struct kernfs_elem_dir {
> * better directly in kernfs_node but is here to save space.
> */
> struct kernfs_root *root;
> + /*
> + * Monotonic revision counter, used to identify if a directory
> + * node has changed during revalidation.
> + */
> + unsigned long rev;
> };
>
> struct kernfs_elem_symlink {

Eric